Make Actions tasks/jobs timeouts configurable by the user (#27400)

With this PR we added the possibility to configure the Actions timeouts
values for killing tasks/jobs.
Particularly this enhancement is closely related to the `act_runner`
configuration reported below:
```
# The timeout for a job to be finished.
# Please note that the Gitea instance also has a timeout (3h by default) for the job.
# So the job could be stopped by the Gitea instance if it's timeout is shorter than this.
timeout: 3h
```

---

Setting the corresponding key in the INI configuration file, it is
possible to let jobs run for more than 3 hours.

Signed-off-by: Francesco Antognazza <francesco.antognazza@gmail.com>
This commit is contained in:
Francesco Antognazza 2023-10-02 23:09:26 +02:00 committed by GitHub
parent dfa4e5857f
commit bc21723717
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 9 deletions

@ -2568,6 +2568,12 @@ LEVEL = Info
;DEFAULT_ACTIONS_URL = github
;; Default artifact retention time in days, default is 90 days
;ARTIFACT_RETENTION_DAYS = 90
;; Timeout to stop the task which have running status, but haven't been updated for a long time
;ZOMBIE_TASK_TIMEOUT = 10m
;; Timeout to stop the tasks which have running status and continuous updates, but don't end for a long time
;ENDLESS_TASK_TIMEOUT = 3h
;; Timeout to cancel the jobs which have waiting status, but haven't been picked by a runner for a long time
;ABANDONED_JOB_TIMEOUT = 24h
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@ -1389,6 +1389,9 @@ PROXY_HOSTS = *.github.com
- `STORAGE_TYPE`: **local**: Storage type for actions logs, `local` for local disk or `minio` for s3 compatible object storage service, default is `local` or other name defined with `[storage.xxx]`
- `MINIO_BASE_PATH`: **actions_log/**: Minio base path on the bucket only available when STORAGE_TYPE is `minio`
- `ARTIFACT_RETENTION_DAYS`: **90**: Number of days to keep artifacts. Set to 0 to disable artifact retention. Default is 90 days if not set.
- `ZOMBIE_TASK_TIMEOUT`: **10m**: Timeout to stop the task which have running status, but haven't been updated for a long time
- `ENDLESS_TASK_TIMEOUT`: **3h**: Timeout to stop the tasks which have running status and continuous updates, but don't end for a long time
- `ABANDONED_JOB_TIMEOUT`: **24h**: Timeout to cancel the jobs which have waiting status, but haven't been picked by a runner for a long time
`DEFAULT_ACTIONS_URL` indicates where the Gitea Actions runners should find the actions with relative path.
For example, `uses: actions/checkout@v3` means `https://github.com/actions/checkout@v3` since the value of `DEFAULT_ACTIONS_URL` is `github`.

@ -6,6 +6,7 @@ package setting
import (
"fmt"
"strings"
"time"
"code.gitea.io/gitea/modules/log"
)
@ -18,6 +19,9 @@ var (
ArtifactRetentionDays int64 `ini:"ARTIFACT_RETENTION_DAYS"`
Enabled bool
DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"`
ZombieTaskTimeout time.Duration `ini:"ZOMBIE_TASK_TIMEOUT"`
EndlessTaskTimeout time.Duration `ini:"ENDLESS_TASK_TIMEOUT"`
AbandonedJobTimeout time.Duration `ini:"ABANDONED_JOB_TIMEOUT"`
}{
Enabled: true,
DefaultActionsURL: defaultActionsURLGitHub,
@ -82,5 +86,9 @@ func loadActionsFrom(rootCfg ConfigProvider) error {
Actions.ArtifactRetentionDays = 90
}
Actions.ZombieTaskTimeout = sec.Key("ZOMBIE_TASK_TIMEOUT").MustDuration(10 * time.Minute)
Actions.EndlessTaskTimeout = sec.Key("ENDLESS_TASK_TIMEOUT").MustDuration(3 * time.Hour)
Actions.AbandonedJobTimeout = sec.Key("ABANDONED_JOB_TIMEOUT").MustDuration(24 * time.Hour)
return err
}

@ -12,20 +12,15 @@ import (
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/actions"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
)
const (
zombieTaskTimeout = 10 * time.Minute
endlessTaskTimeout = 3 * time.Hour
abandonedJobTimeout = 24 * time.Hour
)
// StopZombieTasks stops the task which have running status, but haven't been updated for a long time
func StopZombieTasks(ctx context.Context) error {
return stopTasks(ctx, actions_model.FindTaskOptions{
Status: actions_model.StatusRunning,
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-zombieTaskTimeout).Unix()),
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.ZombieTaskTimeout).Unix()),
})
}
@ -33,7 +28,7 @@ func StopZombieTasks(ctx context.Context) error {
func StopEndlessTasks(ctx context.Context) error {
return stopTasks(ctx, actions_model.FindTaskOptions{
Status: actions_model.StatusRunning,
StartedBefore: timeutil.TimeStamp(time.Now().Add(-endlessTaskTimeout).Unix()),
StartedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.EndlessTaskTimeout).Unix()),
})
}
@ -81,7 +76,7 @@ func stopTasks(ctx context.Context, opts actions_model.FindTaskOptions) error {
func CancelAbandonedJobs(ctx context.Context) error {
jobs, _, err := actions_model.FindRunJobs(ctx, actions_model.FindRunJobOptions{
Statuses: []actions_model.Status{actions_model.StatusWaiting, actions_model.StatusBlocked},
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-abandonedJobTimeout).Unix()),
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.AbandonedJobTimeout).Unix()),
})
if err != nil {
log.Warn("find abandoned tasks: %v", err)