From 9e8c5545cc516e2256f07c888d4cb5d4db3cdecf Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Fri, 5 Jul 2024 23:56:23 +0200 Subject: [PATCH 01/10] always run release --- .forgejo/workflows/build.yaml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.forgejo/workflows/build.yaml b/.forgejo/workflows/build.yaml index da58767..e0035ad 100644 --- a/.forgejo/workflows/build.yaml +++ b/.forgejo/workflows/build.yaml @@ -1,20 +1,7 @@ on: [push] jobs: - test: - runs-on: docker - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version: '1.22' - - run: go build - - uses: forgejo/upload-artifact@v3 - with: - name: morffix - path: morffix release: runs-on: docker - if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 From a45becb2abe83f2f8361aa032366ef006d31e7d2 Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Fri, 5 Jul 2024 23:58:18 +0200 Subject: [PATCH 02/10] fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f67a607..dea61ce 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ # Morffix -Morph and fix you Media distributedly with ffmpeg +Morph and fix your Media distributedly with ffmpeg From 265b2c16335ed0cc7efd83e31fe0a45615c783b4 Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 01:20:01 +0200 Subject: [PATCH 03/10] Add System Files --- systemd/morffix-worker.service | 18 ++++++++++++++++++ systemd/morffix.service | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 systemd/morffix-worker.service create mode 100644 systemd/morffix.service diff --git a/systemd/morffix-worker.service b/systemd/morffix-worker.service new file mode 100644 index 0000000..be423a2 --- /dev/null +++ b/systemd/morffix-worker.service @@ -0,0 +1,18 @@ +[Unit] +Description=morffix worker +ConditionPathExists=/opt/morffix/morffix +After=network.target 
+Wants=morffix.service + +[Service] +Type=simple +User=root +LimitNOFILE=1024 +Restart=on-failure +RestartSec=10 +StartLimitIntervalSec=60 +WorkingDirectory=/opt/morffix +ExecStart=/opt/morffix/morffix + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/systemd/morffix.service b/systemd/morffix.service new file mode 100644 index 0000000..72c7d43 --- /dev/null +++ b/systemd/morffix.service @@ -0,0 +1,19 @@ +[Unit] +Description=morffix +ConditionPathExists=/opt/morffix/morffix +After=network.target +Wants=postgresql@15-main.service + +[Service] +Type=simple +User=root +Group=media_library +LimitNOFILE=1024 +Restart=on-failure +RestartSec=10 +StartLimitIntervalSec=60 +WorkingDirectory=/opt/morffix +ExecStart=/opt/morffix/morffix -server + +[Install] +WantedBy=multi-user.target \ No newline at end of file From e8f7521fa4953a916e055ab180955e9b97735f16 Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 01:37:51 +0200 Subject: [PATCH 04/10] compile for amd64 --- .forgejo/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.forgejo/workflows/build.yaml b/.forgejo/workflows/build.yaml index e0035ad..2f63773 100644 --- a/.forgejo/workflows/build.yaml +++ b/.forgejo/workflows/build.yaml @@ -7,7 +7,7 @@ jobs: - uses: actions/setup-go@v5 with: go-version: '1.22' - - run: go build + - run: GOOS=linux GOARCH=amd64 go build - run: mkdir out - run: cp morffix out/morffix - uses: actions/forgejo-release@eb0fcc44a150c0de82e6fdb36752dd56bf27d017 From b99a818040b8812e5df644bda356464b5e98ce36 Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 03:05:58 +0200 Subject: [PATCH 05/10] Cleanup old temp files on startup --- task/healthcheck.go | 2 +- task/transcode.go | 4 ++-- worker/worker.go | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/task/healthcheck.go b/task/healthcheck.go index a8ab6a2..9039634 100644 --- a/task/healthcheck.go +++ b/task/healthcheck.go @@ 
-17,7 +17,7 @@ func RunHealthCheck(conf config.Config, t *types.Task, data types.HealthCheckDat l := log.GetTaskLogger(t) // TODO Figure out how to get correct file ending - path := filepath.Join(conf.Worker.TempDir, fmt.Sprintf("%v-%v.mkv", t.ID, t.FileID)) + path := filepath.Join(conf.Worker.TempDir, fmt.Sprintf("morffix-health-%v-%v.mkv", t.ID, t.FileID)) // Set ffmpeg input path if len(t.FfmpegCommand.InputFiles) == 0 { diff --git a/task/transcode.go b/task/transcode.go index 3a1e863..9d6a143 100644 --- a/task/transcode.go +++ b/task/transcode.go @@ -17,8 +17,8 @@ func RunTranscode(conf config.Config, t *types.Task, data types.TranscodeData) { l := log.GetTaskLogger(t) // TODO Figure out how to get correct file ending - src_path := filepath.Join(conf.Worker.TempDir, fmt.Sprintf("src-%v-%v.mkv", t.ID, t.FileID)) - dst_path := filepath.Join(conf.Worker.TempDir, fmt.Sprintf("dst-%v-%v.mkv", t.ID, t.FileID)) + src_path := filepath.Join(conf.Worker.TempDir, fmt.Sprintf("morffix-src-%v-%v.mkv", t.ID, t.FileID)) + dst_path := filepath.Join(conf.Worker.TempDir, fmt.Sprintf("morffix-dst-%v-%v.mkv", t.ID, t.FileID)) // Set ffmpeg input path if len(t.FfmpegCommand.InputFiles) == 0 { diff --git a/worker/worker.go b/worker/worker.go index 07dbad6..411c872 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -5,6 +5,7 @@ import ( "io" "log/slog" "net/http" + "path/filepath" "git.lastassault.de/speatzle/morffix/rpc" @@ -36,6 +37,20 @@ func Start(_conf config.Config) { return } + slog.InfoContext(ctx, "Cleaning tmp Files...") + files, err := filepath.Glob("/tmp/morffix-*") + if err != nil { + slog.Error("Get tmp Files", "err", err) + return + } + for _, f := range files { + slog.InfoContext(ctx, "Deleting File", "path", f) + if err := os.Remove(f); err != nil { + slog.Error("Deleting tmp File", "err", err, "path", f) + return + } + } + sigs := make(chan os.Signal, 1) signal.Notify(sigs, os.Interrupt) exit := false From c37c26908db844b2cbb3dc1b53edfb8fe377d5df Mon Sep 17 
00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 03:07:06 +0200 Subject: [PATCH 06/10] Prevent Starting Task Twice --- constants/error.go | 1 + task/task.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/constants/error.go b/constants/error.go index c375e01..341148c 100644 --- a/constants/error.go +++ b/constants/error.go @@ -3,3 +3,4 @@ package constants import "fmt" var ErrTaskDoesNotExist = fmt.Errorf("Task does not Exist") +var ErrTaskIsAlreadyRunning = fmt.Errorf("Task is Already Running") diff --git a/task/task.go b/task/task.go index 6e2ad61..f6c687c 100644 --- a/task/task.go +++ b/task/task.go @@ -17,6 +17,11 @@ func StartTask(conf config.Config, data types.TaskStart) error { taskMutex.Lock() defer taskMutex.Unlock() + _, ok := tasks[data.ID] + if ok { + return constants.ErrTaskIsAlreadyRunning + } + tasks[data.ID] = &types.Task{ ID: data.ID, Type: data.Type, From a9d901da719e508e5c303abce8d271c83c3e38d7 Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 03:08:58 +0200 Subject: [PATCH 07/10] Also Count Unknown Tasks as Assigned --- server/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/task.go b/server/task.go index 8161ca1..4d430a1 100644 --- a/server/task.go +++ b/server/task.go @@ -358,7 +358,7 @@ func assignQueuedTasks(ctx context.Context) error { } if Workers[i].Connected { var count int - err := db.QueryRow(ctx, "SELECT COUNT(*) FROM tasks WHERE worker_id = $1 AND (status = $2 OR status = $3 OR status = $4)", i, constants.TASK_STATUS_ASSIGNED, constants.TASK_STATUS_RUNNING, constants.TASK_STATUS_WAITING).Scan(&count) + err := db.QueryRow(ctx, "SELECT COUNT(*) FROM tasks WHERE worker_id = $1 AND (status = $2 OR status = $3 OR status = $4 OR status = $5)", i, constants.TASK_STATUS_UNKNOWN, constants.TASK_STATUS_ASSIGNED, constants.TASK_STATUS_RUNNING, constants.TASK_STATUS_WAITING).Scan(&count) if err != nil { return fmt.Errorf("Error Querying Worker Task Count: %w", err) } From 
7574628a1d1c0b9e4617ddef9d9b548a07ddb5bb Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 03:09:34 +0200 Subject: [PATCH 08/10] check worker for unknown tasks --- server/worker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/worker.go b/server/worker.go index 573afea..2305cff 100644 --- a/server/worker.go +++ b/server/worker.go @@ -180,7 +180,7 @@ func updateWorkerTaskStatus(ctx context.Context) { if Workers[uuid].Connected { w := Workers[uuid] - rows, err := db.Query(ctx, "SELECT id, COALESCE(CARDINALITY(log),0) as log_offset FROM tasks WHERE worker_id = $1 AND (status = $2 OR status = $3 OR status = $4)", uuid, constants.TASK_STATUS_ASSIGNED, constants.TASK_STATUS_RUNNING, constants.TASK_STATUS_WAITING) + rows, err := db.Query(ctx, "SELECT id, COALESCE(CARDINALITY(log),0) as log_offset FROM tasks WHERE worker_id = $1 AND (status = $2 OR status = $3 OR status = $4 OR status = $5)", uuid, constants.TASK_STATUS_UNKNOWN, constants.TASK_STATUS_ASSIGNED, constants.TASK_STATUS_RUNNING, constants.TASK_STATUS_WAITING) if err != nil { slog.ErrorContext(ctx, "Error Getting Tasks for Worker", "err", err, "worker_id", uuid) return From 78ab8c9daf18e9d5a962b8c718683dbe2828533f Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 15:54:49 +0200 Subject: [PATCH 09/10] Handle Unknown Tasks and RPC Error way better --- constants/error.go | 1 + rpc/call.go | 3 ++- server/task.go | 20 +++++++++++++++++++- server/worker.go | 13 +++++++++++-- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/constants/error.go b/constants/error.go index 341148c..bb3e278 100644 --- a/constants/error.go +++ b/constants/error.go @@ -4,3 +4,4 @@ import "fmt" var ErrTaskDoesNotExist = fmt.Errorf("Task does not Exist") var ErrTaskIsAlreadyRunning = fmt.Errorf("Task is Already Running") +var ErrRPCRequestTimeout = fmt.Errorf("Request timed out") diff --git a/rpc/call.go b/rpc/call.go index 71696de..037f384 100644 --- a/rpc/call.go +++ 
b/rpc/call.go @@ -6,6 +6,7 @@ import ( "fmt" "time" + "git.lastassault.de/speatzle/morffix/constants" "github.com/google/uuid" "nhooyr.io/websocket" ) @@ -89,7 +90,7 @@ func (s *server) Call(ctx context.Context, c *websocket.Conn, method string, par // remove request from map delete(s.requests, id) - return nil, fmt.Errorf("Request timed out") + return nil, constants.ErrRPCRequestTimeout } } diff --git a/server/task.go b/server/task.go index 4d430a1..d86d899 100644 --- a/server/task.go +++ b/server/task.go @@ -4,11 +4,13 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "log/slog" "net/http" "slices" "strconv" + "strings" "time" "git.lastassault.de/speatzle/morffix/constants" @@ -387,7 +389,23 @@ func assignQueuedTasks(ctx context.Context) error { } _, err = rpcServer.Call(ctx, Workers[i].Conn, "task-start", taskStart, nil) - if err != nil { + if strings.HasSuffix(err.Error(), constants.ErrTaskIsAlreadyRunning.Error()) { + // Task was started previously but something went wrong and we are out of sync + slog.WarnContext(ctx, "Task is apparently already Running on this Worker, thats bad", "task_id", taskStart.ID, "worker", Workers[i].Name) + + _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskStart.ID, constants.TASK_STATUS_ASSIGNED, []string{fmt.Sprintf("%v MASTER: Task Start, Task Already Running!", time.Now())}) + if err != nil { + return fmt.Errorf("Updating Task status during already running error: %w", err) + } + } else if errors.Is(err, constants.ErrRPCRequestTimeout) { + // We really don't know whats going on, might be slow response, oom, disk full or a bug + slog.WarnContext(ctx, "Task start Timed Out", "task_id", taskStart.ID, "worker", Workers[i].Name) + + _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskStart.ID, constants.TASK_STATUS_UNKNOWN, []string{fmt.Sprintf("%v MASTER: Task Start RPC Call Timed Out!", time.Now())}) + if err != nil { + return 
fmt.Errorf("Updating Unknown Task Status due to Timeout while starting Task: %w", err) + } + } else if err != nil { return fmt.Errorf("Error Starting Task: %w", err) } diff --git a/server/worker.go b/server/worker.go index 2305cff..b501165 100644 --- a/server/worker.go +++ b/server/worker.go @@ -2,6 +2,8 @@ package server import ( "context" + "errors" + "fmt" "log/slog" "net/http" "strings" @@ -207,7 +209,7 @@ func updateWorkerTaskStatus(ctx context.Context) { if strings.HasSuffix(err.Error(), constants.ErrTaskDoesNotExist.Error()) { // Worker says it does not know of this task, mark it failed so that we don't asks the worker about it again and again slog.ErrorContext(ctx, "Task is unknown by worker, Failing...", "err", err, "id", taskID) - _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_FAILED, []string{"Task Failed because it is unknown to Assigned Worker"}) + _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_FAILED, []string{fmt.Sprintf("%v MASTER: Task Failed because it is unknown to Assigned Worker", time.Now())}) if err != nil { slog.ErrorContext(ctx, "Updating Failed Task Status", "err", err) return @@ -215,10 +217,17 @@ func updateWorkerTaskStatus(ctx context.Context) { slog.Info("Updating task done", "id", taskID, "status", constants.TASK_STATUS_FAILED) return + } else if errors.Is(err, constants.ErrRPCRequestTimeout) { + // We really don't know whats going on, might be slow response, oom, disk full or a bug + _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN, []string{fmt.Sprintf("%v MASTER: Status RPC Call Timed Out!", time.Now())}) + if err != nil { + slog.ErrorContext(ctx, "Updating Unknown Task Status due to Timeout", "err", err) + return + } } else { slog.ErrorContext(ctx, "Getting Task Status", "err", err) - _, err = db.Exec(ctx, "UPDATE tasks SET 
status = $2 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN) + _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN, []string{fmt.Sprintf("%v MASTER: Status RPC Call Error: %v", time.Now(), err.Error())}) if err != nil { slog.ErrorContext(ctx, "Updating Unknown Task Status", "err", err) return From 24e11d9103a7c31bf6c70df84ad43dff55c2cd36 Mon Sep 17 00:00:00 2001 From: Samuel Lorch Date: Sat, 6 Jul 2024 15:59:34 +0200 Subject: [PATCH 10/10] save log offset so that the master can also log without dropping worker log lines --- migrations/000015_alter_tasks_table_log_offset.down.sql | 2 ++ migrations/000015_alter_tasks_table_log_offset.up.sql | 2 ++ server/worker.go | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 migrations/000015_alter_tasks_table_log_offset.down.sql create mode 100644 migrations/000015_alter_tasks_table_log_offset.up.sql diff --git a/migrations/000015_alter_tasks_table_log_offset.down.sql b/migrations/000015_alter_tasks_table_log_offset.down.sql new file mode 100644 index 0000000..bfbd448 --- /dev/null +++ b/migrations/000015_alter_tasks_table_log_offset.down.sql @@ -0,0 +1,2 @@ +ALTER TABLE tasks +DROP IF EXISTS log_offset; diff --git a/migrations/000015_alter_tasks_table_log_offset.up.sql b/migrations/000015_alter_tasks_table_log_offset.up.sql new file mode 100644 index 0000000..7543ea9 --- /dev/null +++ b/migrations/000015_alter_tasks_table_log_offset.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE tasks +ADD log_offset integer NOT NULL DEFAULT COALESCE(CARDINALITY(log),0); diff --git a/server/worker.go b/server/worker.go index b501165..ed8322e 100644 --- a/server/worker.go +++ b/server/worker.go @@ -182,7 +182,7 @@ func updateWorkerTaskStatus(ctx context.Context) { if Workers[uuid].Connected { w := Workers[uuid] - rows, err := db.Query(ctx, "SELECT id, COALESCE(CARDINALITY(log),0) as log_offset FROM tasks WHERE worker_id = $1 AND (status = $2 OR status 
= $3 OR status = $4 OR status = $5)", uuid, constants.TASK_STATUS_UNKNOWN, constants.TASK_STATUS_ASSIGNED, constants.TASK_STATUS_RUNNING, constants.TASK_STATUS_WAITING) + rows, err := db.Query(ctx, "SELECT id, log_offset FROM tasks WHERE worker_id = $1 AND (status = $2 OR status = $3 OR status = $4 OR status = $5)", uuid, constants.TASK_STATUS_UNKNOWN, constants.TASK_STATUS_ASSIGNED, constants.TASK_STATUS_RUNNING, constants.TASK_STATUS_WAITING) if err != nil { slog.ErrorContext(ctx, "Error Getting Tasks for Worker", "err", err, "worker_id", uuid) return @@ -236,7 +236,7 @@ func updateWorkerTaskStatus(ctx context.Context) { } } - _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, ts.Task.Status, ts.Task.Log) + _, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3, log_offset = log_offset + $4 WHERE id = $1", taskID, ts.Task.Status, ts.Task.Log, len(ts.Task.Log)) if err != nil { slog.ErrorContext(ctx, "Updating Task Status", "err", err) return