Move logs from array to dedicated table
All checks were successful
/ release (push) Successful in 1m23s

This avoids the append problem, which bloats the database to 200+ GB in a
month and shrinks it back to 5 GB only after a full vacuum
This commit is contained in:
Samuel Lorch 2025-05-18 19:18:52 +02:00
parent 15a960da19
commit f52f517dc1
9 changed files with 99 additions and 11 deletions

View file

@ -205,40 +205,86 @@ func updateWorkerTaskStatus(ctx context.Context) {
_, err := rpcServer.Call(ctx, w.Conn, "task-status", tsr, &ts)
if err != nil {
tx, err := db.Begin(ctx)
if err != nil {
slog.ErrorContext(ctx, "Task Status Error Begin Transaction", "err", err)
return
}
defer tx.Rollback(ctx)
// Find a better way to compare errors which were sent via websocket
if strings.HasSuffix(err.Error(), constants.ErrTaskDoesNotExist.Error()) {
// Worker says it does not know of this task; mark it failed so that we don't ask the worker about it again and again
slog.ErrorContext(ctx, "Task is unknown by worker, Failing...", "err", err, "id", taskID)
_, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_FAILED, []string{fmt.Sprintf("%v MASTER: Task Failed because it is unknown to Assigned Worker", time.Now())})
_, err = tx.Exec(ctx, "UPDATE tasks SET status = $2 WHERE id = $1", taskID, constants.TASK_STATUS_FAILED)
if err != nil {
slog.ErrorContext(ctx, "Updating Failed Task Status", "err", err)
return
}
_, err = tx.Exec(ctx, "INSERT INTO logs(task_id, message) VALUES($1,$2)", taskID, fmt.Sprintf("%v MASTER: Task Failed because it is unknown to Assigned Worker", time.Now()))
if err != nil {
slog.ErrorContext(ctx, "Updating Failed Task Log", "err", err)
return
}
slog.Info("Updating task done", "id", taskID, "status", constants.TASK_STATUS_FAILED)
return
} else if errors.Is(err, constants.ErrRPCRequestTimeout) {
// We really don't know what's going on; it might be a slow response, OOM, a full disk, or a bug
_, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN, []string{fmt.Sprintf("%v MASTER: Status RPC Call Timed Out!", time.Now())})
_, err = tx.Exec(ctx, "UPDATE tasks SET status = $2 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN)
if err != nil {
slog.ErrorContext(ctx, "Updating Unknown Task Status due to Timeout", "err", err)
return
}
_, err = tx.Exec(ctx, "INSERT INTO logs(task_id, message) VALUES($1,$2)", taskID, fmt.Sprintf("%v MASTER: Status RPC Call Timed Out!", time.Now()))
if err != nil {
slog.ErrorContext(ctx, "Updating Unknown Task Log due to Timeout", "err", err)
return
}
} else {
slog.ErrorContext(ctx, "Getting Task Status", "err", err)
_, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN, []string{fmt.Sprintf("%v MASTER: Status RPC Call Error: %v", time.Now(), err.Error())})
_, err = db.Exec(ctx, "UPDATE tasks SET status = $2 WHERE id = $1", taskID, constants.TASK_STATUS_UNKNOWN)
if err != nil {
slog.ErrorContext(ctx, "Updating Unknown Task Status", "err", err)
return
}
_, err = tx.Exec(ctx, "INSERT INTO logs(task_id, message) VALUES($1,$2)", taskID, fmt.Sprintf("%v MASTER: Status RPC Call Error: %v", time.Now(), err.Error()))
if err != nil {
slog.ErrorContext(ctx, "Updating Unknown Task Log", "err", err)
return
}
}
err = tx.Commit(ctx)
if err != nil {
slog.ErrorContext(ctx, "Task Status Error Commit Transaction", "err", err)
}
return
}
tx, err := db.Begin(ctx)
if err != nil {
slog.ErrorContext(ctx, "Task Status Success Begin Transaction", "err", err)
return
}
_, err = tx.Exec(ctx, "UPDATE tasks SET status = $2, log_offset = log_offset + $3 WHERE id = $1", taskID, ts.Task.Status, len(ts.Task.Log))
if err != nil {
slog.ErrorContext(ctx, "Updating Task Status and offset", "err", err)
return
}
// TODO batch these or use copy protocol
for _, l := range ts.Task.Log {
_, err = tx.Exec(ctx, "INSERT INTO logs(task_id, message) VALUES($1,$2)", taskID, l)
if err != nil {
slog.ErrorContext(ctx, "Updating Task Log", "err", err)
return
}
}
_, err = db.Exec(ctx, "UPDATE tasks SET status = $2, log = log || $3, log_offset = log_offset + $4 WHERE id = $1", taskID, ts.Task.Status, ts.Task.Log, len(ts.Task.Log))
err = tx.Commit(ctx)
if err != nil {
slog.ErrorContext(ctx, "Updating Task Status", "err", err)
slog.ErrorContext(ctx, "Task Status Commit Transaction", "err", err)
return
}