Improve Scan, Seperate Hashing
All checks were successful
/ release (push) Successful in 42s

This commit is contained in:
Samuel Lorch 2024-07-07 01:12:01 +02:00
parent cd3c06d7d0
commit e47a254cad
8 changed files with 178 additions and 55 deletions

View file

@ -1,2 +0,0 @@
ALTER TABLE files
ALTER COLUMN hash bigint SET NOT NULL;

View file

@ -1,2 +0,0 @@
ALTER TABLE files
ALTER COLUMN hash bigint DROP NOT NULL;

View file

@ -0,0 +1,3 @@
ALTER TABLE files
ALTER COLUMN md5 SET NOT NULL,
DROP IF EXISTS mod_time;

View file

@ -0,0 +1,3 @@
ALTER TABLE files
ALTER COLUMN md5 DROP NOT NULL,
ADD mod_time TIMESTAMP NOT NULL DEFAULT current_timestamp;

62
server/hash.go Normal file
View file

@ -0,0 +1,62 @@
package server
import (
"context"
"crypto/md5"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"slices"
"git.lastassault.de/speatzle/morffix/constants"
)
func hashFile(ctx context.Context, id uint) error {
slog.InfoContext(ctx, "Hashing File", "id", id)
var filePath string
var libraryPath string
var oldHash []byte
err := db.QueryRow(ctx, "SELECT f.path, l.path, md5 FROM files f INNER JOIN libraries l ON l.id = f.library_id WHERE f.id = $1", id).Scan(&filePath, &libraryPath, &oldHash)
if err != nil {
return fmt.Errorf("Get File: %w", err)
}
fullPath := filepath.Join(libraryPath, filePath)
file, err := os.Open(fullPath)
if err != nil {
return fmt.Errorf("Opening File: %w", err)
}
hash := md5.New()
if _, err := io.Copy(hash, file); err != nil {
return fmt.Errorf("Reading File: %w", err)
}
newHash := hash.Sum(nil)
if slices.Compare[[]byte](newHash, oldHash) != 0 {
// File has changed
// TODO Queue healthcheck / transcode if enabled
}
tx, err := db.Begin(ctx)
if err != nil {
return fmt.Errorf("Begin Transaction: %w", err)
}
defer tx.Rollback(ctx)
_, err = tx.Exec(ctx, "UPDATE files SET status = $2, md5 = $3 WHERE id = $1", id, constants.FILE_STATUS_EXISTS, newHash)
if err != nil {
return fmt.Errorf("Updating File in DB: %w", err)
}
err = tx.Commit(ctx)
if err != nil {
return fmt.Errorf("Committing Changes: %w", err)
}
slog.InfoContext(ctx, "Hashing File Done", "id", id)
return nil
}

View file

@ -63,10 +63,6 @@ func handleLibrary(w http.ResponseWriter, r *http.Request) {
Enable: enabled, Enable: enabled,
} }
if r.Method == "PUT" {
// TODO
}
rows, err := db.Query(r.Context(), "SELECT id, path, size, status, health, transcode, md5, updated_at FROM files where library_id = $1", id) rows, err := db.Query(r.Context(), "SELECT id, path, size, status, health, transcode, md5, updated_at FROM files where library_id = $1", id)
if err != nil { if err != nil {
slog.ErrorContext(r.Context(), "Query Files", "err", err) slog.ErrorContext(r.Context(), "Query Files", "err", err)

View file

@ -10,6 +10,9 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"slices" "slices"
"strconv"
"sync"
"time"
"git.lastassault.de/speatzle/morffix/constants" "git.lastassault.de/speatzle/morffix/constants"
"github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5"
@ -23,16 +26,16 @@ func handleScan(w http.ResponseWriter, r *http.Request) {
return return
} }
id := r.PathValue("id") id, err := strconv.Atoi(r.FormValue("id"))
if id == "" { if err != nil {
http.Error(w, "No ID Set", http.StatusBadRequest) http.Error(w, "Parsing ID", http.StatusBadRequest)
return return
} }
var name string var name string
var path string var path string
var enabled bool var enabled bool
err := db.QueryRow(r.Context(), "SELECT name, path, enable FROM libraries WHERE id = $1", id).Scan(&name, &path, &enabled) err = db.QueryRow(r.Context(), "SELECT name, path, enable FROM libraries WHERE id = $1", id).Scan(&name, &path, &enabled)
if err != nil { if err != nil {
slog.ErrorContext(r.Context(), "Get Library", "err", err) slog.ErrorContext(r.Context(), "Get Library", "err", err)
http.Error(w, "Error Get Library: "+err.Error(), http.StatusInternalServerError) http.Error(w, "Error Get Library: "+err.Error(), http.StatusInternalServerError)
@ -40,7 +43,7 @@ func handleScan(w http.ResponseWriter, r *http.Request) {
} }
scanCtx := context.Background() scanCtx := context.Background()
go scan(scanCtx, id) go scanLibrary(scanCtx, id)
message := "Scan Started" message := "Scan Started"
@ -75,17 +78,81 @@ func scanStatus(w http.ResponseWriter, r *http.Request) {
} }
} }
func scan(ctx context.Context, id string) { func manageScan(stop chan bool) {
slog.InfoContext(ctx, "Starting Scan", "id", id) scanTicker := time.NewTicker(time.Minute)
hashTicker := time.NewTicker(time.Second)
scanRunning := false
hashRunning := false
ctx, cancel := context.WithCancel(context.Background())
// TODO Scan settings: for {
// - Auto Queue Healthcheck for Changed Files select {
// - Auto Queue Healthcheck for New Files case <-scanTicker.C:
// - Auto Queue Transcode for New Files if scanRunning {
// - Auto Queue Transcode for Changed Files (? Instead have library setting to queue transcode for changed files on healthcheck success) continue
// - Auto Queue Health/Transcode for Unkown Status ? (might result in requeue loop) }
// - Schedule Scans Periodically scanRunning = true
// - Add File Monitoring for Setting Changed status and triggering tasks go func() {
defer func() {
scanRunning = false
}()
rows, err := db.Query(ctx, "SELECT id FROM libraries WHERE enable = true")
if err != nil {
slog.ErrorContext(ctx, "Error getting Enabled Libraries for Scan", "err", err)
return
}
libraries, err := pgx.CollectRows[int](rows, pgx.RowTo[int])
if err != nil {
slog.ErrorContext(ctx, "Error Collecting Enabled Libraries for Scan", "err", err)
return
}
for _, l := range libraries {
scanLibrary(ctx, l)
}
}()
case <-hashTicker.C:
if hashRunning {
continue
}
hashRunning = true
go func() {
defer func() {
hashRunning = false
}()
var fileID uint
err := db.QueryRow(ctx, "SELECT id FROM files WHERE status = $1 OR status = $2 LIMIT 1", constants.FILE_STATUS_CHANGED, constants.FILE_STATUS_NEW).Scan(&fileID)
if err != nil {
if !errors.Is(err, pgx.ErrNoRows) {
slog.ErrorContext(ctx, "Error Getting Files for Hashing", "err", err)
}
return
}
err = hashFile(ctx, fileID)
if err != nil {
slog.ErrorContext(ctx, "Error Hashing File", "err", err)
}
}()
case <-stop:
cancel()
return
}
}
}
var scanLock sync.Mutex
func scanLibrary(ctx context.Context, id int) {
slog.InfoContext(ctx, "Acquiring Scan Lock...")
scanLock.Lock()
defer scanLock.Unlock()
slog.InfoContext(ctx, "Starting Scan", "id", id)
var name string var name string
var lpath string var lpath string
@ -146,31 +213,16 @@ func scan(ctx context.Context, id string) {
slog.InfoContext(ctx, "Skipping non video file", "path", fullPath) slog.InfoContext(ctx, "Skipping non video file", "path", fullPath)
return nil return nil
} }
/*
slog.InfoContext(ctx, "Hashing File", "path", fullPath, "size", info.Size())
file, err := os.Open(fullPath)
if err != nil {
return fmt.Errorf("Opening File: %w", err)
}
hash := md5.New()
if _, err := io.Copy(hash, file); err != nil {
return fmt.Errorf("Reading File: %w", err)
}
newMD5 := hash.Sum(nil)
slog.InfoContext(ctx, "File MD5", "path", fullPath, "size", info.Size(), "md5", newMD5)
*/
fPath, err := filepath.Rel(lpath, fullPath) fPath, err := filepath.Rel(lpath, fullPath)
if err != nil { if err != nil {
return fmt.Errorf("Getting Relative Path: %w", err) return fmt.Errorf("Getting Relative Path: %w", err)
} }
var fileID int var fileID int
var size uint var oldSize uint
var health constants.FileHealth var oldModTime time.Time
err = tx.QueryRow(ctx, "SELECT id, size, health FROM files WHERE library_id = $1 AND path = $2", id, fPath).Scan(&fileID, &size, &health) err = tx.QueryRow(ctx, "SELECT id, size, mod_time FROM files WHERE library_id = $1 AND path = $2", id, fPath).Scan(&fileID, &oldSize, &oldModTime)
if errors.Is(err, pgx.ErrNoRows) { if errors.Is(err, pgx.ErrNoRows) {
// File Does not Exist Yet // File Does not Exist Yet
@ -184,17 +236,24 @@ func scan(ctx context.Context, id string) {
return fmt.Errorf("Getting File: %w", err) return fmt.Errorf("Getting File: %w", err)
} }
/* // Remove Timezone and Round to nearest Second
if slices.Compare[[]byte](newMD5, oldMD5) != 0 { newModTime := info.ModTime().UTC().Round(time.Second)
// File has changed on disk so reset health
health = constants.FILE_HEALTH_UNKNOWN // File Already Exists, Check if it has been changed
if newModTime.Equal(oldModTime) && uint(info.Size()) == oldSize {
slog.Debug("File stayed the same", "id", fileID)
_, err = tx.Exec(ctx, "UPDATE files SET status = $2 WHERE id = $1", fileID, constants.FILE_STATUS_EXISTS)
if err != nil {
return fmt.Errorf("Updating Non Changed File in DB: %w", err)
}
} else {
slog.InfoContext(ctx, "File Has Changed", "path", fullPath, "old_mod_time", oldModTime, "new_mod_time", newModTime, "old_size", oldSize, "new_size", info.Size())
_, err = tx.Exec(ctx, "UPDATE files SET size = $2, status = $3, health = $4, mod_time = $5 WHERE id = $1", fileID, info.Size(), constants.FILE_STATUS_CHANGED, constants.FILE_HEALTH_UNKNOWN, newModTime)
if err != nil {
return fmt.Errorf("Updating Changed File in DB: %w", err)
} }
*/
// File Exists so update Size, status and hash
_, err = tx.Exec(ctx, "UPDATE files SET size = $2, status = $3, health = $4 WHERE id = $1", fileID, info.Size(), constants.FILE_STATUS_EXISTS, health)
if err != nil {
return fmt.Errorf("Updating File in DB: %w", err)
} }
return nil return nil
}) })
if err != nil { if err != nil {
@ -207,7 +266,5 @@ func scan(ctx context.Context, id string) {
slog.ErrorContext(ctx, "Error Committing Changes", "err", err) slog.ErrorContext(ctx, "Error Committing Changes", "err", err)
return return
} }
// TODO, create health and transcode tasks if requested
slog.InfoContext(ctx, "Scan Done", "id", id) slog.InfoContext(ctx, "Scan Done", "id", id)
} }

View file

@ -126,8 +126,11 @@ func Start(_conf config.Config, tmplFS embed.FS, staticFS embed.FS, migrationsFS
serverClose <- true serverClose <- true
}() }()
stopCleanup := make(chan bool, 1) stopWorkerManagement := make(chan bool, 1)
go manageWorkers(stopCleanup) go manageWorkers(stopWorkerManagement)
stopScanning := make(chan bool, 1)
go manageScan(stopScanning)
sigs := make(chan os.Signal, 1) sigs := make(chan os.Signal, 1)
signal.Notify(sigs, os.Interrupt) signal.Notify(sigs, os.Interrupt)
@ -139,7 +142,10 @@ func Start(_conf config.Config, tmplFS embed.FS, staticFS embed.FS, migrationsFS
stopCtx, cancel := context.WithTimeout(context.Background(), time.Second*10) stopCtx, cancel := context.WithTimeout(context.Background(), time.Second*10)
server.Shutdown(stopCtx) server.Shutdown(stopCtx)
cancel() cancel()
stopCleanup <- true slog.Info("Stopping Worker Management...")
stopWorkerManagement <- true
slog.Info("Stopping Scanning...")
stopScanning <- true
slog.Info("Done") slog.Info("Done")
} }
} }