Initial import of transcript pipeline

This commit is contained in:
maddin
2026-04-15 00:01:38 +02:00
commit fea662392c
305 changed files with 40508 additions and 0 deletions
Executable
+294
View File
@@ -0,0 +1,294 @@
#!/usr/bin/env bash
# Control script for the transcript pipeline: service management (install,
# start, stop, status, logs) plus one-shot pipeline commands. See cmd_help
# for the full command list.
# Environment overrides: PROJECT_DIR (project root), PYTHON_BIN (interpreter).
set -euo pipefail
# Resolve the real location of this script. If it was invoked through one or
# more symlinks, follow each link until SOURCE_PATH names a non-link file.
SOURCE_PATH="${BASH_SOURCE[0]}"
while [[ -L "$SOURCE_PATH" ]]; do
# Remember the directory that contains the link itself ...
SOURCE_DIR="$(cd "$(dirname "$SOURCE_PATH")" && pwd)"
SOURCE_PATH="$(readlink "$SOURCE_PATH")"
# ... because a relative link target is relative to that directory.
[[ "$SOURCE_PATH" != /* ]] && SOURCE_PATH="$SOURCE_DIR/$SOURCE_PATH"
done
# Directory holding the resolved script file.
SCRIPT_DIR="$(cd "$(dirname "$SOURCE_PATH")" && pwd)"
# Project root: taken from the environment when set and non-empty, otherwise
# the parent directory of SCRIPT_DIR.
PROJECT_DIR="${PROJECT_DIR:-$(cd "$SCRIPT_DIR/.." && pwd)}"
# Paths derived from the project root and used by the functions below.
PIPELINE_SCRIPT="$PROJECT_DIR/transcript_pipeline.py"
PIPELINE_LOG="$PROJECT_DIR/pipeline.log"
STATE_DB="$PROJECT_DIR/pipeline_state.sqlite3"
# Kernel name as reported by `uname -s`; the cmd_* dispatchers below handle
# "Darwin" and "Linux" and reject everything else.
PLATFORM="$(uname -s)"
# Service label: queried via launchctl on macOS and used as the base name of
# the systemd user unit on Linux.
LABEL="com.maddin.whisper-transcript-pipeline"
# Interpreter for the pipeline script. `|| true` keeps a missing python3 from
# aborting under `set -e` before the explicit check below prints its message.
PYTHON_BIN="${PYTHON_BIN:-$(command -v python3 || true)}"
# Note: this check runs for every command, including ones that never call
# Python (e.g. help, logs).
if [[ -z "$PYTHON_BIN" ]]; then
echo "python3 not found in PATH" >&2
exit 1
fi
# Report whether the launchd job for $LABEL is known in the current user's
# GUI domain.
# Globals:  LABEL (read)
# Outputs:  nothing (launchctl output is discarded)
# Returns:  exit status of `launchctl print` (0 when the job is found)
macos_is_loaded() {
  local uid
  uid="$(id -u)"
  launchctl print "gui/${uid}/${LABEL}" &>/dev/null
}
# Print the systemd unit name for the watcher: the label plus ".service".
# Globals:  LABEL (read)
# Outputs:  unit name on stdout
linux_service_name() {
  printf '%s\n' "${LABEL}.service"
}
# Print the path of the watcher's unit file inside the per-user systemd
# configuration directory.
# Globals:  HOME (read)
# Outputs:  unit file path on stdout
linux_unit_path() {
  printf '%s/.config/systemd/user/%s\n' "$HOME" "$(linux_service_name)"
}
# List remote audio uploads that have a status other than 'uploaded'.
# Does nothing when the state database file is missing or the sqlite3 CLI is
# not available; a failing query is treated the same as "no rows".
# Globals:  STATE_DB (read)
# Outputs:  "pending uploads:" followed by basename|status|last_error rows
# Returns:  0
show_pending_uploads() {
  [[ -f "$STATE_DB" ]] || return 0
  command -v sqlite3 >/dev/null 2>&1 || return 0

  local query rows
  query="select basename || '|' || coalesce(remote_audio_status,'') || '|' || coalesce(remote_audio_last_error,'') from source_state where coalesce(remote_audio_status,'') != '' and remote_audio_status != 'uploaded';"
  rows="$(sqlite3 "$STATE_DB" "$query" || true)"
  [[ -n "$rows" ]] || return 0

  echo "pending uploads:"
  printf '%s\n' "$rows"
}
# Hand control over to the launch agent installer in the project root.
# The current shell is replaced via exec, so this does not return on success.
# Globals:  PROJECT_DIR (read)
macos_start() {
  local installer="$PROJECT_DIR/install_launch_agent.sh"
  exec "$installer"
}
# Stop the watcher on macOS.
# Runs the launch agent uninstaller with --keep-files when it is executable,
# then signals any remaining "<pipeline script> watch" process (best effort).
# Globals:  PROJECT_DIR, PIPELINE_SCRIPT, LABEL (read)
# Outputs:  uninstaller output, then "stopped: <label>"
macos_stop() {
  local uninstaller="$PROJECT_DIR/uninstall_launch_agent.sh"
  if [[ -x "$uninstaller" ]]; then
    "$uninstaller" --keep-files
  fi
  # pkill exits non-zero when nothing matched; that is not an error here.
  pkill -f "${PIPELINE_SCRIPT} watch" &>/dev/null || true
  printf 'stopped: %s\n' "$LABEL"
}
# Print the watcher's status on macOS: launchd state, matching processes,
# the last five pipeline log lines (when the log exists), and pending uploads.
# Globals:  LABEL, PIPELINE_SCRIPT, PIPELINE_LOG (read)
# Outputs:  status report on stdout
# Returns:  exit status of show_pending_uploads
macos_status() {
  local pids
  if macos_is_loaded; then
    echo "launchd: loaded"
    # -F: the label contains dots, which must match literally rather than as
    # regex wildcards. No match is not an error.
    launchctl list | grep -F -- "$LABEL" || true
  else
    echo "launchd: not loaded"
  fi
  # BSD/macOS pgrep: -a means "include ancestors" (it is the procps flag that
  # prints the command line). -l together with -f prints the PID and the full
  # argument list, which is what this report wants.
  pids="$(pgrep -lf "$PIPELINE_SCRIPT watch" || true)"
  if [[ -n "$pids" ]]; then
    echo "process:"
    printf '%s\n' "$pids"
  else
    echo "process: not running"
  fi
  if [[ -f "$PIPELINE_LOG" ]]; then
    echo "pipeline log:"
    tail -n 5 "$PIPELINE_LOG"
  fi
  show_pending_uploads
}
# Hand control over to the systemd service installer under deploy/linux.
# The current shell is replaced via exec, so this does not return on success.
# Globals:  PROJECT_DIR (read)
linux_start() {
  local installer="$PROJECT_DIR/deploy/linux/install_systemd_service.sh"
  exec "$installer"
}
# Stop the watcher on Linux.
# Asks the user systemd instance to stop the unit, then signals any remaining
# "<pipeline script> watch" process. Both steps are best effort: their output
# is discarded and their failures are ignored.
# Globals:  PIPELINE_SCRIPT (read)
# Outputs:  "stopped: <unit>"
linux_stop() {
  local unit
  unit="$(linux_service_name)"
  systemctl --user stop "$unit" &>/dev/null || true
  pkill -f "${PIPELINE_SCRIPT} watch" &>/dev/null || true
  printf 'stopped: %s\n' "$unit"
}
# Print the watcher's status on Linux: result of `systemctl --user status`,
# matching processes, the last five pipeline log lines (when the log exists),
# and pending uploads.
# Globals:  PIPELINE_SCRIPT, PIPELINE_LOG (read)
# Outputs:  status report on stdout
# Returns:  exit status of show_pending_uploads
linux_status() {
  local unit running
  unit="$(linux_service_name)"
  if ! systemctl --user status "$unit" &>/dev/null; then
    echo "systemd --user: not loaded"
  else
    echo "systemd --user: loaded"
  fi
  # pgrep exits non-zero when nothing matches; treat that as "no processes".
  running="$(pgrep -af "$PIPELINE_SCRIPT watch" || true)"
  if [[ -z "$running" ]]; then
    echo "process: not running"
  else
    echo "process:"
    printf '%s\n' "$running"
  fi
  if [[ -f "$PIPELINE_LOG" ]]; then
    echo "pipeline log:"
    tail -n 5 "$PIPELINE_LOG"
  fi
  show_pending_uploads
}
# `start` command: dispatch to the platform-specific start routine.
# Globals:  PLATFORM (read)
# Exits with status 1 on any platform other than Darwin or Linux.
cmd_start() {
  if [[ "$PLATFORM" == "Darwin" ]]; then
    macos_start
  elif [[ "$PLATFORM" == "Linux" ]]; then
    linux_start
  else
    echo "unsupported platform: $PLATFORM" >&2
    exit 1
  fi
}
# `stop` command: dispatch to the platform-specific stop routine.
# Globals:  PLATFORM (read)
# Exits with status 1 on any platform other than Darwin or Linux.
cmd_stop() {
  if [[ "$PLATFORM" == "Darwin" ]]; then
    macos_stop
  elif [[ "$PLATFORM" == "Linux" ]]; then
    linux_stop
  else
    echo "unsupported platform: $PLATFORM" >&2
    exit 1
  fi
}
# `restart` command: stop the watcher, then start it again.
# The two calls are deliberately separate statements: under the script's
# `set -e`, a failure inside cmd_stop ends the script before cmd_start runs.
cmd_restart() {
cmd_stop
cmd_start
}
# `install` command: alias for cmd_start. On both supported platforms the
# start routine execs the platform's installer script, so installing and
# starting are the same operation.
cmd_install() {
cmd_start
}
# `uninstall` command: exec the platform's uninstaller script.
# The current shell is replaced, so this does not return on success.
# Globals:  PLATFORM, PROJECT_DIR (read)
# Exits with status 1 on any platform other than Darwin or Linux.
cmd_uninstall() {
  if [[ "$PLATFORM" == "Darwin" ]]; then
    exec "$PROJECT_DIR/uninstall_launch_agent.sh"
  elif [[ "$PLATFORM" == "Linux" ]]; then
    exec "$PROJECT_DIR/deploy/linux/uninstall_systemd_service.sh"
  else
    echo "unsupported platform: $PLATFORM" >&2
    exit 1
  fi
}
# `status` command: dispatch to the platform-specific status report.
# Globals:  PLATFORM (read)
# Exits with status 1 on any platform other than Darwin or Linux.
cmd_status() {
  if [[ "$PLATFORM" == "Darwin" ]]; then
    macos_status
  elif [[ "$PLATFORM" == "Linux" ]]; then
    linux_status
  else
    echo "unsupported platform: $PLATFORM" >&2
    exit 1
  fi
}
# `logs` command: follow one of the watcher's log sources.
# Arguments: $1 - log target, defaults to "pipeline" when missing or empty.
#   Darwin: pipeline | stdout | stderr | all
#   Linux:  pipeline | service | journal | all  (all follows the pipeline log)
# Globals:  PLATFORM, PIPELINE_LOG, PROJECT_DIR (read)
# Exits with status 1 for an unsupported platform or an unknown target.
cmd_logs() {
  local target="${1:-pipeline}"

  if [[ "$PLATFORM" != "Darwin" && "$PLATFORM" != "Linux" ]]; then
    echo "unsupported platform: $PLATFORM" >&2
    exit 1
  fi

  if [[ "$PLATFORM" == "Darwin" ]]; then
    local launchd_out="$PROJECT_DIR/launchd.stdout.log"
    local launchd_err="$PROJECT_DIR/launchd.stderr.log"
    local -a files=()
    case "$target" in
      pipeline) files=("$PIPELINE_LOG") ;;
      stdout) files=("$launchd_out") ;;
      stderr) files=("$launchd_err") ;;
      all) files=("$PIPELINE_LOG" "$launchd_out" "$launchd_err") ;;
      *)
        echo "unknown log target: $target" >&2
        exit 1
        ;;
    esac
    tail -f "${files[@]}"
  else
    case "$target" in
      service | journal)
        journalctl --user -u "$(linux_service_name)" -f
        ;;
      pipeline | all)
        tail -f "$PIPELINE_LOG"
        ;;
      *)
        echo "unknown log target: $target" >&2
        exit 1
        ;;
    esac
  fi
}
# `scan` command: exec the pipeline script's `scan` subcommand, forwarding
# every argument unchanged. Does not return on success.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
cmd_scan() {
  local -a argv=("$PYTHON_BIN" "$PIPELINE_SCRIPT" scan "$@")
  exec "${argv[@]}"
}
# `cleanup` command: exec the pipeline script's `cleanup` subcommand.
# Arguments given to this function are not forwarded.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
cmd_cleanup() {
  local -a argv=("$PYTHON_BIN" "$PIPELINE_SCRIPT" cleanup)
  exec "${argv[@]}"
}
# `memos-sync` command: exec the pipeline script's `memos-sync` subcommand.
# Arguments given to this function are not forwarded.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
cmd_memos_sync() {
  local -a argv=("$PYTHON_BIN" "$PIPELINE_SCRIPT" memos-sync)
  exec "${argv[@]}"
}
# `memos-auth` command: exec the pipeline script's `memos-auth` subcommand.
# Arguments given to this function are not forwarded.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
cmd_memos_auth() {
  local -a argv=("$PYTHON_BIN" "$PIPELINE_SCRIPT" memos-auth)
  exec "${argv[@]}"
}
# `retry-uploads` command: exec the pipeline script's `retry-uploads`
# subcommand. Arguments given to this function are not forwarded.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
cmd_retry_uploads() {
  local -a argv=("$PYTHON_BIN" "$PIPELINE_SCRIPT" retry-uploads)
  exec "${argv[@]}"
}
# `migrate-archive` command: exec the pipeline script's `migrate-archive`
# subcommand. Arguments given to this function are not forwarded.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
cmd_migrate_archive() {
  local -a argv=("$PYTHON_BIN" "$PIPELINE_SCRIPT" migrate-archive)
  exec "${argv[@]}"
}
# `reprocess` command: exec the pipeline script's `reprocess` subcommand for
# one basename.
# Arguments: $1 - basename to reprocess (required); further arguments are
#            ignored.
# Globals:  PYTHON_BIN, PIPELINE_SCRIPT (read)
# Exits with status 1 and a usage line on stderr when called without arguments.
cmd_reprocess() {
  if (( $# == 0 )); then
    echo "usage: transcript reprocess <basename>" >&2
    exit 1
  fi
  local target="$1"
  exec "$PYTHON_BIN" "$PIPELINE_SCRIPT" reprocess "$target"
}
# `bundle` command: exec the migration bundle script under scripts/.
# The current shell is replaced, so this does not return on success.
# Globals:  PROJECT_DIR (read)
cmd_bundle() {
  local bundler="$PROJECT_DIR/scripts/create_migration_bundle.sh"
  exec "$bundler"
}
# `help` command: print the usage text to stdout.
# The here-doc delimiter is quoted ('EOF'), so the text is emitted literally
# with no variable or command expansion. The dispatcher redirects this
# function to stderr when it reports an unknown command.
cmd_help() {
cat <<'EOF'
Usage: transcript <command> [args]
Commands:
install Install and start the background watcher
start Start or install the background watcher
stop Stop the background watcher
restart Restart the background watcher
uninstall Uninstall the background watcher service
status Show service/process status and recent pipeline log lines
logs [pipeline|stdout|stderr|service|journal|all]
Follow log output
scan Process currently available transcript/audio pairs once
cleanup Delete archived originals older than the retention period
memos-sync Export published notes for Quartz and rebuild memos.maddin.app
memos-auth Generate the Basic Auth htpasswd file from .env
retry-uploads Retry remote audio uploads from the local archive
migrate-archive Retroactively migrate archived sources to the new schema
reprocess <basename> Reprocess one basename from the watch folder
bundle Build a portable migration tarball in dist/
help Show this help
EOF
}
# Entry point: pick the subcommand from the first argument (defaulting to
# "help" when it is missing or empty) and dispatch to the matching cmd_*
# function. Only logs, scan and reprocess receive the remaining arguments.
# An unknown subcommand prints an error plus the usage text on stderr and
# exits with status 1.
main() {
  command="${1:-help}"
  # Drop the subcommand so "$@" holds only its arguments; with no arguments
  # at all there is nothing to drop.
  if (( $# > 0 )); then
    shift
  fi

  case "$command" in
    install) cmd_install ;;
    start) cmd_start ;;
    stop) cmd_stop ;;
    restart) cmd_restart ;;
    uninstall) cmd_uninstall ;;
    status) cmd_status ;;
    logs) cmd_logs "$@" ;;
    scan) cmd_scan "$@" ;;
    cleanup) cmd_cleanup ;;
    memos-sync) cmd_memos_sync ;;
    memos-auth) cmd_memos_auth ;;
    retry-uploads) cmd_retry_uploads ;;
    migrate-archive) cmd_migrate_archive ;;
    reprocess) cmd_reprocess "$@" ;;
    bundle) cmd_bundle ;;
    help | -h | --help) cmd_help ;;
    *)
      echo "unknown command: $command" >&2
      echo >&2
      cmd_help >&2
      exit 1
      ;;
  esac
}

main "$@"