from __future__ import annotations import tempfile import unittest from unittest import mock from datetime import UTC, datetime from pathlib import Path from transcript_pipeline import ( AudioMetadata, Settings, TranscriptPipeline, build_frontmatter, format_note_date, format_duration, join_remote_path, remove_blank_lines, strip_leading_h1, short_hash, slugify_title, split_text_into_chunks, ) class UtilityTests(unittest.TestCase): def test_slugify_title(self) -> None: self.assertEqual(slugify_title(" 2026/04: Elternabend? "), "2026 04 Elternabend") def test_format_duration(self) -> None: self.assertEqual(format_duration(59), "0:59") self.assertEqual(format_duration(3661), "1:01:01") def test_format_note_date(self) -> None: self.assertEqual(format_note_date(datetime(2026, 4, 9, 18, 0, tzinfo=UTC)), "260409") def test_split_text_into_chunks(self) -> None: text = "\n\n".join([f"Paragraph {index} " + ("x" * 50) for index in range(10)]) chunks = split_text_into_chunks(text, target_chars=160) self.assertGreater(len(chunks), 1) self.assertTrue(all(chunk.strip() for chunk in chunks)) def test_build_frontmatter(self) -> None: frontmatter = build_frontmatter( { "title": "Test", "duration_seconds": 12.5, "tags": ["transkript", "ki-zusammenfassung"], } ) self.assertIn('title: "Test"', frontmatter) self.assertIn("duration_seconds: 12.5", frontmatter) self.assertIn(' - "transkript"', frontmatter) def test_remove_blank_lines(self) -> None: text = "a\n\nb\n \n\nc\n" self.assertEqual(remove_blank_lines(text), "a\nb\nc\n") def test_strip_leading_h1(self) -> None: text = "# Titel\n## Abschnitt\nText\n" self.assertEqual(strip_leading_h1(text, "Titel"), "## Abschnitt\nText") def test_join_remote_path(self) -> None: self.assertEqual( join_remote_path("transkripte:/", "abc123", "audio", "file.m4a"), "transkripte:/abc123/audio/file.m4a", ) class NotePathTests(unittest.TestCase): def setUp(self) -> None: self.tempdir = tempfile.TemporaryDirectory() self.base_dir = Path(self.tempdir.name) self.settings = Settings( base_dir=self.base_dir, watch_dir=self.base_dir / "watch", obsidian_dir=self.base_dir / "vault", archive_dir=self.base_dir / "archive", memos_enabled=True, memos_site_url="https://memos.maddin.app", memos_content_dir=self.base_dir / "memos-content", memos_quartz_dir=self.base_dir / "memos-quartz", memos_output_dir=self.base_dir / "memos-site", memos_build_command="true", memos_rclone_remote=None, memos_rclone_excludes=(), memos_sync_htpasswd=False, memos_remote_htpasswd_path=None, memos_basic_auth_user="maddin", memos_basic_auth_password="secret", memos_basic_auth_htpasswd_path=self.base_dir / "deploy/nginx/memos.htpasswd", prompt_path=self.base_dir / "prompt.md", state_db_path=self.base_dir / "state.sqlite3", log_path=self.base_dir / "pipeline.log", openai_api_key="test-key", openai_model="test-model", debounce_seconds=1, retention_days=7, request_timeout_seconds=30, ffprobe_bin="/opt/homebrew/bin/ffprobe", fswatch_bin="/opt/homebrew/bin/fswatch", rclone_bin="/opt/homebrew/bin/rclone", rclone_remote="transkripte:/", ntfy_base_url="https://ntfy.maddin.app", ntfy_topic="Transkript", ntfy_access_token=None, ) self.settings.ensure_directories() self.settings.prompt_path.write_text("Prompt", encoding="utf-8") self.settings.memos_quartz_dir.mkdir(parents=True, exist_ok=True) self.pipeline = TranscriptPipeline(self.settings) def tearDown(self) -> None: self.pipeline.close() self.tempdir.cleanup() def test_unique_note_path_uses_existing_source_file(self) -> None: source_id = short_hash("source") existing = self.settings.obsidian_dir / "260409 Elternabend.md" existing.write_text( f'---\nsource_id: "{source_id}"\n---\n# Test\n', encoding="utf-8", ) target = self.pipeline.build_note_target( source_id=source_id, title="Elternabend", recorded_at=datetime(2026, 4, 9, 18, 0, tzinfo=UTC), note_type="summary", ) self.assertEqual(target.note_path, existing) def test_unique_note_path_adds_suffix_for_foreign_note(self) -> None: source_id = short_hash("source-a") foreign_id = short_hash("source-b") existing = self.settings.obsidian_dir / "260409 Elternabend.md" existing.write_text( f'---\nsource_id: "{foreign_id}"\n---\n# Other\n', encoding="utf-8", ) target = self.pipeline.build_note_target( source_id=source_id, title="Elternabend", recorded_at=datetime(2026, 4, 9, 18, 0, tzinfo=UTC), note_type="summary", ) self.assertEqual(target.note_path.name, "260409 Elternabend-2.md") def test_raw_transcript_note_uses_suffix(self) -> None: source_id = short_hash("source-raw") target = self.pipeline.build_note_target( source_id=source_id, title="Elternabend", recorded_at=datetime(2026, 4, 9, 18, 0, tzinfo=UTC), note_type="raw_transcript", ) self.assertEqual(target.note_path.name, "260409 Elternabend - Transkript.md") def test_build_source_id_is_stable(self) -> None: metadata = AudioMetadata( recorded_at=datetime(2026, 4, 9, 18, 0, tzinfo=UTC), recorded_at_source="ffprobe.creation_time", duration_seconds=120.0, duration_human="2:00", audio_size=1024, ) source_id_a = self.pipeline.build_source_id( type("Pair", (), {"basename": "Meeting"})(), metadata, ) source_id_b = self.pipeline.build_source_id( type("Pair", (), {"basename": "Meeting"})(), metadata, ) self.assertEqual(source_id_a, source_id_b) def test_process_available_pairs_marks_unstable_scan_for_retry(self) -> None: pair = type("Pair", (), {"basename": "Meeting"})() self.pipeline.scan_pairs = lambda: [pair] # type: ignore[method-assign] self.pipeline.files_stable = lambda _: False # type: ignore[method-assign] self.pipeline.process_pair = lambda _: self.fail("process_pair should not run") # type: ignore[method-assign] processed = self.pipeline.process_available_pairs() self.assertEqual(processed, 0) self.assertTrue(self.pipeline.last_scan_had_unstable) def test_sync_memos_site_exports_public_note_without_audio_sources(self) -> None: note_path = self.settings.obsidian_dir / "260413 Testmemo.md" note_path.write_text( """--- title: "Testmemo" type: "summary" date: "2026-04-13" recorded_at: "2026-04-13T12:00:00+00:00" duration_human: "5:00" source_id: "abc123" source_audio_cache: "/tmp/audio.m4a" remote_audio: "transkripte:/abc123/audio/test.m4a" remote_audio_status: "uploaded" processed_at: "2026-04-13T12:10:00+00:00" updated_at: "2026-04-13T12:10:00+00:00" tags: - "transkript" - "ki-zusammenfassung" --- ## Metadaten - Quelle: `Test` ## Zusammenfassung Kurzfassung ## Transkript Hallo Welt ## Quellen - Remote-Audio: `transkripte:/abc123/audio/test.m4a` - Upload-Status: `uploaded` """, encoding="utf-8", ) exported_count = self.pipeline.sync_memos_site() self.assertEqual(exported_count, 1) exported_path = self.settings.memos_content_dir / "transkripte" / note_path.name exported_text = exported_path.read_text(encoding="utf-8") self.assertIn('title: "Testmemo"', exported_text) self.assertIn("## Transkript", exported_text) self.assertNotIn("remote_audio", exported_text) self.assertNotIn("## Quellen", exported_text) self.assertNotIn("Upload-Status", exported_text) self.assertTrue((self.settings.memos_content_dir / "index.md").exists()) def test_sync_memos_site_writes_paginated_index_pages(self) -> None: for index in range(25): note_path = self.settings.obsidian_dir / f"2604{index:02d} Memo {index:02d}.md" note_path.write_text( f"""--- title: "Memo {index:02d}" type: "summary" date: "2026-04-{(index % 28) + 1:02d}" recorded_at: "2026-04-{(index % 28) + 1:02d}T12:00:00+00:00" source_id: "src-{index:02d}" tags: - "transkript" --- ## Zusammenfassung Memo {index:02d} """, encoding="utf-8", ) self.pipeline.sync_memos_site() index_text = (self.settings.memos_content_dir / "index.md").read_text(encoding="utf-8") page_two_text = (self.settings.memos_content_dir / "seite-2.md").read_text(encoding="utf-8") self.assertIn("[[seite-2|Ältere Memos]]", index_text) self.assertIn("[[index|Neuere Memos]]", page_two_text) self.assertIn("## Übersicht Seite 2", page_two_text) def test_ensure_pretty_urls_creates_directory_index_copies(self) -> None: assert self.pipeline.memos_publisher is not None html_path = self.settings.memos_output_dir / "seite-2.html" html_path.parent.mkdir(parents=True, exist_ok=True) html_path.write_text("Page 2", encoding="utf-8") self.pipeline.memos_publisher.ensure_pretty_urls() pretty_path = self.settings.memos_output_dir / "seite-2" / "index.html" self.assertTrue(pretty_path.exists()) self.assertEqual(pretty_path.read_text(encoding="utf-8"), "Page 2") @mock.patch("transcript_pipeline.subprocess.run") def test_memos_deploy_uses_rclone_and_optionally_htpasswd(self, run_mock: mock.Mock) -> None: assert self.pipeline.memos_publisher is not None self.settings.memos_rclone_remote = "mln:/home/maddin/transkripte/site" self.settings.memos_rclone_excludes = ("README.md", ".gitignore") self.settings.memos_sync_htpasswd = True self.settings.memos_remote_htpasswd_path = "mln:/home/maddin/transkripte/.htpasswd" self.settings.memos_basic_auth_htpasswd_path.write_text("maddin:hash\n", encoding="utf-8") self.pipeline.memos_publisher.deploy_site() commands = [call.args[0] for call in run_mock.call_args_list] self.assertIn( [ "/opt/homebrew/bin/rclone", "sync", "--delete-after", "--fast-list", "--exclude", "README.md", "--exclude", ".gitignore", str(self.settings.memos_output_dir), "mln:/home/maddin/transkripte/site", ], commands, ) self.assertIn( [ "/opt/homebrew/bin/rclone", "copyto", str(self.settings.memos_basic_auth_htpasswd_path), "mln:/home/maddin/transkripte/.htpasswd", ], commands, ) if __name__ == "__main__": unittest.main()