From 38cadb8cdc82a7765b3c8edaa333208ab4845f5b Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:03:14 +0300 Subject: [PATCH 01/58] fix: overhaul duplicate detection scoring, add address matching, trigger after imports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The duplicate detection worker had several bugs preventing it from catching obvious duplicates: - Scoring formula (name*0.4 + email*0.35 + phone*0.25 with threshold 0.4) meant contacts sharing the same email but with different names scored 0.35, below the threshold — silently missed. - Email comparison was case-sensitive. - Only one side of email/phone field pairs had its type verified. - Address data was completely ignored. - No import worker triggered duplicate detection after completion. Fixes: - Replace additive scoring with max-signal + bonus approach where each signal independently qualifies (email=0.85, phone=0.75, address=0.60, name=similarity) - Add case-insensitive email matching via LOWER() fragments - Filter both cf1 and cf2 contact_field_types in email/phone queries - Use LIKE 'mailto%' pattern to handle protocol colon inconsistency - Add address matching on normalized line1 + postal_code - Enqueue DuplicateDetectionWorker after successful completion in all three import workers (MonicaApiCrawlWorker, ImportSourceWorker, ImportWorker) - Add comprehensive test suite (20 tests) for the detection worker --- .../workers/duplicate_detection_worker.ex | 133 ++-- lib/kith/workers/import_source_worker.ex | 4 + lib/kith/workers/import_worker.ex | 4 + lib/kith/workers/monica_api_crawl_worker.ex | 4 + .../duplicate_detection_worker_test.exs | 601 ++++++++++++++++++ 5 files changed, 704 insertions(+), 42 deletions(-) create mode 100644 test/kith/workers/duplicate_detection_worker_test.exs diff --git a/lib/kith/workers/duplicate_detection_worker.ex 
b/lib/kith/workers/duplicate_detection_worker.ex index 8e67e29..efea61d 100644 --- a/lib/kith/workers/duplicate_detection_worker.ex +++ b/lib/kith/workers/duplicate_detection_worker.ex @@ -5,9 +5,18 @@ defmodule Kith.Workers.DuplicateDetectionWorker do Detection algorithm: 1. Name similarity via pg_trgm similarity() on display_name (threshold: 0.5) - 2. Exact email match across contact_fields - 3. Exact phone match across contact_fields - 4. Weighted score: name(0.4) + email(0.35) + phone(0.25) + 2. Case-insensitive email match across contact_fields + 3. Normalized phone match across contact_fields (digits only) + 4. Address match on line1 + postal_code + + Scoring (max-signal + bonus): + Each signal has an independent base score: + - email_match: 0.85 + - phone_match: 0.75 + - address_match: 0.60 + - name_match: the raw pg_trgm similarity (> 0.5) + Final score = max(base scores) + 0.05 per additional signal, capped at 1.0 + Threshold: >= 0.5 """ use Oban.Worker, @@ -15,7 +24,7 @@ defmodule Kith.Workers.DuplicateDetectionWorker do max_attempts: 3 import Ecto.Query - alias Kith.Contacts.{Contact, ContactField, DuplicateCandidate} + alias Kith.Contacts.{Address, Contact, ContactField, ContactFieldType, DuplicateCandidate} alias Kith.Repo @impl Oban.Worker @@ -36,33 +45,25 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp detect_duplicates(account_id) do - # Get active contacts for this account - contacts = + contact_count = Contact |> where([c], c.account_id == ^account_id) |> where([c], is_nil(c.deleted_at)) - |> select([c], %{id: c.id, display_name: c.display_name}) - |> Repo.all() + |> Repo.aggregate(:count) - if length(contacts) < 2, do: :ok, else: find_duplicates(account_id, contacts) + if contact_count >= 2, do: find_duplicates(account_id) end - defp find_duplicates(account_id, _contacts) do - # Find name-based duplicates using pg_trgm + defp find_duplicates(account_id) do name_matches = find_name_matches(account_id) - - # Find email-based 
duplicates email_matches = find_email_matches(account_id) - - # Find phone-based duplicates phone_matches = find_phone_matches(account_id) + address_matches = find_address_matches(account_id) - # Merge and score all matches all_pairs = - merge_matches(name_matches, email_matches, phone_matches) - |> Enum.filter(fn {_pair, score, _reasons} -> score >= 0.4 end) + merge_matches(name_matches, email_matches, phone_matches, address_matches) + |> Enum.filter(fn {_pair, score, _reasons} -> score >= 0.5 end) - # Get existing pending/dismissed candidates to avoid re-inserting existing = DuplicateCandidate |> where([d], d.account_id == ^account_id) @@ -73,9 +74,7 @@ defmodule Kith.Workers.DuplicateDetectionWorker do now = DateTime.utc_now() |> DateTime.truncate(:second) - # Insert new candidates Enum.each(all_pairs, fn {{id1, id2}, score, reasons} -> - # Canonicalize: smaller id first {contact_id, dup_id} = if id1 < id2, do: {id1, id2}, else: {id2, id1} unless MapSet.member?(existing, {contact_id, dup_id}) do @@ -93,7 +92,6 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp find_name_matches(account_id) do - # Use pg_trgm similarity for fuzzy name matching query = """ SELECT c1.id AS id1, c2.id AS id2, similarity(c1.display_name, c2.display_name) AS sim FROM contacts c1 @@ -102,6 +100,8 @@ defmodule Kith.Workers.DuplicateDetectionWorker do WHERE c1.account_id = $1 AND c1.deleted_at IS NULL AND c2.deleted_at IS NULL + AND c1.display_name IS NOT NULL AND c1.display_name != '' + AND c2.display_name IS NOT NULL AND c2.display_name != '' AND similarity(c1.display_name, c2.display_name) > 0.5 ORDER BY sim DESC LIMIT 500 @@ -119,55 +119,94 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp find_email_matches(account_id) do - # Find contacts that share an exact email address + # Case-insensitive email match, both fields verified as email type query = from cf1 in ContactField, join: cf2 in ContactField, - on: cf1.value == cf2.value and cf1.id < cf2.id, - join: 
cft in assoc(cf1, :contact_field_type), + on: + fragment("LOWER(?)", cf1.value) == fragment("LOWER(?)", cf2.value) and + cf1.id < cf2.id, + join: cft1 in ContactFieldType, + on: cf1.contact_field_type_id == cft1.id, + join: cft2 in ContactFieldType, + on: cf2.contact_field_type_id == cft2.id, where: cf1.account_id == ^account_id, where: cf2.account_id == ^account_id, - where: cft.protocol == "mailto:", + where: fragment("? LIKE 'mailto%'", cft1.protocol), + where: fragment("? LIKE 'mailto%'", cft2.protocol), where: cf1.contact_id != cf2.contact_id, + where: cf1.value != "" and not is_nil(cf1.value), select: {cf1.contact_id, cf2.contact_id} query |> Repo.all() - |> Enum.uniq() |> Enum.map(fn {id1, id2} -> - {id1, id2} = if id1 < id2, do: {id1, id2}, else: {id2, id1} - {{id1, id2}, 1.0, ["email_match"]} + if id1 < id2, do: {id1, id2}, else: {id2, id1} end) - |> Enum.uniq_by(fn {pair, _, _} -> pair end) + |> Enum.uniq() + |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["email_match"]} end) end defp find_phone_matches(account_id) do - # Find contacts that share an exact phone number (normalized: digits only) + # Normalized phone match (digits only), both fields verified as phone type query = from cf1 in ContactField, join: cf2 in ContactField, on: fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf1.value) == - fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf2.value) and cf1.id < cf2.id, - join: cft in assoc(cf1, :contact_field_type), + fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf2.value) and + cf1.id < cf2.id, + join: cft1 in ContactFieldType, + on: cf1.contact_field_type_id == cft1.id, + join: cft2 in ContactFieldType, + on: cf2.contact_field_type_id == cft2.id, where: cf1.account_id == ^account_id, where: cf2.account_id == ^account_id, - where: cft.protocol == "tel:", + where: fragment("? LIKE 'tel%'", cft1.protocol), + where: fragment("? 
LIKE 'tel%'", cft2.protocol), where: cf1.contact_id != cf2.contact_id, + where: cf1.value != "" and not is_nil(cf1.value), select: {cf1.contact_id, cf2.contact_id} query |> Repo.all() + |> Enum.map(fn {id1, id2} -> + if id1 < id2, do: {id1, id2}, else: {id2, id1} + end) |> Enum.uniq() + |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["phone_match"]} end) + end + + defp find_address_matches(account_id) do + # Match on normalized line1 + postal_code + query = + from a1 in Address, + join: a2 in Address, + on: + fragment("LOWER(TRIM(?))", a1.line1) == fragment("LOWER(TRIM(?))", a2.line1) and + fragment("LOWER(TRIM(?))", a1.postal_code) == + fragment("LOWER(TRIM(?))", a2.postal_code) and + a1.id < a2.id, + where: a1.account_id == ^account_id, + where: a2.account_id == ^account_id, + where: a1.contact_id != a2.contact_id, + where: a1.line1 != "" and not is_nil(a1.line1), + where: a1.postal_code != "" and not is_nil(a1.postal_code), + where: a2.line1 != "" and not is_nil(a2.line1), + where: a2.postal_code != "" and not is_nil(a2.postal_code), + select: {a1.contact_id, a2.contact_id} + + query + |> Repo.all() |> Enum.map(fn {id1, id2} -> - {id1, id2} = if id1 < id2, do: {id1, id2}, else: {id2, id1} - {{id1, id2}, 1.0, ["phone_match"]} + if id1 < id2, do: {id1, id2}, else: {id2, id1} end) - |> Enum.uniq_by(fn {pair, _, _} -> pair end) + |> Enum.uniq() + |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["address_match"]} end) end - defp merge_matches(name_matches, email_matches, phone_matches) do - (name_matches ++ email_matches ++ phone_matches) + defp merge_matches(name_matches, email_matches, phone_matches, address_matches) do + (name_matches ++ email_matches ++ phone_matches ++ address_matches) |> Enum.group_by(fn {pair, _score, _reasons} -> pair end) |> Enum.map(&compute_merged_score/1) end @@ -176,9 +215,19 @@ defmodule Kith.Workers.DuplicateDetectionWorker do reasons = matches |> Enum.flat_map(fn {_, _, r} -> r end) |> Enum.uniq() name_sim = Enum.find_value(matches, 
0.0, &extract_name_score/1) - email_weight = if "email_match" in reasons, do: 0.35, else: 0.0 - phone_weight = if "phone_match" in reasons, do: 0.25, else: 0.0 - score = min(name_sim * 0.4 + email_weight + phone_weight, 1.0) + # Base score for each signal type + base_scores = + [] + |> then(fn acc -> if "email_match" in reasons, do: [0.85 | acc], else: acc end) + |> then(fn acc -> if "phone_match" in reasons, do: [0.75 | acc], else: acc end) + |> then(fn acc -> if "address_match" in reasons, do: [0.60 | acc], else: acc end) + |> then(fn acc -> if name_sim > 0.0, do: [name_sim | acc], else: acc end) + + signal_count = length(base_scores) + max_score = Enum.max(base_scores, fn -> 0.0 end) + bonus = max(signal_count - 1, 0) * 0.05 + + score = min(max_score + bonus, 1.0) {pair, Float.round(score, 2), reasons} end diff --git a/lib/kith/workers/import_source_worker.ex b/lib/kith/workers/import_source_worker.ex index 6cdf883..d5feaa6 100644 --- a/lib/kith/workers/import_source_worker.ex +++ b/lib/kith/workers/import_source_worker.ex @@ -11,6 +11,7 @@ defmodule Kith.Workers.ImportSourceWorker do alias Kith.Imports alias Kith.Storage + alias Kith.Workers.DuplicateDetectionWorker @impl Oban.Worker def perform(%Oban.Job{args: %{"import_id" => import_id}}) do @@ -33,6 +34,9 @@ defmodule Kith.Workers.ImportSourceWorker do topic = "import:#{import.account_id}" Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import.account_id})) + Logger.info("Import #{import_id} completed: #{inspect(summary_map)}") :ok else diff --git a/lib/kith/workers/import_worker.ex b/lib/kith/workers/import_worker.ex index 790620e..68dde77 100644 --- a/lib/kith/workers/import_worker.ex +++ b/lib/kith/workers/import_worker.ex @@ -11,6 +11,7 @@ defmodule Kith.Workers.ImportWorker do alias Kith.Contacts alias Kith.VCard.Parser + alias 
Kith.Workers.DuplicateDetectionWorker @impl Oban.Worker def perform(%Oban.Job{ @@ -42,6 +43,9 @@ defmodule Kith.Workers.ImportWorker do {:import_complete, results} ) + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: account_id})) + Logger.info( "vCard import complete for account #{account_id}: " <> "#{results.imported} imported, #{results.skipped} skipped" diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex index b5355ba..f366140 100644 --- a/lib/kith/workers/monica_api_crawl_worker.ex +++ b/lib/kith/workers/monica_api_crawl_worker.ex @@ -15,6 +15,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do alias Kith.Imports alias Kith.Imports.Sources.MonicaApi + alias Kith.Workers.DuplicateDetectionWorker @impl Oban.Worker def perform(%Oban.Job{args: %{"import_id" => import_id}}) do @@ -47,6 +48,9 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do topic = "import:#{import_job.account_id}" Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}") :ok else diff --git a/test/kith/workers/duplicate_detection_worker_test.exs b/test/kith/workers/duplicate_detection_worker_test.exs new file mode 100644 index 0000000..8f2728e --- /dev/null +++ b/test/kith/workers/duplicate_detection_worker_test.exs @@ -0,0 +1,601 @@ +defmodule Kith.Workers.DuplicateDetectionWorkerTest do + use Kith.DataCase, async: true + use Oban.Testing, repo: Kith.Repo + + import Kith.Factory + import Kith.ContactsFixtures + + alias Kith.Contacts.DuplicateCandidate + alias Kith.Workers.DuplicateDetectionWorker + + setup do + seed_reference_data!() + {account, _user} = setup_account() + + email_type = + Repo.one!( + from t in "contact_field_types", + where: 
t.protocol == "mailto:", + select: %{id: t.id}, + limit: 1 + ) + + phone_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "tel:", + select: %{id: t.id}, + limit: 1 + ) + + %{account: account, email_type_id: email_type.id, phone_type_id: phone_type.id} + end + + defp run_detection(account_id) do + perform_job(DuplicateDetectionWorker, %{account_id: account_id}) + end + + defp pending_candidates(account_id) do + DuplicateCandidate + |> where([d], d.account_id == ^account_id) + |> where([d], d.status == "pending") + |> order_by([d], desc: d.score) + |> Repo.all() + end + + describe "name matching" do + test "detects contacts with similar display names", %{account: account} do + insert(:contact, + account: account, + display_name: "John Smith", + first_name: "John", + last_name: "Smith" + ) + + insert(:contact, + account: account, + display_name: "John Smithe", + first_name: "John", + last_name: "Smithe" + ) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "name_match" in hd(candidates).reasons + assert hd(candidates).score >= 0.5 + end + + test "does not match dissimilar names", %{account: account} do + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + end + + describe "email matching" do + test "detects contacts sharing the same email", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + 
contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "email_match" in hd(candidates).reasons + assert hd(candidates).score >= 0.8 + end + + test "email matching is case-insensitive", %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "SHARED@Example.COM"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "email_match" in hd(candidates).reasons + end + + test "email-only match scores around 0.85", %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Completely Different", + first_name: "Completely", + last_name: "Different" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Totally Unique", + first_name: "Totally", + last_name: "Unique" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "same@email.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "same@email.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert hd(candidates).score == 0.85 + end + end + + describe "phone matching" do + test "detects contacts sharing the same phone number", %{ + account: account, + phone_type_id: phone_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice 
Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, phone_type_id, %{"value" => "+1-555-1234"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "+1-555-1234"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "phone_match" in hd(candidates).reasons + assert hd(candidates).score >= 0.7 + end + + test "phone matching normalizes formatting", %{account: account, phone_type_id: phone_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, phone_type_id, %{"value" => "+1-555-1234"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "15551234"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "phone_match" in hd(candidates).reasons + end + + test "phone-only match scores 0.75", %{account: account, phone_type_id: phone_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Completely Different", + first_name: "Completely", + last_name: "Different" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Totally Unique", + first_name: "Totally", + last_name: "Unique" + ) + + contact_field_fixture(c1, phone_type_id, %{"value" => "5559876"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "5559876"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert hd(candidates).score == 0.75 + end + end + + describe "address matching" do + test "detects contacts sharing the same 
address", %{account: account} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + address_fixture(c1, %{"line1" => "123 Main St", "postal_code" => "90210"}) + address_fixture(c2, %{"line1" => "123 Main St", "postal_code" => "90210"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "address_match" in hd(candidates).reasons + assert hd(candidates).score == 0.6 + end + + test "address matching is case-insensitive and trims whitespace", %{account: account} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + address_fixture(c1, %{"line1" => " 123 Main St ", "postal_code" => "90210"}) + address_fixture(c2, %{"line1" => "123 MAIN ST", "postal_code" => "90210"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "address_match" in hd(candidates).reasons + end + + test "does not match on postal_code alone", %{account: account} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + address_fixture(c1, %{"line1" => "123 Main St", "postal_code" => "90210"}) + address_fixture(c2, %{"line1" => "456 Oak Ave", "postal_code" => "90210"}) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + end + + describe "combined signals" do + test "email + name 
match scores higher than email alone", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "John Smith", + first_name: "John", + last_name: "Smith" + ) + + c2 = + insert(:contact, + account: account, + display_name: "John Smithe", + first_name: "John", + last_name: "Smithe" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "john@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "john@example.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + candidate = hd(candidates) + assert "name_match" in candidate.reasons + assert "email_match" in candidate.reasons + # email base (0.85) + bonus for name signal (0.05) = 0.90 + assert candidate.score > 0.85 + end + + test "email + phone match boosts score", %{ + account: account, + email_type_id: email_type_id, + phone_type_id: phone_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Completely Different", + first_name: "Completely", + last_name: "Different" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Totally Unique", + first_name: "Totally", + last_name: "Unique" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "same@email.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "same@email.com"}) + contact_field_fixture(c1, phone_type_id, %{"value" => "5551234"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "5551234"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + candidate = hd(candidates) + assert "email_match" in candidate.reasons + assert "phone_match" in candidate.reasons + # email base (0.85) + 1 bonus (0.05) = 0.90 + assert candidate.score == 0.9 + end + end + + describe "edge cases" do + test "skips soft-deleted contacts", %{account: account, email_type_id: email_type_id} do + 
c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams", + deleted_at: DateTime.utc_now(:second) + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + + test "does not re-insert existing pending candidates", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + # First run + assert :ok = run_detection(account.id) + assert length(pending_candidates(account.id)) == 1 + + # Second run should not create duplicates + assert :ok = run_detection(account.id) + assert length(pending_candidates(account.id)) == 1 + end + + test "does not re-insert dismissed candidates", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + # First run, then dismiss + assert :ok = run_detection(account.id) + [candidate] = pending_candidates(account.id) + 
Kith.DuplicateDetection.dismiss_candidate(candidate) + + # Second run should not re-create dismissed candidate + assert :ok = run_detection(account.id) + assert pending_candidates(account.id) == [] + end + + test "account isolation — only detects within same account", %{email_type_id: email_type_id} do + {account1, _} = setup_account() + {account2, _} = setup_account() + + c1 = + insert(:contact, + account: account1, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account2, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account1.id) + assert :ok = run_detection(account2.id) + + assert pending_candidates(account1.id) == [] + assert pending_candidates(account2.id) == [] + end + + test "handles fewer than 2 contacts gracefully", %{account: account} do + insert(:contact, + account: account, + display_name: "Only Contact", + first_name: "Only", + last_name: "Contact" + ) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + + test "handles zero contacts gracefully", %{account: account} do + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + end + + describe "cron mode" do + test "runs for all accounts when no account_id provided" do + {account1, _} = setup_account() + {account2, _} = setup_account() + + insert(:contact, + account: account1, + display_name: "John Smith", + first_name: "John", + last_name: "Smith" + ) + + insert(:contact, + account: account1, + display_name: "John Smithe", + first_name: "John", + last_name: "Smithe" + ) + + insert(:contact, + account: account2, + display_name: "Jane Doe", + first_name: "Jane", + last_name: "Doe" + ) + + insert(:contact, + account: account2, 
+ display_name: "Jane Doee", + first_name: "Jane", + last_name: "Doee" + ) + + assert :ok = perform_job(DuplicateDetectionWorker, %{}) + + assert length(pending_candidates(account1.id)) == 1 + assert length(pending_candidates(account2.id)) == 1 + end + end +end From 38cba058db077fdfa0bd5500e92f2d3b5ac78ff6 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:51:21 +0300 Subject: [PATCH 02/58] fix: paginate duplicates page to prevent timeout on large result sets list_candidates now takes limit/offset opts (default 20 per page). The LiveView loads one page at a time with a "Load more" button. Dismiss removes the candidate from the current list without reloading. --- lib/kith/duplicate_detection.ex | 6 + lib/kith_web/live/contact_live/duplicates.ex | 150 ++++++++++++------- 2 files changed, 98 insertions(+), 58 deletions(-) diff --git a/lib/kith/duplicate_detection.ex b/lib/kith/duplicate_detection.ex index a7599d0..359417e 100644 --- a/lib/kith/duplicate_detection.ex +++ b/lib/kith/duplicate_detection.ex @@ -4,13 +4,19 @@ defmodule Kith.DuplicateDetection do alias Kith.Contacts.DuplicateCandidate alias Kith.Repo + @default_page_size 20 + def list_candidates(account_id, opts \\ []) do status = Keyword.get(opts, :status, "pending") + limit = Keyword.get(opts, :limit, @default_page_size) + offset = Keyword.get(opts, :offset, 0) DuplicateCandidate |> scope_to_account(account_id) |> where([d], d.status == ^status) |> order_by([d], desc: d.score) + |> limit(^limit) + |> offset(^offset) |> Repo.all() |> Repo.preload([:contact, :duplicate_contact]) end diff --git a/lib/kith_web/live/contact_live/duplicates.ex b/lib/kith_web/live/contact_live/duplicates.ex index 1dad4f5..36c951b 100644 --- a/lib/kith_web/live/contact_live/duplicates.ex +++ b/lib/kith_web/live/contact_live/duplicates.ex @@ -7,12 +7,16 @@ defmodule KithWeb.ContactLive.Duplicates do alias Kith.Policy alias Kith.Workers.DuplicateDetectionWorker + 
@page_size 20 + @impl true def mount(_params, _session, socket) do {:ok, socket |> assign(:page_title, "Duplicate Contacts") - |> assign(:candidates, [])} + |> assign(:candidates, []) + |> assign(:has_more, false) + |> assign(:total_count, 0)} end @impl true @@ -20,12 +24,15 @@ defmodule KithWeb.ContactLive.Duplicates do scope = socket.assigns.current_scope account_id = scope.account.id - candidates = DuplicateDetection.list_candidates(account_id) + candidates = DuplicateDetection.list_candidates(account_id, limit: @page_size) + total_count = DuplicateDetection.pending_count(account_id) {:noreply, socket |> assign(:account_id, account_id) - |> assign(:candidates, candidates)} + |> assign(:candidates, candidates) + |> assign(:total_count, total_count) + |> assign(:has_more, length(candidates) >= @page_size)} end @impl true @@ -35,15 +42,32 @@ defmodule KithWeb.ContactLive.Duplicates do {:ok, _} = DuplicateDetection.dismiss_candidate(candidate) - candidates = DuplicateDetection.list_candidates(socket.assigns.account_id) + candidates = Enum.reject(socket.assigns.candidates, &(&1.id == candidate.id)) + total_count = socket.assigns.total_count - 1 {:noreply, socket |> assign(:candidates, candidates) - |> assign(:pending_duplicates_count, length(candidates)) + |> assign(:total_count, total_count) + |> assign(:pending_duplicates_count, total_count) |> put_flash(:info, "Duplicate dismissed.")} end + def handle_event("load_more", _params, socket) do + offset = length(socket.assigns.candidates) + + more = + DuplicateDetection.list_candidates(socket.assigns.account_id, + limit: @page_size, + offset: offset + ) + + {:noreply, + socket + |> assign(:candidates, socket.assigns.candidates ++ more) + |> assign(:has_more, length(more) >= @page_size)} + end + def handle_event("scan", _params, socket) do user = socket.assigns.current_scope.user @@ -79,7 +103,7 @@ defmodule KithWeb.ContactLive.Duplicates do

Duplicate Contacts

- {length(@candidates)} potential duplicate{if length(@candidates) != 1, do: "s"} found + {@total_count} potential duplicate{if @total_count != 1, do: "s"} found

+ {candidate.duplicate_contact.display_name} + - <% end %> + +
+ <.link + navigate={ + ~p"/contacts/#{candidate.contact.id}/merge?with=#{candidate.duplicate_contact.id}&candidate_id=#{candidate.id}" + } + class="inline-flex items-center gap-1.5 rounded-[var(--radius-md)] bg-[var(--color-accent)] text-[var(--color-accent-foreground)] px-3 py-1.5 text-xs font-medium hover:bg-[var(--color-accent-hover)] transition-colors" + > + <.icon name="hero-arrows-right-left" class="size-4" /> Merge + + +
+ + +
+ +
<% end %> From ab349c617dfca1e8d85299bd84e6e7961e91ab26 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 4 Apr 2026 20:08:02 +0300 Subject: [PATCH 03/58] fix: add pagination to duplicates panel in contacts index page The /contacts/duplicates route uses ContactLive.Index, not the standalone Duplicates LiveView. Added limit/offset pagination with Load more button and optimistic dismiss (no full re-query) to match the standalone page. --- lib/kith_web/live/contact_live/index.ex | 31 +++++++++++++++++-- .../live/contact_live/index.html.heex | 13 +++++++- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/lib/kith_web/live/contact_live/index.ex b/lib/kith_web/live/contact_live/index.ex index c1f3b55..491b9b4 100644 --- a/lib/kith_web/live/contact_live/index.ex +++ b/lib/kith_web/live/contact_live/index.ex @@ -33,6 +33,8 @@ defmodule KithWeb.ContactLive.Index do |> assign(:meta, nil) |> assign(:tags, Contacts.list_tags(account_id)) |> assign(:candidates, []) + |> assign(:duplicates_total, 0) + |> assign(:duplicates_has_more, false) |> assign(:trashed_contacts, [])} end @@ -55,12 +57,18 @@ defmodule KithWeb.ContactLive.Index do |> load_contacts() end + @duplicates_page_size 20 + defp apply_action(socket, :duplicates, _params) do - candidates = DuplicateDetection.list_candidates(socket.assigns.account_id) + account_id = socket.assigns.account_id + candidates = DuplicateDetection.list_candidates(account_id, limit: @duplicates_page_size) + total_count = DuplicateDetection.pending_count(account_id) socket |> assign(:page_title, "Duplicate Contacts") |> assign(:candidates, candidates) + |> assign(:duplicates_total, total_count) + |> assign(:duplicates_has_more, length(candidates) >= @duplicates_page_size) end defp apply_action(socket, :trash, _params) do @@ -245,15 +253,32 @@ defmodule KithWeb.ContactLive.Index do {:ok, _} = DuplicateDetection.dismiss_candidate(candidate) - candidates = 
DuplicateDetection.list_candidates(socket.assigns.account_id) + candidates = Enum.reject(socket.assigns.candidates, &(&1.id == candidate.id)) + total = socket.assigns.duplicates_total - 1 {:noreply, socket |> assign(:candidates, candidates) - |> assign(:pending_duplicates_count, length(candidates)) + |> assign(:duplicates_total, total) + |> assign(:pending_duplicates_count, total) |> put_flash(:info, "Duplicate dismissed.")} end + def handle_event("load_more_duplicates", _params, socket) do + offset = length(socket.assigns.candidates) + + more = + DuplicateDetection.list_candidates(socket.assigns.account_id, + limit: @duplicates_page_size, + offset: offset + ) + + {:noreply, + socket + |> assign(:candidates, socket.assigns.candidates ++ more) + |> assign(:duplicates_has_more, length(more) >= @duplicates_page_size)} + end + def handle_event("scan", _params, socket) do user = socket.assigns.current_scope.user diff --git a/lib/kith_web/live/contact_live/index.html.heex b/lib/kith_web/live/contact_live/index.html.heex index 7e08df4..3db183f 100644 --- a/lib/kith_web/live/contact_live/index.html.heex +++ b/lib/kith_web/live/contact_live/index.html.heex @@ -381,7 +381,7 @@

- {length(@candidates)} potential duplicate{if length(@candidates) != 1, do: "s"} found + {@duplicates_total} potential duplicate{if @duplicates_total != 1, do: "s"} found

<% end %> + + <%= if @duplicates_has_more do %> +
+ +
+ <% end %> <% end %> <% end %> From af046bc556e8b28f1288ee5878e0402b21681368 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 4 Apr 2026 20:32:47 +0300 Subject: [PATCH 04/58] fix: handle duplicate photos during merge, reduce merge to 3 steps Photos with the same content_hash on both contacts caused a unique constraint violation during merge. Now deletes duplicate photos from the non-survivor before transferring the rest, matching the pattern used for contact_tags and activity_contacts. Also collapsed the merge flow from 4 steps to 3 by combining the preview and confirm steps into a single "Review & merge" step. From the duplicates page (contact preselected), merge is now 2 clicks instead of 3. --- lib/kith/contacts.ex | 28 +++++++++++---- lib/kith_web/live/contact_live/merge.ex | 48 +++++-------------------- 2 files changed, 29 insertions(+), 47 deletions(-) diff --git a/lib/kith/contacts.ex b/lib/kith/contacts.ex index 1427759..d92f9e4 100644 --- a/lib/kith/contacts.ex +++ b/lib/kith/contacts.ex @@ -1740,14 +1740,28 @@ defmodule Kith.Contacts do end, set: [contact_id: survivor.id] ) - # Remap photos - |> Ecto.Multi.update_all( - :remap_photos, - fn _changes -> + # Remap photos (delete duplicates by content_hash first, then move remaining) + |> Ecto.Multi.run(:remap_photos, fn repo, _changes -> + # Delete photos from non-survivor that already exist on survivor (same content_hash) + repo.query( + """ + DELETE FROM photos + WHERE contact_id = $1 + AND content_hash IS NOT NULL + AND content_hash IN ( + SELECT content_hash FROM photos WHERE contact_id = $2 AND content_hash IS NOT NULL + ) + """, + [non_survivor.id, survivor.id] + ) + + # Move remaining photos + {count, _} = from(p in Photo, where: p.contact_id == ^non_survivor.id) - end, - set: [contact_id: survivor.id] - ) + |> repo.update_all(set: [contact_id: survivor.id]) + + {:ok, count} + end) # Remap addresses |> Ecto.Multi.update_all( :remap_addresses, 
diff --git a/lib/kith_web/live/contact_live/merge.ex b/lib/kith_web/live/contact_live/merge.ex index 6bdad13..f45dbbc 100644 --- a/lib/kith_web/live/contact_live/merge.ex +++ b/lib/kith_web/live/contact_live/merge.ex @@ -150,13 +150,7 @@ defmodule KithWeb.ContactLive.Merge do end end - # ── Step 3: Preview ──────────────────────────────────────────────────── - - def handle_event("confirm-merge", _params, socket) do - {:noreply, assign(socket, :step, 4)} - end - - # ── Step 4: Execute ──────────────────────────────────────────────────── + # ── Step 3: Preview & Execute ─────────────────────────────────────────── def handle_event("execute-merge", _params, socket) do contact_a = socket.assigns.contact_a @@ -233,14 +227,14 @@ defmodule KithWeb.ContactLive.Merge do Merge Contacts - Step {@step} of 4 — {step_label(@step)} + Step {@step} of 3 — {step_label(@step)} <%!-- Step indicator (horizontal stepper) --%>
- Back - Confirm Merge -
- -
- - <%!-- Step 4: Confirm & Execute --%> -
- -
-

- Final Confirmation -

-

- This action cannot be easily undone. Are you sure you want to merge - - {@contact_b.display_name} - - into {@contact_a.display_name}? -

- -
- - Go Back - - - {if @merging, do: "Merging...", else: "Merge Contacts"} - -
+ Back + + {if @merging, do: "Merging...", else: "Merge Contacts"} +
@@ -463,8 +432,7 @@ defmodule KithWeb.ContactLive.Merge do defp step_label(1), do: "Select contact" defp step_label(2), do: "Choose fields" - defp step_label(3), do: "Preview" - defp step_label(4), do: "Confirm" + defp step_label(3), do: "Review & merge" defp default_field_choices do @mergeable_fields From 7cdcfb0c5d2098da686fdc7b18c9022c747021ee Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 20:58:29 +0300 Subject: [PATCH 05/58] feat: mount official Oban Web dashboard for admins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the custom hand-rolled Oban dashboard (412 LOC LiveView) with the official `oban_web` package, now open-source and free as of Oban 2.20. Mount at /admin/oban behind a new :require_admin on_mount hook gated by Kith.Policy.can?(user, :manage, :oban). Hide the "Jobs" nav link from non-admin users. Drops the dead photo_sync queue from Oban config — its worker (PhotoBatchSyncWorker) was removed in commit e474853 when Monica imports moved to API crawling. 
--- config/config.exs | 3 +- lib/kith/policy.ex | 4 + lib/kith_web/components/layouts.ex | 2 + .../live/admin_live/oban_dashboard.ex | 412 ------------------ lib/kith_web/router.ex | 15 +- lib/kith_web/user_auth.ex | 19 + mix.exs | 1 + mix.lock | 2 + test/kith/policy_test.exs | 27 ++ test/kith_web/user_auth_test.exs | 49 +++ 10 files changed, 119 insertions(+), 415 deletions(-) delete mode 100644 lib/kith_web/live/admin_live/oban_dashboard.ex create mode 100644 test/kith/policy_test.exs diff --git a/config/config.exs b/config/config.exs index 13f1eee..f555214 100644 --- a/config/config.exs +++ b/config/config.exs @@ -40,8 +40,7 @@ config :kith, Oban, exports: 2, imports: 2, immich: 3, - purge: 1, - photo_sync: 5 + purge: 1 ], plugins: [ Oban.Plugins.Pruner, diff --git a/lib/kith/policy.ex b/lib/kith/policy.ex index be22a0a..1b48ef5 100644 --- a/lib/kith/policy.ex +++ b/lib/kith/policy.ex @@ -46,6 +46,7 @@ defmodule Kith.Policy do | :journal | :duplicate_candidate | :reference_data + | :oban @doc """ Returns true if the user is authorized to perform the given action on the resource. 
@@ -58,6 +59,9 @@ defmodule Kith.Policy do # ── Admin: full access ─────────────────────────────────────────────── defp authorized?("admin", _action, _resource), do: true + # ── Admin-only resources: deny for editor/viewer ───────────────────── + defp authorized?(role, _action, :oban) when role in ["editor", "viewer"], do: false + # ── Editor: CRUD on contacts and content, no account/user management ─ defp authorized?("editor", :read, resource) when resource in [:account, :audit_log], do: true diff --git a/lib/kith_web/components/layouts.ex b/lib/kith_web/components/layouts.ex index 373a562..0b6a92d 100644 --- a/lib/kith_web/components/layouts.ex +++ b/lib/kith_web/components/layouts.ex @@ -97,6 +97,7 @@ defmodule KithWeb.Layouts do match_prefix="/settings" /> <.sidebar_link + :if={Kith.Policy.can?(@current_scope.user, :manage, :oban)} path={~p"/admin/oban"} current_path={@current_path} icon="hero-queue-list" @@ -233,6 +234,7 @@ defmodule KithWeb.Layouts do match_prefix="/settings" /> <.mobile_nav_link + :if={Kith.Policy.can?(@current_scope.user, :manage, :oban)} path={~p"/admin/oban"} current_path={@current_path} icon="hero-queue-list" diff --git a/lib/kith_web/live/admin_live/oban_dashboard.ex b/lib/kith_web/live/admin_live/oban_dashboard.ex deleted file mode 100644 index 1ecc41b..0000000 --- a/lib/kith_web/live/admin_live/oban_dashboard.ex +++ /dev/null @@ -1,412 +0,0 @@ -defmodule KithWeb.AdminLive.ObanDashboard do - @moduledoc """ - Minimal Oban dashboard for free Oban (no Oban Pro/Web). - - Queries the oban_jobs table directly. Admin-only access. 
- """ - - use KithWeb, :live_view - - import Ecto.Query - - alias Kith.Repo - - @refresh_interval 5_000 - - @impl true - def mount(_params, _session, socket) do - user = socket.assigns.current_scope.user - - if Kith.Policy.can?(user, :manage, :account) do - if connected?(socket), do: Process.send_after(self(), :refresh, @refresh_interval) - {:ok, socket |> assign(:page_title, "Oban Dashboard") |> load_data()} - else - {:ok, - socket - |> put_flash(:error, "Admin access required.") - |> redirect(to: ~p"/dashboard")} - end - end - - @impl true - def handle_info(:refresh, socket) do - Process.send_after(self(), :refresh, @refresh_interval) - {:noreply, load_data(socket)} - end - - @impl true - def handle_event("retry-job", %{"id" => id}, socket) do - job_id = String.to_integer(id) - Oban.retry_job(job_id) - {:noreply, socket |> put_flash(:info, "Job #{job_id} retried.") |> load_data()} - end - - def handle_event("discard-job", %{"id" => id}, socket) do - job_id = String.to_integer(id) - Oban.cancel_job(job_id) - {:noreply, socket |> put_flash(:info, "Job #{job_id} discarded.") |> load_data()} - end - - defp load_data(socket) do - socket - |> assign(:queue_stats, fetch_queue_stats()) - |> assign(:recent_failures, fetch_recent_failures()) - |> assign(:recent_jobs, fetch_recent_jobs()) - |> assign(:photo_sync_jobs, fetch_photo_sync_jobs()) - end - - defp fetch_queue_stats do - from(j in "oban_jobs", - group_by: [j.queue, j.state], - select: %{ - queue: j.queue, - state: j.state, - count: count(j.id) - } - ) - |> Repo.all() - |> Enum.group_by(& &1.queue) - end - - defp fetch_recent_failures do - from(j in "oban_jobs", - where: j.state in ["retryable", "discarded"], - order_by: [desc: j.attempted_at], - limit: 20, - select: %{ - id: j.id, - worker: j.worker, - queue: j.queue, - state: j.state, - attempt: j.attempt, - max_attempts: j.max_attempts, - attempted_at: j.attempted_at, - errors: j.errors - } - ) - |> Repo.all() - end - - defp fetch_photo_sync_jobs do - from(j in 
"oban_jobs", - where: j.worker == "Kith.Workers.PhotoBatchSyncWorker", - order_by: [desc: j.inserted_at], - limit: 10, - select: %{ - id: j.id, - state: j.state, - attempt: j.attempt, - max_attempts: j.max_attempts, - args: j.args, - inserted_at: j.inserted_at, - attempted_at: j.attempted_at - } - ) - |> Repo.all() - |> Enum.map(fn job -> - import_id = get_in(job.args, ["import_id"]) - import_record = import_id && Kith.Imports.get_import(import_id) - - Map.merge(job, %{ - import_id: import_id, - import_source: import_record && import_record.source, - sync_summary: import_record && import_record.sync_summary - }) - end) - end - - defp fetch_recent_jobs do - from(j in "oban_jobs", - order_by: [desc: j.inserted_at], - limit: 30, - select: %{ - id: j.id, - worker: j.worker, - queue: j.queue, - state: j.state, - attempt: j.attempt, - max_attempts: j.max_attempts, - inserted_at: j.inserted_at, - attempted_at: j.attempted_at - } - ) - |> Repo.all() - end - - @impl true - def render(assigns) do - ~H""" - - - Oban Dashboard - <:subtitle>Background job monitoring (refreshes every 5s) - - - <%!-- Queue Overview --%> -
-
-

{queue}

-
-
- {s.state} - {s.count} -
-
-
-
- - <%!-- Recent Failures --%> -
-

Recent Failures

- <%= if @recent_failures == [] do %> -

No recent failures.

- <% else %> -
- - - - - - - - - - - - - - - - - - - - - - - -
- ID - - Worker - - Queue - - State - - Attempt - - Last Run - - Actions -
- {job.id} - - {short_worker(job.worker)} - {job.queue} - {job.state} - - {job.attempt}/{job.max_attempts} - - {format_time(job.attempted_at)} - - - -
-
- <% end %> -
- - <%!-- Photo Sync Jobs --%> -
-

Photo Sync Jobs

- <%= if @photo_sync_jobs == [] do %> -

No photo sync jobs.

- <% else %> -
- - - - - - - - - - - - - - - - - - - - - -
- ID - - Import - - State - - Progress - - Attempt - - Created -
- {job.id} - - <%= if job.import_id do %> - <.link - navigate={~p"/settings/imports/#{job.import_id}"} - class="text-[var(--color-accent)] hover:underline" - > - #{job.import_id} ({job.import_source || "?"}) - - <% else %> - — - <% end %> - - {job.state} - - {sync_progress(job.sync_summary)} - - {job.attempt}/{job.max_attempts} - - {format_time(job.inserted_at)} -
-
- <% end %> -
- - <%!-- Recent Jobs --%> -
-

Recent Jobs

-
- - - - - - - - - - - - - - - - - - - - - -
- ID - - Worker - - Queue - - State - - Attempt - - Inserted -
- {job.id} - - {short_worker(job.worker)} - {job.queue} - {job.state} - - {job.attempt}/{job.max_attempts} - - {format_time(job.inserted_at)} -
-
-
-
- """ - end - - defp short_worker(worker) do - worker - |> String.split(".") - |> List.last() - end - - defp state_color("completed"), do: "text-[var(--color-success)]" - defp state_color("available"), do: "text-[var(--color-info)]" - defp state_color("scheduled"), do: "text-[var(--color-text-tertiary)]" - defp state_color("executing"), do: "text-[var(--color-warning)]" - defp state_color("retryable"), do: "text-[var(--color-error)]" - defp state_color("discarded"), do: "text-[var(--color-error)]/50" - defp state_color(_), do: "text-[var(--color-text-primary)]" - - defp state_variant("completed"), do: "success" - defp state_variant("available"), do: "info" - defp state_variant("scheduled"), do: "default" - defp state_variant("executing"), do: "warning" - defp state_variant("retryable"), do: "error" - defp state_variant("discarded"), do: "error" - defp state_variant(_), do: "default" - - defp sync_progress(%{"synced" => synced, "total" => total}), do: "#{synced}/#{total} synced" - defp sync_progress(_), do: "—" - - defp format_time(nil), do: "-" - - defp format_time(%NaiveDateTime{} = ndt) do - case Kith.Cldr.DateTime.to_string(ndt, format: :medium) do - {:ok, str} -> str - _ -> to_string(ndt) - end - end - - defp format_time(%DateTime{} = dt) do - case Kith.Cldr.DateTime.to_string(dt, format: :medium) do - {:ok, str} -> str - _ -> to_string(dt) - end - end - - defp format_time(_), do: "-" -end diff --git a/lib/kith_web/router.ex b/lib/kith_web/router.ex index 4665321..79f81b1 100644 --- a/lib/kith_web/router.ex +++ b/lib/kith_web/router.ex @@ -371,13 +371,26 @@ defmodule KithWeb.Router do live "/contacts/:id/immich-review", ContactLive.ImmichReview, :index # Admin pages - live "/admin/oban", AdminLive.ObanDashboard, :index live "/settings/audit-log", SettingsLive.AuditLog, :index end post "/users/update-password", UserSessionController, :update_password end + # Admin-only Oban Web dashboard. 
+ # The oban_dashboard macro defines its own internal live_session, so it must + # be placed outside any other live_session block. + scope "/admin" do + pipe_through [:browser, :require_authenticated_user, :require_confirmed_user] + + import Oban.Web.Router + + oban_dashboard("/oban", + on_mount: [{KithWeb.UserAuth, :require_admin}], + csp_nonce_assign_key: :csp_nonce + ) + end + # WebAuthn registration (authenticated, JSON over session) scope "/auth/webauthn", KithWeb do pipe_through [:browser_json, :require_authenticated_user] diff --git a/lib/kith_web/user_auth.ex b/lib/kith_web/user_auth.ex index e66ef93..06df9ca 100644 --- a/lib/kith_web/user_auth.ex +++ b/lib/kith_web/user_auth.ex @@ -243,6 +243,25 @@ defmodule KithWeb.UserAuth do end end + def on_mount(:require_admin, params, session, socket) do + case on_mount(:require_authenticated, params, session, socket) do + {:cont, socket} -> + if Kith.Policy.can?(socket.assigns.current_scope.user, :manage, :oban) do + {:cont, socket} + else + socket = + socket + |> Phoenix.LiveView.put_flash(:error, "Admin access required.") + |> Phoenix.LiveView.redirect(to: ~p"/dashboard") + + {:halt, socket} + end + + {:halt, _} = halt -> + halt + end + end + def on_mount(:require_sudo_mode, _params, session, socket) do socket = mount_current_scope(socket, session) diff --git a/mix.exs b/mix.exs index 9c90417..71f5468 100644 --- a/mix.exs +++ b/mix.exs @@ -73,6 +73,7 @@ defmodule Kith.MixProject do # Background Jobs {:oban, "~> 2.18"}, + {:oban_web, "~> 2.11"}, # Email {:swoosh, "~> 1.17"}, diff --git a/mix.lock b/mix.lock index a2ef26a..fb12711 100644 --- a/mix.lock +++ b/mix.lock @@ -69,6 +69,8 @@ "nimble_ownership": {:hex, :nimble_ownership, "1.0.2", "fa8a6f2d8c592ad4d79b2ca617473c6aefd5869abfa02563a77682038bf916cf", [:mix], [], "hexpm", "098af64e1f6f8609c6672127cfe9e9590a5d3fcdd82bc17a377b8692fd81a879"}, "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], 
"hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "oban": {:hex, :oban, "2.20.3", "e4d27336941955886cc7113420c32c63b70b64f10b27e08e3cf2b001153953cd", [:mix], [{:ecto_sql, "~> 3.10", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:ecto_sqlite3, "~> 0.9", [hex: :ecto_sqlite3, repo: "hexpm", optional: true]}, {:igniter, "~> 0.5", [hex: :igniter, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.20", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 1.3", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "075ffbf1279a96bec495bc63d647b08929837d70bcc0427249ffe4d1dddaec33"}, + "oban_met": {:hex, :oban_met, "1.0.6", "2a5500aff496b7ac4b830b0b03b08e920625a051bb6890981fbb53b15f1cbdc0", [:mix], [{:oban, "~> 2.19", [hex: :oban, repo: "hexpm", optional: false]}], "hexpm", "15ea3303de76225878a8e6c25a9d62bd1e2e9dd1c46ac8487d873b9f99e8dcee"}, + "oban_web": {:hex, :oban_web, "2.11.8", "be6521b5b1eb6d4182f40f5acc948ea65d243451b94c26f06a7329575748f695", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}, {:oban, "~> 2.19", [hex: :oban, repo: "hexpm", optional: false]}, {:oban_met, "~> 1.0", [hex: :oban_met, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.7", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}], "hexpm", "d0c04a836d929ef037e96be142285238275aabbafe62543bbdcc3f541d29ec30"}, "octo_fetch": {:hex, :octo_fetch, "0.4.0", "074b5ecbc08be10b05b27e9db08bc20a3060142769436242702931c418695b19", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 
1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "cf8be6f40cd519d7000bb4e84adcf661c32e59369ca2827c4e20042eda7a7fc6"}, "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, "pbkdf2_elixir": {:hex, :pbkdf2_elixir, "2.3.1", "073866b593887365d0ff50bb806d860a50f454bcda49b5b6f4658c9173c53889", [:mix], [{:comeonin, "~> 5.3", [hex: :comeonin, repo: "hexpm", optional: false]}], "hexpm", "ab4da7db8aeb2db20e02a1d416cbb46d0690658aafb4396878acef8748c9c319"}, diff --git a/test/kith/policy_test.exs b/test/kith/policy_test.exs new file mode 100644 index 0000000..ba00a5b --- /dev/null +++ b/test/kith/policy_test.exs @@ -0,0 +1,27 @@ +defmodule Kith.PolicyTest do + use ExUnit.Case, async: true + + alias Kith.Accounts.User + alias Kith.Policy + + describe "can?/3 for :oban resource" do + test "admin can manage Oban" do + assert Policy.can?(%User{role: "admin"}, :manage, :oban) + assert Policy.can?(%User{role: "admin"}, :read, :oban) + end + + test "editor cannot access Oban" do + refute Policy.can?(%User{role: "editor"}, :manage, :oban) + refute Policy.can?(%User{role: "editor"}, :read, :oban) + end + + test "viewer cannot access Oban" do + refute Policy.can?(%User{role: "viewer"}, :manage, :oban) + refute Policy.can?(%User{role: "viewer"}, :read, :oban) + end + + test "unknown role cannot access Oban" do + refute Policy.can?(%User{role: "ghost"}, :manage, :oban) + end + end +end diff --git a/test/kith_web/user_auth_test.exs b/test/kith_web/user_auth_test.exs index 22ca2de..819cfd6 100644 --- a/test/kith_web/user_auth_test.exs +++ b/test/kith_web/user_auth_test.exs @@ -282,6 +282,55 @@ defmodule KithWeb.UserAuthTest do end end + describe "on_mount :require_admin" do + test "allows users with role admin", %{conn: conn, user: user} do + user_token = Accounts.generate_user_session_token(user) + session = conn |> 
put_session(:user_token, user_token) |> get_session() + + socket = %LiveView.Socket{ + endpoint: KithWeb.Endpoint, + assigns: %{__changed__: %{}, flash: %{}} + } + + assert {:cont, updated_socket} = + UserAuth.on_mount(:require_admin, %{}, session, socket) + + assert updated_socket.assigns.current_scope.user.id == user.id + end + + test "halts users without admin role" do + editor = user_fixture(%{role: "editor"}) + editor = %{editor | authenticated_at: DateTime.utc_now(:second)} + user_token = Accounts.generate_user_session_token(editor) + + conn = + Phoenix.ConnTest.build_conn() + |> Map.replace!(:secret_key_base, KithWeb.Endpoint.config(:secret_key_base)) + |> Plug.Test.init_test_session(%{}) + |> put_session(:user_token, user_token) + + session = get_session(conn) + + socket = %LiveView.Socket{ + endpoint: KithWeb.Endpoint, + assigns: %{__changed__: %{}, flash: %{}} + } + + assert {:halt, _socket} = UserAuth.on_mount(:require_admin, %{}, session, socket) + end + + test "halts when no user is logged in", %{conn: conn} do + session = conn |> get_session() + + socket = %LiveView.Socket{ + endpoint: KithWeb.Endpoint, + assigns: %{__changed__: %{}, flash: %{}} + } + + assert {:halt, _socket} = UserAuth.on_mount(:require_admin, %{}, session, socket) + end + end + describe "on_mount :require_sudo_mode" do test "allows users that have authenticated in the last 10 minutes", %{conn: conn, user: user} do user_token = Accounts.generate_user_session_token(user) From edbaa2d2eba00948a2bd9682e5ca3335057971a8 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 20:59:21 +0300 Subject: [PATCH 06/58] chore: parameterize Docker host ports via .env Make every published host port configurable through env-var substitution so contributors can resolve local port conflicts without editing compose files. Defaults preserve current behavior with no .env present. 
Dev (docker-compose.dev.yml): MAILPIT_SMTP_PORT, MAILPIT_WEB_PORT, APP_PORT join the existing DB_PORT and MINIO_*_PORT vars. Prod (docker-compose.prod.yml): HTTP_PORT, HTTPS_PORT for Caddy. Internal container ports remain hardcoded so services can address each other on standard ports over the Docker network. --- .env.example | 25 +++++++++++++++++++++---- docker-compose.dev.yml | 17 ++++++++++++++--- docker-compose.prod.yml | 4 ++-- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 1dffc46..131136b 100644 --- a/.env.example +++ b/.env.example @@ -30,10 +30,7 @@ KITH_HOSTNAME=localhost POSTGRES_USER=kith POSTGRES_PASSWORD=change_me POSTGRES_DB=kith_prod -# PostgreSQL port — used by Elixir app (dev/test) AND as Docker host port -# Default 5434 avoids conflicts with standard postgres (5432) -# Inside Docker, the app container overrides this to 5432 (internal network) -DB_PORT=5434 + POOL_SIZE=10 # DATABASE_SSL=false @@ -121,3 +118,23 @@ SENTRY_DSN= SENTRY_ENVIRONMENT=production # Required in production for /metrics endpoint access METRICS_TOKEN=generate-a-random-token + +# ============================================================ +# Docker Host Ports +# ============================================================ +# These configure which HOST ports Compose publishes for each service. +# Defaults match docker-compose.{dev,prod}.yml; override only on conflict. +# Internal container ports are NOT configurable (services talk to each other +# on standard ports over the Docker network). 
+# +# Dev stack (docker-compose.dev.yml): +DB_PORT=5434 # postgres -> host (default 5434, avoids local 5432) +MAILPIT_SMTP_PORT=1025 # mailpit SMTP listener +MAILPIT_WEB_PORT=8025 # mailpit web UI -> http://localhost:8025 +MINIO_PORT=9000 # MinIO S3 API +MINIO_CONSOLE_PORT=9001 # MinIO web console +APP_PORT=4000 # Phoenix app (only when running via Compose) +# +# Prod stack (docker-compose.prod.yml): +HTTP_PORT=80 # Caddy HTTP (redirects to HTTPS) +HTTPS_PORT=443 # Caddy HTTPS \ No newline at end of file diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 357984e..bdb4c16 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,3 +1,14 @@ +# Kith Development Docker Compose +# +# Host ports are configurable via a `.env` file in the project root (auto-loaded +# by Compose). All variables have defaults — `.env` is only needed to override. +# See `.env.example` ("Docker Host Ports" section) for the full list. +# +# Usage: +# docker compose -f docker-compose.dev.yml up -d # infra only +# docker compose -f docker-compose.dev.yml up -d postgres mailpit # subset +# docker compose -f docker-compose.dev.yml --profile app up -d # also run app + services: postgres: image: postgres:15-alpine @@ -16,8 +27,8 @@ services: mailpit: image: axllent/mailpit:latest ports: - - "1025:1025" - - "8025:8025" + - "${MAILPIT_SMTP_PORT:-1025}:1025" + - "${MAILPIT_WEB_PORT:-8025}:8025" minio: image: minio/minio:latest @@ -52,7 +63,7 @@ services: context: . 
dockerfile: Dockerfile.dev ports: - - "4000:4000" + - "${APP_PORT:-4000}:4000" environment: DB_HOST: postgres DB_PORT: "5432" diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 92ceb98..2f5f191 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -202,8 +202,8 @@ services: caddy: image: caddy:2-alpine ports: - - "80:80" - - "443:443" + - "${HTTP_PORT:-80}:80" + - "${HTTPS_PORT:-443}:443" volumes: - ./Caddyfile:/etc/caddy/Caddyfile:ro - caddy_data:/data From 121948951e6085086e5b67c632c77252947b9917 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 21:17:39 +0300 Subject: [PATCH 07/58] fix: extract Monica photo import to async worker with live sync_summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Photo import previously ran inline as Phase 4 of MonicaApi.crawl/5, which buried it inside the contact-crawl job and left the import_history "Photo Sync" UI panel stuck on "in progress" forever — sync_summary was never written because the refactor that deleted PhotoBatchSyncWorker (commit e474853) removed the only writers. Move the photo crawl into its own MonicaPhotoSyncWorker on the photo_sync queue, enqueued by MonicaApiCrawlWorker when api_options["photos"] is true. The worker passes credentials through job args (matching the MonicaDocumentImportWorker pattern) so the main worker can wipe the API key from the DB immediately after contact import completes. Drop the unauthenticated link fallback from the decoder — Monica's /api/photos endpoint always returns dataUrl, so the previous Req.get(link) path was likely 401'ing on protected storage URLs. If a photo lacks dataUrl, it's now surfaced as a failed entry in sync_summary with a no_data_url reason, instead of being silently dropped. 
The worker writes sync_summary after each page so the UI shows live progress — total/synced/failed/not_found counts plus a per-photo table — and logs each page boundary at :info under the [MonicaPhotoSync] prefix. Per-photo decisions log at :debug. Tests cover: dataUrl import + avatar set, not_found on missing contact, failed on missing dataUrl, dedup by content_hash, and mid-flight sync_summary writes between pages. --- config/config.exs | 3 +- lib/kith/imports/sources/monica_api.ex | 164 +-------- lib/kith/workers/monica_api_crawl_worker.ex | 20 +- lib/kith/workers/monica_photo_sync_worker.ex | 336 ++++++++++++++++++ test/kith/imports/sources/monica_api_test.exs | 59 --- .../workers/monica_api_crawl_worker_test.exs | 36 ++ .../workers/monica_photo_sync_worker_test.exs | 247 +++++++++++++ 7 files changed, 647 insertions(+), 218 deletions(-) create mode 100644 lib/kith/workers/monica_photo_sync_worker.ex create mode 100644 test/kith/workers/monica_photo_sync_worker_test.exs diff --git a/config/config.exs b/config/config.exs index f555214..4fb3c28 100644 --- a/config/config.exs +++ b/config/config.exs @@ -117,7 +117,8 @@ config :logger, :default_formatter, :attempt, :max_attempts, :state, - :source + :source, + :import_id ] # Cloak encryption vault — key set per-environment diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 9267cf4..2eb8bbe 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -16,7 +16,10 @@ defmodule Kith.Imports.Sources.MonicaApi do using import_records (no API calls needed). 3. **Extra notes** — for contacts with `statistics.number_of_notes > 3`, fetch remaining notes via `GET /api/contacts/{id}/notes`. - 4. **Photos** — optionally crawl `GET /api/photos?limit=100` to import all photos. 
+ + Photo import is handled by `Kith.Workers.MonicaPhotoSyncWorker`, enqueued + separately by `MonicaApiCrawlWorker` after this crawl completes when the + user opts in via `api_options["photos"]`. """ @behaviour Kith.Imports.Source @@ -103,19 +106,11 @@ defmodule Kith.Imports.Sources.MonicaApi do [] end - # Phase 4: Photos (optional) - photo_errors = - if opts["photos"] do - crawl_all_photos(credential, account_id, import_job) - else - [] - end - - # Phase 5-12: Additional data types (per-contact endpoints) + # Phase 4: Additional data types (per-contact endpoints) extra_data_errors = import_extra_data_types(credential, account_id, user_id, import_job, opts) - # Phase 13: Enqueue document import jobs (async, runs after main import) + # Phase 5: Enqueue document import jobs (async, runs after main import) if opts["documents"] do enqueue_document_imports(credential, account_id, user_id, import_job) end @@ -124,12 +119,11 @@ defmodule Kith.Imports.Sources.MonicaApi do acc.errors ++ ref_errors ++ notes_errors ++ - photo_errors ++ merge_result.errors ++ extra_data_errors error_count = - acc.error_count + length(ref_errors) + length(notes_errors) + length(photo_errors) + + acc.error_count + length(ref_errors) + length(notes_errors) + length(merge_result.errors) + length(extra_data_errors) {:ok, @@ -862,150 +856,6 @@ defmodule Kith.Imports.Sources.MonicaApi do end) end - # ── Phase 4: Photo crawl ──────────────────────────────────────────── - - defp crawl_all_photos(credential, account_id, import_job) do - crawl_photos_loop(credential, account_id, import_job, _page = 1, _errors = []) - end - - defp crawl_photos_loop(credential, account_id, import_job, page, errors) do - url = "#{credential.url}/api/photos" - - case api_get_json(credential, url, limit: @page_limit, page: page) do - {:ok, %{"data" => photos, "meta" => meta}} when is_list(photos) -> - last_page = meta["last_page"] || 1 - - errors = - Enum.reduce(photos, errors, fn photo, errs -> - import_api_photo(photo, 
account_id, import_job, errs) - end) - - if page < last_page do - crawl_photos_loop(credential, account_id, import_job, page + 1, errors) - else - errors - end - - {:error, :rate_limited} -> - errors ++ ["Rate limited fetching photos"] - - {:error, reason} -> - errors ++ ["Failed to fetch photos page #{page}: #{inspect(reason)}"] - - _ -> - errors - end - end - - defp import_api_photo(photo, account_id, import_job, errors) do - contact_id = get_in(photo, ["contact", "id"]) - source_id = to_string(contact_id) - - contact_rec = Imports.find_import_record(account_id, "monica_api", "contact", source_id) - - if contact_rec do - contact = Repo.get(Contacts.Contact, contact_rec.local_entity_id) - - if contact do - do_import_photo(contact, photo, import_job, errors) - else - errors - end - else - Logger.debug("[MonicaApi] Skipping photo for unknown contact #{source_id}") - errors - end - end - - defp do_import_photo(contact, photo, import_job, errors) do - file_name = photo["original_filename"] || "photo.jpg" - - case decode_photo_data(photo) do - {:ok, binary} -> - store_and_create_photo(contact, photo, binary, file_name, import_job, errors) - - :no_data -> - errors - - :error -> - errors ++ ["Failed to decode photo data for #{contact.first_name}"] - end - end - - defp store_and_create_photo(contact, photo, binary, file_name, import_job, errors) do - content_hash = :crypto.hash(:sha256, binary) |> Base.encode16(case: :lower) - - if Contacts.photo_exists_by_hash?(contact.id, content_hash) do - Logger.debug("[MonicaApi] Skipping duplicate photo for #{contact.first_name}") - errors - else - upload_and_record_photo(contact, photo, binary, file_name, content_hash, import_job, errors) - end - end - - defp upload_and_record_photo( - contact, - photo, - binary, - file_name, - content_hash, - import_job, - errors - ) do - key = Kith.Storage.generate_key(contact.account_id, "photos", file_name) - - case Kith.Storage.upload_binary(binary, key) do - {:ok, _} -> - attrs = %{ - 
"file_name" => file_name, - "storage_key" => key, - "file_size" => byte_size(binary), - "content_type" => photo["mime_type"] || "image/jpeg", - "content_hash" => content_hash - } - - create_photo_and_set_avatar(contact, photo, attrs, import_job, errors) - - {:error, reason} -> - errors ++ ["Failed to store photo for #{contact.first_name}: #{inspect(reason)}"] - end - end - - defp create_photo_and_set_avatar(contact, photo, attrs, import_job, errors) do - case Contacts.create_photo(contact, attrs) do - {:ok, photo_record} -> - maybe_record_entity(import_job, "photo", photo["uuid"], "photo", photo_record.id) - - if is_nil(contact.avatar) do - contact |> Ecto.Changeset.change(avatar: attrs["storage_key"]) |> Repo.update!() - end - - errors - - {:error, reason} -> - Logger.warning("[MonicaApi] Photo for #{contact.first_name}: #{inspect(reason)}") - errors - end - end - - defp decode_photo_data(%{"dataUrl" => "data:" <> _ = data_url}) do - case String.split(data_url, ",", parts: 2) do - [_meta, encoded] -> {:ok, Base.decode64!(encoded)} - _ -> :error - end - rescue - _ -> :error - end - - defp decode_photo_data(%{"link" => link}) when is_binary(link) and link != "" do - case Req.get(link, receive_timeout: 30_000) do - {:ok, %{status: 200, body: body}} when is_binary(body) -> {:ok, body} - _ -> :error - end - end - - defp decode_photo_data(_), do: :no_data - # ── Reference data building ────────────────────────────────────────── defp build_or_update_ref_data(account_id, contacts, nil) do diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex index f366140..5714d25 100644 --- a/lib/kith/workers/monica_api_crawl_worker.ex +++ b/lib/kith/workers/monica_api_crawl_worker.ex @@ -16,6 +16,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do alias Kith.Imports alias Kith.Imports.Sources.MonicaApi alias Kith.Workers.DuplicateDetectionWorker + alias Kith.Workers.MonicaPhotoSyncWorker @impl Oban.Worker def perform(%Oban.Job{args: 
%{"import_id" => import_id}}) do @@ -51,6 +52,9 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do # Trigger duplicate detection for newly imported contacts Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + # Enqueue photo sync (separate job) if the user opted in + maybe_enqueue_photo_sync(import_job) + Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}") :ok else @@ -82,10 +86,24 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do options = import_job.api_options || %{} %{ - "photos" => options["photos"] || false, "extra_notes" => options["extra_notes"] != false } end + defp maybe_enqueue_photo_sync(import_job) do + if get_in(import_job.api_options || %{}, ["photos"]) do + # api_key is wiped from the DB immediately after this worker completes, + # so the photo sync worker receives its own copy via job args + # (same pattern as MonicaDocumentImportWorker). + %{ + "import_id" => import_job.id, + "credential_url" => import_job.api_url, + "credential_api_key" => import_job.api_key_encrypted + } + |> MonicaPhotoSyncWorker.new() + |> Oban.insert() + end + end + defp ensure_map(m) when is_map(m), do: m end diff --git a/lib/kith/workers/monica_photo_sync_worker.ex b/lib/kith/workers/monica_photo_sync_worker.ex new file mode 100644 index 0000000..6bd4868 --- /dev/null +++ b/lib/kith/workers/monica_photo_sync_worker.ex @@ -0,0 +1,336 @@ +defmodule Kith.Workers.MonicaPhotoSyncWorker do + @moduledoc """ + Imports photos for a Monica API import after the main contact crawl completes. + + Enqueued by `MonicaApiCrawlWorker` when `api_options["photos"]` is true. + Paginates `GET /api/photos`, decodes each photo's inline `dataUrl`, dedups + by SHA-256 content hash, persists to storage and the `photos` table, and + sets the owning contact's avatar if not already set. + + Writes incremental progress to `import.sync_summary` after each page so the + import-history UI shows live counts and a per-photo table. 
+ """ + + use Oban.Worker, queue: :photo_sync, max_attempts: 3 + + require Logger + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Repo + alias Kith.Storage + + @page_limit 100 + @max_rate_limit_retries 3 + @rate_limit_sleep_ms :timer.seconds(65) + @max_photos_in_summary 500 + @log_prefix "[MonicaPhotoSync]" + + @impl Oban.Worker + def perform(%Oban.Job{ + args: %{ + "import_id" => import_id, + "credential_url" => credential_url, + "credential_api_key" => credential_api_key + } + }) do + import_job = Imports.get_import!(import_id) + Logger.metadata(import_id: import_id, worker: "MonicaPhotoSync") + Logger.info("#{@log_prefix} Starting photo sync for import #{import_id}") + + credential = %{ + url: credential_url, + api_key: credential_api_key, + req_options: Application.get_env(:kith, :monica_req_options, []) + } + + initial = empty_summary() + Imports.update_sync_summary(import_job, initial) + + case crawl_pages(credential, import_job, 1, initial) do + {:ok, final} -> + Imports.update_sync_summary(import_job, final) + + Logger.info( + "#{@log_prefix} Photo sync complete: " <> + "#{final["synced"]}/#{final["total"]} synced, " <> + "#{final["failed"]} failed, #{final["not_found"]} not_found" + ) + + :ok + + {:error, reason} -> + Logger.error("#{@log_prefix} Photo sync failed: #{inspect(reason)}") + {:error, reason} + end + end + + @impl Oban.Worker + def timeout(_job), do: :timer.minutes(30) + + # ── Page loop ─────────────────────────────────────────────────────────── + + defp crawl_pages(credential, import_job, page, summary) do + url = "#{credential.url}/api/photos" + + case api_get_json(credential, url, limit: @page_limit, page: page) do + {:ok, %{"data" => photos, "meta" => meta}} when is_list(photos) -> + last_page = meta["last_page"] || 1 + + Logger.info( + "#{@log_prefix} page #{page}: #{length(photos)} photos (#{last_page} pages total)" + ) + + page_summary = + Enum.reduce(photos, summary, fn photo, acc -> + import_one_photo(photo, import_job, acc) 
+ end) + + Logger.info( + "#{@log_prefix} page #{page} done (running: " <> + "#{page_summary["synced"]}/#{page_summary["total"]} synced, " <> + "#{page_summary["failed"]} failed, #{page_summary["not_found"]} not_found)" + ) + + Imports.update_sync_summary(import_job, page_summary) + + if page < last_page do + crawl_pages(credential, import_job, page + 1, page_summary) + else + {:ok, page_summary} + end + + {:error, reason} -> + Logger.warning("#{@log_prefix} Failed to fetch photos page #{page}: #{inspect(reason)}") + {:error, reason} + + other -> + Logger.warning("#{@log_prefix} Unexpected response on page #{page}: #{inspect(other)}") + {:ok, summary} + end + end + + # ── Per-photo flow ───────────────────────────────────────────────────── + + defp import_one_photo(photo, import_job, summary) do + summary = bump(summary, "total") + uuid = photo["uuid"] + monica_contact_id = get_in(photo, ["contact", "id"]) + + case resolve_contact(import_job.account_id, monica_contact_id) do + {:ok, contact} -> + handle_decode(contact, photo, import_job, summary, uuid) + + {:not_found, reason} -> + Logger.info("#{@log_prefix} photo #{uuid}: #{reason}") + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => monica_contact_id, + "status" => "not_found", + "reason" => reason + }) + |> bump("not_found") + end + end + + defp resolve_contact(_account_id, nil), + do: {:not_found, "missing contact id in /api/photos response"} + + defp resolve_contact(account_id, monica_contact_id) do + source_id = to_string(monica_contact_id) + + case Imports.find_import_record(account_id, "monica_api", "contact", source_id) do + nil -> + {:not_found, "contact #{source_id} not in import_records"} + + %{local_entity_id: local_id} -> + case Repo.get(Contacts.Contact, local_id) do + nil -> + {:not_found, "local contact #{local_id} not found"} + + %{deleted_at: deleted_at} when not is_nil(deleted_at) -> + {:not_found, "local contact #{local_id} is soft-deleted"} + + contact -> + {:ok, contact} + 
end + end + end + + defp handle_decode(contact, photo, import_job, summary, uuid) do + case decode_photo_data(photo) do + {:ok, binary} -> + handle_dedup(contact, photo, binary, import_job, summary, uuid) + + {:error, reason} -> + Logger.warning("#{@log_prefix} photo #{uuid}: failed (#{reason})") + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "failed", + "reason" => to_string(reason) + }) + |> bump("failed") + end + end + + defp handle_dedup(contact, photo, binary, import_job, summary, uuid) do + content_hash = :crypto.hash(:sha256, binary) |> Base.encode16(case: :lower) + + if Contacts.photo_exists_by_hash?(contact.id, content_hash) do + Logger.debug( + "#{@log_prefix} photo #{uuid}: duplicate hash for contact #{contact.id}, skipping" + ) + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "synced", + "reason" => "duplicate" + }) + |> bump("synced") + else + do_upload(contact, photo, binary, content_hash, import_job, summary, uuid) + end + end + + defp do_upload(contact, photo, binary, content_hash, import_job, summary, uuid) do + file_name = photo["original_filename"] || "photo.jpg" + key = Storage.generate_key(contact.account_id, "photos", file_name) + + attrs = %{ + "file_name" => file_name, + "storage_key" => key, + "file_size" => byte_size(binary), + "content_type" => photo["mime_type"] || "image/jpeg", + "content_hash" => content_hash + } + + with {:ok, _} <- Storage.upload_binary(binary, key), + {:ok, photo_record} <- Contacts.create_photo(contact, attrs) do + maybe_record_entity(import_job, uuid, photo_record.id) + maybe_set_avatar(contact, key) + + Logger.debug( + "#{@log_prefix} photo #{uuid} → contact #{contact.id}: synced " <> + "(hash #{String.slice(content_hash, 0, 8)})" + ) + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "synced" + }) + |> bump("synced") + else + {:error, reason} -> + reason_str = inspect(reason) 
+ Logger.warning("#{@log_prefix} photo #{uuid}: failed (#{reason_str})") + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "failed", + "reason" => reason_str + }) + |> bump("failed") + end + end + + defp maybe_set_avatar(%{avatar: nil} = contact, key) do + contact + |> Ecto.Changeset.change(avatar: key) + |> Repo.update!() + end + + defp maybe_set_avatar(_contact, _key), do: :ok + + defp maybe_record_entity(_import_job, nil, _local_id), do: :ok + + defp maybe_record_entity(import_job, uuid, local_id), + do: Imports.record_imported_entity(import_job, "photo", uuid, "photo", local_id) + + # ── Summary helpers ──────────────────────────────────────────────────── + + defp empty_summary do + %{ + "total" => 0, + "synced" => 0, + "failed" => 0, + "not_found" => 0, + "photos" => [] + } + end + + defp bump(summary, key), do: Map.update!(summary, key, &(&1 + 1)) + + defp record_photo(summary, entry) do + Map.update!(summary, "photos", fn list -> + [entry | Enum.take(list, @max_photos_in_summary - 1)] + end) + end + + # ── Decoding ─────────────────────────────────────────────────────────── + + defp decode_photo_data(%{"dataUrl" => "data:" <> _ = data_url}) do + case String.split(data_url, ",", parts: 2) do + [_meta, encoded] -> + case Base.decode64(encoded) do + {:ok, binary} -> {:ok, binary} + :error -> {:error, :base64_decode_failed} + end + + _ -> + {:error, :malformed_data_url} + end + end + + defp decode_photo_data(_), do: {:error, :no_data_url} + + # ── HTTP helpers ─────────────────────────────────────────────────────── + + defp api_get_json(credential, url, params), + do: api_get_json_with_retry(credential, url, params, 0) + + defp api_get_json_with_retry(_credential, _url, _params, retries) + when retries >= @max_rate_limit_retries, + do: {:error, :rate_limited} + + defp api_get_json_with_retry(credential, url, params, retries) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when 
is_map(body) -> + {:ok, body} + + {:ok, %{status: 429}} -> + Logger.warning( + "#{@log_prefix} rate limited, sleeping #{@rate_limit_sleep_ms}ms (retry #{retries + 1})" + ) + + Process.sleep(@rate_limit_sleep_ms) + api_get_json_with_retry(credential, url, params, retries + 1) + + {:ok, %{status: status}} -> + {:error, "Unexpected status: #{status}"} + + {:error, reason} -> + {:error, reason} + end + end + + defp api_get(credential, url, params) do + headers = [ + {"Authorization", "Bearer #{credential.api_key}"}, + {"Accept", "application/json"} + ] + + options = [headers: headers, params: params] ++ Map.get(credential, :req_options, []) + Req.get(url, options) + end +end diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index f87e9b7..85c8aab 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -585,65 +585,6 @@ defmodule Kith.Imports.Sources.MonicaApiTest do end end - # ── crawl/5 — photo crawl ──────────────────────────────────────────── - - describe "crawl/5 — photo crawl" do - test "imports photos from paginated photos endpoint", %{user: user, account_id: account_id} do - # Small 1x1 JPEG encoded as data URL - pixel = Base.encode64(<<0xFF, 0xD8, 0xFF, 0xE0>>) - data_url = "data:image/jpeg;base64,#{pixel}" - - contacts = [contact_json(id: 1, first_name: "PhotoPerson")] - - photos = [ - photo_json( - id: 1, - data_url: data_url, - contact: contact_short_json(1, Ecto.UUID.generate(), "PhotoPerson", "Test") - ) - ] - - {:ok, agent} = Agent.start_link(fn -> 0 end) - - Req.Test.stub(@stub_name, fn conn -> - call = Agent.get_and_update(agent, fn n -> {n + 1, n + 1} end) - - if call == 1 do - Req.Test.json(conn, contacts_page_json(contacts)) - else - Req.Test.json(conn, photos_page_json(photos)) - end - end) - - import_job = api_import_fixture(account_id, user.id) - - assert {:ok, _} = - MonicaApi.crawl(account_id, user.id, credential(), import_job, 
%{"photos" => true}) - - # Verify photos endpoint was called - assert Agent.get(agent, & &1) == 2 - Agent.stop(agent) - end - - test "skips photos when opt-out", %{user: user, account_id: account_id} do - contacts = [contact_json(id: 1, first_name: "NoPhotos")] - - {:ok, agent} = Agent.start_link(fn -> 0 end) - - Req.Test.stub(@stub_name, fn conn -> - Agent.update(agent, &(&1 + 1)) - Req.Test.json(conn, contacts_page_json(contacts)) - end) - - import_job = api_import_fixture(account_id, user.id) - assert {:ok, _} = MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) - - # Only contacts page, no photos - assert Agent.get(agent, & &1) == 1 - Agent.stop(agent) - end - end - # ── crawl/5 — rate limiting ────────────────────────────────────────── describe "crawl/5 — rate limiting" do diff --git a/test/kith/workers/monica_api_crawl_worker_test.exs b/test/kith/workers/monica_api_crawl_worker_test.exs index 2fa6f88..082d567 100644 --- a/test/kith/workers/monica_api_crawl_worker_test.exs +++ b/test/kith/workers/monica_api_crawl_worker_test.exs @@ -4,6 +4,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorkerTest do alias Kith.Imports alias Kith.Workers.MonicaApiCrawlWorker + alias Kith.Workers.MonicaPhotoSyncWorker import Kith.AccountsFixtures import Kith.ContactsFixtures @@ -59,5 +60,40 @@ defmodule Kith.Workers.MonicaApiCrawlWorkerTest do assert import_job.api_options["photos"] == true assert import_job.api_options["extra_notes"] == false end + + test "enqueues MonicaPhotoSyncWorker when photos opt-in", %{ + user: user, + account_id: account_id + } do + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"photos" => true} + }) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + assert_enqueued( + worker: MonicaPhotoSyncWorker, + args: %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + 
"credential_api_key" => "test-key" + } + ) + end + + test "does not enqueue MonicaPhotoSyncWorker when photos opt-out", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture_with_stub(account_id, user.id) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + refute_enqueued(worker: MonicaPhotoSyncWorker) + end end end diff --git a/test/kith/workers/monica_photo_sync_worker_test.exs b/test/kith/workers/monica_photo_sync_worker_test.exs new file mode 100644 index 0000000..d95456a --- /dev/null +++ b/test/kith/workers/monica_photo_sync_worker_test.exs @@ -0,0 +1,247 @@ +defmodule Kith.Workers.MonicaPhotoSyncWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Repo + alias Kith.Workers.MonicaPhotoSyncWorker + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + import Kith.MonicaApiFixtures + + @stub_name :monica_photo_sync_stub + @pixel_data_url "data:image/jpeg;base64,#{Base.encode64(<<0xFF, 0xD8, 0xFF, 0xE0>>)}" + @other_pixel_data_url "data:image/png;base64,#{Base.encode64(<<0x89, 0x50, 0x4E, 0x47>>)}" + + setup do + user = user_fixture() + seed_reference_data!() + + Application.put_env( + :kith, + :monica_req_options, + plug: {Req.Test, @stub_name}, + retry: false + ) + + on_exit(fn -> Application.delete_env(:kith, :monica_req_options) end) + + %{user: user, account_id: user.account_id} + end + + defp api_import_fixture(account_id, user_id) do + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"photos" => true} + }) + end + + defp job_args(import_job), + do: %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key" + } + + defp register_imported_contact!(import_job, contact, monica_id) do + {:ok, _rec} = + 
Imports.record_imported_entity( + import_job, + "contact", + to_string(monica_id), + "contact", + contact.id + ) + end + + describe "perform/1 — happy path" do + test "imports photo with dataUrl, sets avatar, writes sync_summary", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture(account_id, user.id) + contact = contact_fixture(account_id, %{first_name: "PhotoPerson"}) + register_imported_contact!(import_job, contact, 964) + + photo = + photo_json( + id: 35, + data_url: @pixel_data_url, + contact: contact_short_json(964, Ecto.UUID.generate(), "PhotoPerson", "Test") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert [photo_row] = Contacts.list_photos(contact.id) + assert photo_row.contact_id == contact.id + assert photo_row.content_hash != nil + + reloaded_contact = Repo.get!(Contacts.Contact, contact.id) + assert reloaded_contact.avatar == photo_row.storage_key + + updated = Imports.get_import!(import_job.id) + assert updated.sync_summary["total"] == 1 + assert updated.sync_summary["synced"] == 1 + assert updated.sync_summary["failed"] == 0 + assert updated.sync_summary["not_found"] == 0 + assert [%{"status" => "synced", "contact_id" => cid}] = updated.sync_summary["photos"] + assert cid == contact.id + end + end + + describe "perform/1 — not_found" do + test "marks photo as not_found when contact has no import_record", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture(account_id, user.id) + + photo = + photo_json( + id: 100, + data_url: @pixel_data_url, + contact: contact_short_json(9999, Ecto.UUID.generate(), "Unknown", "Person") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert Repo.aggregate(Contacts.Photo, :count, :id) == 0 + + 
updated = Imports.get_import!(import_job.id) + assert updated.sync_summary["not_found"] == 1 + assert updated.sync_summary["synced"] == 0 + assert [%{"status" => "not_found", "reason" => reason}] = updated.sync_summary["photos"] + assert reason =~ "import_records" + end + end + + describe "perform/1 — failed" do + test "marks photo as failed when dataUrl is missing", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + contact = contact_fixture(account_id, %{first_name: "NoData"}) + register_imported_contact!(import_job, contact, 200) + + photo = + photo_json( + id: 200, + data_url: nil, + link: nil, + contact: contact_short_json(200, Ecto.UUID.generate(), "NoData", "Person") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert Contacts.list_photos(contact.id) == [] + + updated = Imports.get_import!(import_job.id) + assert updated.sync_summary["failed"] == 1 + assert [%{"status" => "failed", "reason" => "no_data_url"}] = updated.sync_summary["photos"] + end + end + + describe "perform/1 — dedup" do + test "dedups by content_hash on second run, still counts as synced", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture(account_id, user.id) + contact = contact_fixture(account_id, %{first_name: "Dup"}) + register_imported_contact!(import_job, contact, 300) + + photo = + photo_json( + id: 300, + data_url: @pixel_data_url, + contact: contact_short_json(300, Ecto.UUID.generate(), "Dup", "Person") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert [_only_one] = Contacts.list_photos(contact.id) + + updated = Imports.get_import!(import_job.id) + assert 
updated.sync_summary["synced"] == 1 + assert updated.sync_summary["total"] == 1 + [entry] = updated.sync_summary["photos"] + assert entry["status"] == "synced" + assert entry["reason"] == "duplicate" + end + end + + describe "perform/1 — incremental progress" do + test "writes sync_summary after each page", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + contact_a = contact_fixture(account_id, %{first_name: "PageA"}) + contact_b = contact_fixture(account_id, %{first_name: "PageB"}) + register_imported_contact!(import_job, contact_a, 401) + register_imported_contact!(import_job, contact_b, 402) + + page1_photo = + photo_json( + id: 401, + data_url: @pixel_data_url, + contact: contact_short_json(401, Ecto.UUID.generate(), "PageA", "Test") + ) + + page2_photo = + photo_json( + id: 402, + data_url: @other_pixel_data_url, + contact: contact_short_json(402, Ecto.UUID.generate(), "PageB", "Test") + ) + + test_pid = self() + + Req.Test.stub(@stub_name, fn conn -> + page = conn.query_params["page"] || "1" + + case page do + "1" -> + # Mid-flight snapshot: by the time we serve page 2, page 1 must have been + # persisted to sync_summary. + send(test_pid, :page_1_requested) + Req.Test.json(conn, photos_page_json([page1_photo], 1, 2, 2)) + + "2" -> + updated = Imports.get_import!(import_job.id) + send(test_pid, {:mid_flight_summary, updated.sync_summary}) + Req.Test.json(conn, photos_page_json([page2_photo], 2, 2, 2)) + end + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert_received :page_1_requested + assert_received {:mid_flight_summary, mid} + # After page 1 completes, exactly one photo should be recorded. 
+ assert mid["total"] == 1 + assert mid["synced"] == 1 + + final = Imports.get_import!(import_job.id) + assert final.sync_summary["total"] == 2 + assert final.sync_summary["synced"] == 2 + end + end +end From 77496f90e230d181830204ffa43ff4f8a6786265 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 21:28:09 +0300 Subject: [PATCH 08/58] fix: queue MonicaPhotoSyncWorker on :imports instead of :photo_sync The :photo_sync queue was removed in commit e474853 along with PhotoBatchSyncWorker, when the Oban queues config in config.exs was updated to drop it. Jobs queued to :photo_sync would sit forever with no consumers. Switch to :imports to match MonicaApiCrawlWorker and MonicaDocumentImportWorker. --- lib/kith/workers/monica_photo_sync_worker.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kith/workers/monica_photo_sync_worker.ex b/lib/kith/workers/monica_photo_sync_worker.ex index 6bd4868..8554741 100644 --- a/lib/kith/workers/monica_photo_sync_worker.ex +++ b/lib/kith/workers/monica_photo_sync_worker.ex @@ -11,7 +11,7 @@ defmodule Kith.Workers.MonicaPhotoSyncWorker do import-history UI shows live counts and a per-photo table. """ - use Oban.Worker, queue: :photo_sync, max_attempts: 3 + use Oban.Worker, queue: :imports, max_attempts: 3 require Logger From 34cdb804e3317b73289ef4bf07dfb51bbed49d56 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 21:36:15 +0300 Subject: [PATCH 09/58] docs: drop stale photo-sync references after Monica import refactor - CLAUDE.md: remove photo_sync + api_supplement from the Oban queues list (those queues were removed in commit e474853); update queue count from 9 to 7 to match config/config.exs. - MonicaApiCrawlWorker moduledoc: clarify that photo import runs as a separate MonicaPhotoSyncWorker job, not inline.
- Delete docs/superpowers/specs/2026-03-21-extensible-import-system-design.md and docs/superpowers/plans/2026-03-22-extensible-import-system.md. Both describe the pre-refactor design (PhotoSyncWorker, ApiSupplementWorker, file-based Monica import, per-photo job model) that no longer exists. CLAUDE.md plus the live moduledocs are now the source of truth. --- CLAUDE.md | 4 +- .../2026-03-22-extensible-import-system.md | 2859 ----------------- ...6-03-21-extensible-import-system-design.md | 408 --- lib/kith/workers/monica_api_crawl_worker.ex | 8 +- 4 files changed, 7 insertions(+), 3272 deletions(-) delete mode 100644 docs/superpowers/plans/2026-03-22-extensible-import-system.md delete mode 100644 docs/superpowers/specs/2026-03-21-extensible-import-system-design.md diff --git a/CLAUDE.md b/CLAUDE.md index fe5b8d7..8cb088e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -75,7 +75,7 @@ lib/kith/ # Domain layer (contexts + schemas) storage/ # File storage abstraction (local disk / S3) tasks/ # Personal tasks vcard/ # vCard parser + serializer - workers/ # 16 Oban workers across 9 queues + workers/ # 16 Oban workers across 7 queues lib/kith_web/ # Web layer controllers/api/ # REST API controllers (bearer token auth, cursor pagination) @@ -106,7 +106,7 @@ default queries. 30-day trash before permanent purge via `ContactPurgeWorker`. ### Oban background jobs Workers live in `lib/kith/workers/`. Queues: default, mailers, reminders, exports, -imports, immich, purge, photo_sync, api_supplement. Four cron jobs run nightly/weekly. +imports, immich, purge. Four cron jobs run nightly/weekly. Tests use `Oban.Testing` — Oban is disabled in test env. 
### REST API conventions diff --git a/docs/superpowers/plans/2026-03-22-extensible-import-system.md b/docs/superpowers/plans/2026-03-22-extensible-import-system.md deleted file mode 100644 index e754690..0000000 --- a/docs/superpowers/plans/2026-03-22-extensible-import-system.md +++ /dev/null @@ -1,2859 +0,0 @@ -# Extensible Import System Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Build an extensible import framework supporting multiple data sources (VCF, Monica CRM), with a behaviour-based plugin architecture, per-contact transactions, import tracking via `import_records`, and a wizard UI with real-time progress. - -**Architecture:** Generic `imports`/`import_records` tables track jobs and source-ID-to-local-ID mappings. A `Source` behaviour defines the plugin contract. `ImportSourceWorker` (Oban) orchestrates any source. Monica source processes in 5 phases: reference data → contacts → children → cross-references → async photo/API sync. Separate Oban workers handle photo downloads and API supplements with rate-limit-aware staggering. - -**Tech Stack:** Elixir, Ecto, Oban, Phoenix LiveView, PostgreSQL, Cloak (encryption) - -**Spec:** `docs/superpowers/specs/2026-03-21-extensible-import-system-design.md` - -**Dependency:** `docs/superpowers/plans/2026-03-22-contact-first-met-fields.md` — must be implemented first. 
- ---- - -## File Structure - -| File | Action | Responsibility | -|---|---|---| -| `priv/repo/migrations/TIMESTAMP_create_imports_and_import_records.exs` | Create | Migration: imports + import_records tables, indexes, concurrent guard | -| `lib/kith/imports/source.ex` | Create | Source behaviour definition | -| `lib/kith/imports/import.ex` | Create | Import schema (job tracking) | -| `lib/kith/imports/import_record.ex` | Create | ImportRecord schema (source→local ID mapping) | -| `lib/kith/imports.ex` | Create | Imports context module | -| `lib/kith/imports/sources/vcard.ex` | Create | VCard source (wraps existing parser) | -| `lib/kith/imports/sources/monica.ex` | Create | Monica source implementation | -| `lib/kith/workers/import_source_worker.ex` | Create | Generic import Oban worker | -| `lib/kith/workers/photo_sync_worker.ex` | Create | Photo download Oban worker | -| `lib/kith/workers/api_supplement_worker.ex` | Create | API data supplement Oban worker | -| `lib/kith/workers/import_file_cleanup_worker.ex` | Create | Periodic cleanup (30-day retention) | -| `lib/kith_web/live/import_wizard_live.ex` | Create | Import wizard LiveView (replaces existing) | -| `lib/kith_web/live/components/monica_import_component.ex` | Create | Monica-specific form/validation/summary | -| `lib/kith_web/live/components/vcard_import_component.ex` | Create | VCard import UI (wraps existing) | -| `config/config.exs` | Modify | Add photo_sync + api_supplement Oban queues, cleanup cron | -| `lib/kith/contacts/photo.ex` | Modify | Add `pending_sync?/1` helper | -| `test/support/fixtures/imports_fixtures.ex` | Create | Test fixtures for imports | -| `test/kith/imports_test.exs` | Create | Context module tests | -| `test/kith/imports/sources/vcard_test.exs` | Create | VCard source tests | -| `test/kith/imports/sources/monica_test.exs` | Create | Monica source tests | -| `test/kith/workers/import_source_worker_test.exs` | Create | Worker tests | -| 
`test/kith/workers/photo_sync_worker_test.exs` | Create | Photo sync tests | -| `test/kith/workers/api_supplement_worker_test.exs` | Create | API supplement tests | - ---- - -### Task 1: Migration — Create imports and import_records tables - -**Files:** -- Create: `priv/repo/migrations/TIMESTAMP_create_imports_and_import_records.exs` - -- [ ] **Step 1: Generate the migration file** - -Run: `cd /Users/basharqassis/projects/kith && mix ecto.gen.migration create_imports_and_import_records` - -- [ ] **Step 2: Write the migration** - -```elixir -defmodule Kith.Repo.Migrations.CreateImportsAndImportRecords do - use Ecto.Migration - - def change do - create table(:imports) do - add :account_id, references(:accounts, on_delete: :delete_all), null: false - add :user_id, references(:users, on_delete: :nilify_all), null: false - add :source, :string, null: false - add :status, :string, null: false, default: "pending" - add :file_name, :string - add :file_size, :integer - add :file_storage_key, :string - add :api_url, :string - add :api_key_encrypted, :binary - add :api_options, :map - add :summary, :map - add :started_at, :utc_datetime - add :completed_at, :utc_datetime - timestamps(type: :utc_datetime) - end - - create index(:imports, [:account_id]) - - # Concurrent import guard: only one pending/processing import per account - create unique_index(:imports, [:account_id], - where: "status IN ('pending', 'processing')", - name: :imports_one_active_per_account_idx - ) - - create table(:import_records) do - add :account_id, references(:accounts, on_delete: :delete_all), null: false - add :import_id, references(:imports, on_delete: :delete_all), null: false - add :source, :string, null: false - add :source_entity_type, :string, null: false - add :source_entity_id, :string, null: false - add :local_entity_type, :string, null: false - add :local_entity_id, :bigint, null: false - timestamps(type: :utc_datetime) - end - - create unique_index(:import_records, - [:account_id, :source, 
:source_entity_type, :source_entity_id], - name: :import_records_source_unique_idx - ) - - create index(:import_records, [:import_id]) - create index(:import_records, [:local_entity_type, :local_entity_id]) - end -end -``` - -- [ ] **Step 3: Run the migration** - -Run: `cd /Users/basharqassis/projects/kith && mix ecto.migrate` -Expected: Migration runs successfully. - -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add priv/repo/migrations/*create_imports_and_import_records* -git commit -m "feat: add imports and import_records tables" -``` - ---- - -### Task 2: Source behaviour definition - -**Files:** -- Create: `lib/kith/imports/source.ex` - -- [ ] **Step 1: Create the Source behaviour** - -```elixir -defmodule Kith.Imports.Source do - @moduledoc """ - Behaviour for import source plugins. - - Each source (VCard, Monica, etc.) implements this behaviour to define - how to validate, parse, and import data from that source. - """ - - @type opts :: map() - @type credential :: %{url: String.t(), api_key: String.t()} - @type import_summary :: %{ - contacts: non_neg_integer(), - notes: non_neg_integer(), - skipped: non_neg_integer(), - error_count: non_neg_integer(), - errors: [String.t()] - } - - @callback name() :: String.t() - @callback file_types() :: [String.t()] - @callback validate_file(binary()) :: {:ok, map()} | {:error, String.t()} - @callback parse_summary(binary()) :: {:ok, map()} | {:error, String.t()} - @callback import(account_id :: integer(), user_id :: integer(), data :: binary(), opts()) :: - {:ok, import_summary()} | {:error, term()} - @callback supports_api?() :: boolean() - - @callback test_connection(credential()) :: :ok | {:error, String.t()} - @callback fetch_photo(credential(), resource_id :: String.t()) :: - {:ok, binary()} | {:error, term()} - @callback api_supplement_options() :: [ - %{key: atom(), label: String.t(), description: String.t()} - ] - @callback fetch_supplement(credential(), contact_source_id :: 
String.t(), key :: atom()) :: - {:ok, map()} | {:error, term()} - - @optional_callbacks [test_connection: 1, fetch_photo: 2, api_supplement_options: 0, fetch_supplement: 3] -end -``` - -- [ ] **Step 2: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/source.ex -git commit -m "feat: define Source behaviour for import plugins" -``` - ---- - -### Task 3: Import and ImportRecord schemas - -**Files:** -- Create: `lib/kith/imports/import.ex` -- Create: `lib/kith/imports/import_record.ex` - -- [ ] **Step 1: Write the Import schema** - -```elixir -defmodule Kith.Imports.Import do - use Ecto.Schema - import Ecto.Changeset - - @statuses ~w(pending processing completed failed cancelled) - - schema "imports" do - field :source, :string - field :status, :string, default: "pending" - field :file_name, :string - field :file_size, :integer - field :file_storage_key, :string - field :api_url, :string - field :api_key_encrypted, Kith.Vault.EncryptedBinary - field :api_options, :map - field :summary, :map - field :started_at, :utc_datetime - field :completed_at, :utc_datetime - - belongs_to :account, Kith.Accounts.Account - belongs_to :user, Kith.Accounts.User - - has_many :import_records, Kith.Imports.ImportRecord - - timestamps(type: :utc_datetime) - end - - def statuses, do: @statuses - - def create_changeset(import, attrs) do - import - |> cast(attrs, [ - :source, :file_name, :file_size, :file_storage_key, - :api_url, :api_key_encrypted, :api_options, - :account_id, :user_id - ]) - |> validate_required([:source, :account_id, :user_id]) - |> validate_inclusion(:source, ["monica", "vcard"]) - |> foreign_key_constraint(:account_id) - |> foreign_key_constraint(:user_id) - |> unique_constraint(:account_id, name: :imports_one_active_per_account_idx, - message: "an import is already in progress") - end - - def status_changeset(import, status, attrs \\ %{}) do - import - |> cast(attrs, [:summary, :started_at, :completed_at]) - |> put_change(:status, 
status) - |> validate_inclusion(:status, @statuses) - end -end -``` - -- [ ] **Step 2: Write the ImportRecord schema** - -```elixir -defmodule Kith.Imports.ImportRecord do - use Ecto.Schema - import Ecto.Changeset - - schema "import_records" do - field :source, :string - field :source_entity_type, :string - field :source_entity_id, :string - field :local_entity_type, :string - field :local_entity_id, :integer - - belongs_to :account, Kith.Accounts.Account - belongs_to :import, Kith.Imports.Import - - timestamps(type: :utc_datetime) - end - - def changeset(record, attrs) do - record - |> cast(attrs, [ - :source, :source_entity_type, :source_entity_id, - :local_entity_type, :local_entity_id, - :account_id, :import_id - ]) - |> validate_required([ - :source, :source_entity_type, :source_entity_id, - :local_entity_type, :local_entity_id, - :account_id, :import_id - ]) - |> unique_constraint( - [:account_id, :source, :source_entity_type, :source_entity_id], - name: :import_records_source_unique_idx - ) - end -end -``` - -- [ ] **Step 3: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/import.ex lib/kith/imports/import_record.ex -git commit -m "feat: add Import and ImportRecord Ecto schemas" -``` - ---- - -### Task 4: Imports context module - -**Files:** -- Create: `lib/kith/imports.ex` -- Create: `test/support/fixtures/imports_fixtures.ex` -- Create: `test/kith/imports_test.exs` - -- [ ] **Step 1: Write failing tests for context functions** - -Create `test/kith/imports_test.exs`: - -```elixir -defmodule Kith.ImportsTest do - use Kith.DataCase, async: true - - alias Kith.Imports - alias Kith.Imports.{Import, ImportRecord} - - import Kith.AccountsFixtures - import Kith.ContactsFixtures - - setup do - user = user_fixture() - %{user: user, account_id: user.account_id} - end - - describe "create_import/3" do - test "creates an import with valid attrs", %{account_id: account_id, user: user} do - attrs = %{source: "monica", file_name: 
"export.json", file_size: 1024} - assert {:ok, %Import{} = import} = Imports.create_import(account_id, user.id, attrs) - assert import.source == "monica" - assert import.status == "pending" - assert import.account_id == account_id - end - - test "rejects concurrent imports for same account", %{account_id: account_id, user: user} do - attrs = %{source: "monica", file_name: "export.json", file_size: 1024} - {:ok, _} = Imports.create_import(account_id, user.id, attrs) - assert {:error, :import_in_progress} = Imports.create_import(account_id, user.id, attrs) - end - end - - describe "resolve_source/1" do - test "resolves monica" do - assert Imports.resolve_source("monica") == {:ok, Kith.Imports.Sources.Monica} - end - - test "resolves vcard" do - assert Imports.resolve_source("vcard") == {:ok, Kith.Imports.Sources.VCard} - end - - test "rejects unknown source" do - assert Imports.resolve_source("unknown") == {:error, :unknown_source} - end - end - - describe "record_imported_entity/5" do - test "creates a new import record", %{account_id: account_id, user: user} do - {:ok, import} = Imports.create_import(account_id, user.id, %{source: "monica"}) - contact = contact_fixture(account_id) - - assert {:ok, %ImportRecord{}} = - Imports.record_imported_entity(import, "contact", "uuid-123", "contact", contact.id) - end - - test "upserts on re-import (updates import_id)", %{account_id: account_id, user: user} do - {:ok, import1} = Imports.create_import(account_id, user.id, %{source: "monica"}) - contact = contact_fixture(account_id) - - {:ok, rec1} = Imports.record_imported_entity(import1, "contact", "uuid-123", "contact", contact.id) - - # Complete first import so we can create a second - Imports.update_import_status(import1, "completed", %{completed_at: DateTime.utc_now()}) - - {:ok, import2} = Imports.create_import(account_id, user.id, %{source: "monica"}) - {:ok, rec2} = Imports.record_imported_entity(import2, "contact", "uuid-123", "contact", contact.id) - - assert rec2.id 
== rec1.id - assert rec2.import_id == import2.id - end - end - - describe "find_import_record/4" do - test "finds existing record", %{account_id: account_id, user: user} do - {:ok, import} = Imports.create_import(account_id, user.id, %{source: "monica"}) - contact = contact_fixture(account_id) - Imports.record_imported_entity(import, "contact", "uuid-123", "contact", contact.id) - - assert %ImportRecord{} = Imports.find_import_record(account_id, "monica", "contact", "uuid-123") - end - - test "returns nil for nonexistent", %{account_id: account_id} do - assert is_nil(Imports.find_import_record(account_id, "monica", "contact", "missing")) - end - end - - describe "update_import_status/3" do - test "updates status and optional fields", %{account_id: account_id, user: user} do - {:ok, import} = Imports.create_import(account_id, user.id, %{source: "monica"}) - now = DateTime.utc_now() |> DateTime.truncate(:second) - - {:ok, updated} = Imports.update_import_status(import, "processing", %{started_at: now}) - assert updated.status == "processing" - assert updated.started_at == now - end - end -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports_test.exs -v` -Expected: FAIL — module `Kith.Imports` not found. - -- [ ] **Step 3: Write the Imports context module** - -Create `lib/kith/imports.ex`: - -```elixir -defmodule Kith.Imports do - @moduledoc """ - The Imports context — manages import jobs, source resolution, and import record tracking. 
- """ - - import Ecto.Query, warn: false - alias Kith.Repo - alias Kith.Imports.{Import, ImportRecord} - - @sources %{ - "monica" => Kith.Imports.Sources.Monica, - "vcard" => Kith.Imports.Sources.VCard - } - - ## Import Jobs - - def create_import(account_id, user_id, attrs) do - # Application-level check first (friendlier error) - if has_active_import?(account_id) do - {:error, :import_in_progress} - else - %Import{account_id: account_id, user_id: user_id} - |> Import.create_changeset(attrs) - |> Repo.insert() - |> case do - {:ok, import} -> {:ok, import} - {:error, %{errors: [{:account_id, {_, [constraint: :unique, constraint_name: "imports_one_active_per_account_idx"]}} | _]}} -> - {:error, :import_in_progress} - {:error, changeset} -> {:error, changeset} - end - end - end - - def get_import!(id), do: Repo.get!(Import, id) - - def get_import(id), do: Repo.get(Import, id) - - def update_import_status(%Import{} = import, status, attrs \\ %{}) do - import - |> Import.status_changeset(status, attrs) - |> Repo.update() - end - - def cancel_import(%Import{} = import) do - update_import_status(import, "cancelled") - end - - def get_active_import(account_id) do - Import - |> where([i], i.account_id == ^account_id) - |> where([i], i.status in ["pending", "processing"]) - |> Repo.one() - end - - defp has_active_import?(account_id) do - Import - |> where([i], i.account_id == ^account_id) - |> where([i], i.status in ["pending", "processing"]) - |> Repo.exists?() - end - - ## Source Resolution - - def resolve_source(source) when is_binary(source) do - case Map.get(@sources, source) do - nil -> {:error, :unknown_source} - mod -> {:ok, mod} - end - end - - ## Import Records - - def find_import_record(account_id, source, source_entity_type, source_entity_id) do - ImportRecord - |> where([r], r.account_id == ^account_id) - |> where([r], r.source == ^source) - |> where([r], r.source_entity_type == ^source_entity_type) - |> where([r], r.source_entity_id == ^source_entity_id) - |> 
Repo.one() - end - - def record_imported_entity(%Import{} = import, source_entity_type, source_entity_id, local_entity_type, local_entity_id) do - now = DateTime.utc_now() |> DateTime.truncate(:second) - - %ImportRecord{} - |> ImportRecord.changeset(%{ - account_id: import.account_id, - import_id: import.id, - source: import.source, - source_entity_type: source_entity_type, - source_entity_id: source_entity_id, - local_entity_type: local_entity_type, - local_entity_id: local_entity_id - }) - |> Repo.insert( - on_conflict: [set: [import_id: import.id, updated_at: now]], - conflict_target: {:unsafe_fragment, ~s|("account_id", "source", "source_entity_type", "source_entity_id")|}, - returning: true - ) - end - - def wipe_api_key(%Import{} = import) do - import - |> Ecto.Changeset.change(api_key_encrypted: nil) - |> Repo.update() - end - - def pending_async_jobs_count(import_id) do - Oban.Job - |> where([j], fragment("? ->> 'import_id' = ?", j.args, ^to_string(import_id))) - |> where([j], j.state in ["available", "scheduled", "executing", "retryable"]) - |> Repo.aggregate(:count) - end -end -``` - -- [ ] **Step 4: Create test fixtures** - -Create `test/support/fixtures/imports_fixtures.ex`: - -```elixir -defmodule Kith.ImportsFixtures do - @moduledoc "Test helpers for the Imports context." - - alias Kith.Imports - - def import_fixture(account_id, user_id, attrs \\ %{}) do - attrs = Enum.into(attrs, %{source: "monica", file_name: "export.json", file_size: 1024}) - {:ok, import} = Imports.create_import(account_id, user_id, attrs) - import - end -end -``` - -- [ ] **Step 5: Run tests to verify they pass** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports_test.exs -v` -Expected: All PASS (some tests may fail because Monica/VCard source modules don't exist yet — that's fine, the `resolve_source` tests will be the ones that fail. If so, skip those for now and they'll pass after Task 5/6). 
- -- [ ] **Step 6: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports.ex test/kith/imports_test.exs test/support/fixtures/imports_fixtures.ex -git commit -m "feat: add Imports context with job management and record tracking" -``` - ---- - -### Task 5: VCard source adapter - -**Files:** -- Create: `lib/kith/imports/sources/vcard.ex` -- Create: `test/kith/imports/sources/vcard_test.exs` - -- [ ] **Step 1: Write failing test** - -Create `test/kith/imports/sources/vcard_test.exs`: - -```elixir -defmodule Kith.Imports.Sources.VCardTest do - use Kith.DataCase, async: true - - alias Kith.Imports.Sources.VCard, as: VCardSource - - describe "name/0" do - test "returns source name" do - assert VCardSource.name() == "vCard" - end - end - - describe "file_types/0" do - test "returns accepted file types" do - assert VCardSource.file_types() == [".vcf"] - end - end - - describe "supports_api?/0" do - test "returns false" do - refute VCardSource.supports_api?() - end - end - - describe "validate_file/1" do - test "validates a proper vCard file" do - data = "BEGIN:VCARD\r\nVERSION:3.0\r\nFN:Jane Doe\r\nEND:VCARD\r\n" - assert {:ok, _} = VCardSource.validate_file(data) - end - - test "rejects invalid data" do - assert {:error, _} = VCardSource.validate_file("not a vcard") - end - end - - describe "parse_summary/1" do - test "returns contact count" do - data = """ - BEGIN:VCARD\r\nVERSION:3.0\r\nFN:Jane Doe\r\nEND:VCARD\r\n\ - BEGIN:VCARD\r\nVERSION:3.0\r\nFN:John Smith\r\nEND:VCARD\r\n\ - """ - assert {:ok, %{contacts: 2}} = VCardSource.parse_summary(data) - end - end -end -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/vcard_test.exs -v` -Expected: FAIL — module not found. - -- [ ] **Step 3: Implement VCard source** - -Create `lib/kith/imports/sources/vcard.ex`: - -```elixir -defmodule Kith.Imports.Sources.VCard do - @moduledoc """ - VCard import source. 
Wraps the existing `Kith.VCard.Parser`. - """ - - @behaviour Kith.Imports.Source - - alias Kith.VCard.Parser - alias Kith.Contacts - alias Kith.Imports - - require Logger - - @impl true - def name, do: "vCard" - - @impl true - def file_types, do: [".vcf"] - - @impl true - def supports_api?, do: false - - @impl true - def validate_file(data) do - if String.contains?(data, "BEGIN:VCARD") do - {:ok, %{}} - else - {:error, "File does not appear to be a valid vCard file"} - end - end - - @impl true - def parse_summary(data) do - case Parser.parse(data) do - {:ok, contacts} -> {:ok, %{contacts: length(contacts)}} - {:error, reason} -> {:error, reason} - end - end - - @impl true - def import(account_id, user_id, data, opts) do - import_record = opts[:import] - - case Parser.parse(data) do - {:ok, parsed_contacts} -> - total = length(parsed_contacts) - topic = "import:#{account_id}" - broadcast_interval = max(1, div(total, 50)) - - result = - parsed_contacts - |> Enum.with_index(1) - |> Enum.reduce(%{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, fn {parsed, idx}, acc -> - # Check cancellation - if import_record && rem(idx, 10) == 0 do - refreshed = Imports.get_import!(import_record.id) - if refreshed.status == "cancelled", do: throw(:cancelled) - end - - result = - try do - case Contacts.import_contact(account_id, parsed) do - {:ok, contact} -> - if import_record do - source_id = "vcard-#{idx}" - Imports.record_imported_entity(import_record, "contact", source_id, "contact", contact.id) - end - %{acc | contacts: acc.contacts + 1} - - {:error, reason} -> - add_error(acc, "Contact #{idx}: #{inspect(reason)}") - end - rescue - e -> - add_error(acc, "Contact #{idx}: #{Exception.message(e)}") - end - - if rem(idx, broadcast_interval) == 0 || idx == total do - Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_progress, %{current: idx, total: total}}) - end - - result - end) - - {:ok, result} - - {:error, reason} -> - {:error, reason} - end - catch - 
:cancelled -> {:ok, %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: ["Import cancelled"]}} - end - - defp add_error(acc, msg) do - errors = if length(acc.errors) < 50, do: acc.errors ++ [msg], else: acc.errors - %{acc | skipped: acc.skipped + 1, error_count: acc.error_count + 1, errors: errors} - end -end -``` - -- [ ] **Step 4: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/vcard_test.exs -v` -Expected: All PASS - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/sources/vcard.ex test/kith/imports/sources/vcard_test.exs -git commit -m "feat: add VCard import source adapter" -``` - ---- - -### Task 6: ImportSourceWorker — Generic Oban worker - -**Files:** -- Create: `lib/kith/workers/import_source_worker.ex` -- Create: `test/kith/workers/import_source_worker_test.exs` - -- [ ] **Step 1: Write failing test** - -Create `test/kith/workers/import_source_worker_test.exs`: - -```elixir -defmodule Kith.Workers.ImportSourceWorkerTest do - use Kith.DataCase, async: true - use Oban.Testing, repo: Kith.Repo - - alias Kith.Workers.ImportSourceWorker - alias Kith.Imports - - import Kith.AccountsFixtures - import Kith.ImportsFixtures - - setup do - user = user_fixture() - %{user: user, account_id: user.account_id} - end - - describe "perform/1" do - test "processes a vcard import", %{account_id: account_id, user: user} do - # Store a VCF file - vcf_data = "BEGIN:VCARD\r\nVERSION:3.0\r\nN:Doe;Jane;;;\r\nFN:Jane Doe\r\nEND:VCARD\r\n" - storage_key = "imports/test/export.vcf" - {:ok, _} = Kith.Storage.upload_binary(vcf_data, storage_key) - - import_job = import_fixture(account_id, user.id, %{ - source: "vcard", - file_name: "export.vcf", - file_storage_key: storage_key - }) - - assert :ok = perform_job(ImportSourceWorker, %{import_id: import_job.id}) - - updated = Imports.get_import!(import_job.id) - assert updated.status == "completed" - assert 
updated.summary["contacts"] >= 1 - end - - test "marks import as failed on error", %{account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{ - source: "vcard", - file_name: "export.vcf", - file_storage_key: "nonexistent/path.vcf" - }) - - assert {:error, _} = perform_job(ImportSourceWorker, %{import_id: import_job.id}) - - updated = Imports.get_import!(import_job.id) - assert updated.status == "failed" - end - end -end -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/import_source_worker_test.exs -v` -Expected: FAIL — module not found. - -- [ ] **Step 3: Implement the worker** - -Create `lib/kith/workers/import_source_worker.ex`: - -```elixir -defmodule Kith.Workers.ImportSourceWorker do - @moduledoc """ - Generic Oban worker that orchestrates any import source. - - Loads the import job, resolves the source module, loads the file from - Storage, and delegates to `source.import/4`. Broadcasts progress via PubSub. 
- """ - - use Oban.Worker, queue: :imports, max_attempts: 3 - - require Logger - - alias Kith.Imports - - @impl Oban.Worker - def perform(%Oban.Job{args: %{"import_id" => import_id}}) do - import = Imports.get_import!(import_id) - - with {:ok, source_mod} <- Imports.resolve_source(import.source), - {:ok, _} <- Imports.update_import_status(import, "processing", %{started_at: DateTime.utc_now()}), - {:ok, data} <- load_file(import.file_storage_key), - {:ok, summary} <- source_mod.import(import.account_id, import.user_id, data, %{import: import}) do - now = DateTime.utc_now() |> DateTime.truncate(:second) - summary_map = ensure_map(summary) - - Imports.update_import_status(import, "completed", %{ - summary: summary_map, - completed_at: now - }) - - topic = "import:#{import.account_id}" - Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) - - Logger.info("Import #{import_id} completed: #{inspect(summary_map)}") - :ok - else - {:error, reason} -> - Logger.error("Import #{import_id} failed: #{inspect(reason)}") - Imports.update_import_status(import, "failed", %{ - summary: %{error: inspect(reason)}, - completed_at: DateTime.utc_now() |> DateTime.truncate(:second) - }) - {:error, reason} - end - end - - defp load_file(nil), do: {:error, "No file storage key"} - defp load_file(key) do - case Kith.Storage.read(key) do - {:ok, data} -> {:ok, data} - {:error, reason} -> {:error, "Failed to load file: #{inspect(reason)}"} - end - end - - # Handle plain maps (already a map) vs structs - defp ensure_map(%{__struct__: _} = s), do: Map.from_struct(s) - defp ensure_map(m) when is_map(m), do: m -end -``` - -**Note:** Check if `Kith.Storage.read/1` exists. If not, you'll need to add it — look at the Storage module for the equivalent function that reads a file by key. It may be named `download/1` or `get/1`. Adapt the function name accordingly. 
- -- [ ] **Step 4: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/import_source_worker_test.exs -v` -Expected: All PASS (may need to adjust `Storage.read/1` to match actual API). - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/import_source_worker.ex test/kith/workers/import_source_worker_test.exs -git commit -m "feat: add generic ImportSourceWorker for Oban-based imports" -``` - ---- - -### Task 7: Oban config — Add new queues and cron jobs - -**Files:** -- Modify: `config/config.exs:34-53` - -- [ ] **Step 1: Add queues and cron entry** - -In `config/config.exs`, add to the `queues` list: - -```elixir - photo_sync: 5, - api_supplement: 3 -``` - -Add to the `crontab` list: - -```elixir - {"0 5 * * 0", Kith.Workers.ImportFileCleanupWorker} -``` - -- [ ] **Step 2: Register JSON MIME type for uploads** - -Add to the existing `config :mime` line or add new: - -```elixir -config :mime, :types, %{"text/vcard" => ["vcf"], "application/json" => ["json"]} -``` - -- [ ] **Step 3: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add config/config.exs -git commit -m "feat: add photo_sync, api_supplement Oban queues and cleanup cron" -``` - ---- - -### Task 8: Photo.pending_sync? helper - -**Files:** -- Modify: `lib/kith/contacts/photo.ex` - -- [ ] **Step 1: Add pending_sync? helper to Photo** - -In `lib/kith/contacts/photo.ex`, add after the `changeset/2` function: - -```elixir - @doc "Returns true if the photo is awaiting sync from an external source." - def pending_sync?(%__MODULE__{storage_key: "pending_sync:" <> _}), do: true - def pending_sync?(%__MODULE__{}), do: false -``` - -- [ ] **Step 2: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/contacts/photo.ex -git commit -m "feat: add Photo.pending_sync? 
helper for import photo placeholders" -``` - ---- - -### Task 9: Monica source — Skeleton + validate_file + parse_summary - -**Files:** -- Create: `lib/kith/imports/sources/monica.ex` -- Create: `test/kith/imports/sources/monica_test.exs` -- Create: `test/support/fixtures/monica_export.json` (minimal test fixture) - -This is the first of several tasks building out the Monica source. We start with the structural validation and summary parsing — the `import/4` callback is built incrementally in Tasks 10-13. - -- [ ] **Step 1: Create a minimal Monica JSON test fixture** - -Create `test/support/fixtures/monica_export.json` — a minimal but structurally complete Monica export: - -```json -{ - "version": "3.0.0", - "app_version": "4.1.2", - "account": { - "data": { - "uuid": "test-account-uuid" - } - }, - "contacts": { - "data": [ - { - "uuid": "contact-uuid-1", - "first_name": "Jane", - "last_name": "Doe", - "middle_name": "Marie", - "nickname": "JD", - "description": "A friend", - "company": "Acme", - "job": "Engineer", - "is_starred": true, - "is_active": true, - "is_dead": false, - "gender": {"data": {"uuid": "gender-uuid-1", "name": "Female"}}, - "birthdate": { - "data": { - "date": "1990-06-15", - "is_year_unknown": false, - "is_age_based": false - } - }, - "first_met_date": { - "data": { - "date": "2015-09-01", - "is_year_unknown": false, - "is_age_based": false - } - }, - "first_met_through": null, - "tags": {"data": [{"uuid": "tag-uuid-1", "name": "College"}]}, - "contact_fields": { - "data": [ - { - "uuid": "cf-uuid-1", - "value": "jane@example.com", - "contact_field_type": {"data": {"uuid": "cft-uuid-1", "name": "Email"}} - } - ] - }, - "addresses": { - "data": [ - { - "uuid": "addr-uuid-1", - "street": "123 Main St", - "city": "Springfield", - "province": "IL", - "postal_code": "62701", - "country": "US" - } - ] - }, - "notes": { - "data": [ - { - "uuid": "note-uuid-1", - "body": "Met at orientation", - "created_at": "2020-01-15T10:00:00Z" - } - ] - }, - 
"reminders": {"data": []}, - "pets": { - "data": [ - { - "uuid": "pet-uuid-1", - "name": "Buddy", - "pet_category": {"data": {"name": "Dog"}} - } - ] - }, - "photos": { - "data": [ - { - "uuid": "photo-uuid-1", - "file_name": "profile.jpg" - } - ] - }, - "activities": {"data": []} - }, - { - "uuid": "contact-uuid-2", - "first_name": "John", - "last_name": "Smith", - "middle_name": null, - "nickname": null, - "description": null, - "company": null, - "job": null, - "is_starred": false, - "is_active": true, - "is_dead": false, - "gender": null, - "birthdate": {"data": {"date": null, "is_year_unknown": false, "is_age_based": false}}, - "first_met_date": {"data": {"date": null, "is_year_unknown": false, "is_age_based": false}}, - "first_met_through": {"data": {"uuid": "contact-uuid-1"}}, - "tags": {"data": []}, - "contact_fields": {"data": []}, - "addresses": {"data": []}, - "notes": {"data": []}, - "reminders": {"data": []}, - "pets": {"data": []}, - "photos": {"data": []}, - "activities": {"data": []} - } - ] - }, - "relationships": { - "data": [ - { - "uuid": "rel-uuid-1", - "contact_is": {"data": {"uuid": "contact-uuid-1"}}, - "of_contact": {"data": {"uuid": "contact-uuid-2"}}, - "relationship_type": {"data": {"uuid": "rt-uuid-1", "name": "Friend", "reverse_name": "Friend"}} - } - ] - } -} -``` - -- [ ] **Step 2: Write failing tests** - -Create `test/kith/imports/sources/monica_test.exs`: - -```elixir -defmodule Kith.Imports.Sources.MonicaTest do - use Kith.DataCase, async: true - - alias Kith.Imports.Sources.Monica, as: MonicaSource - - @fixture_path "test/support/fixtures/monica_export.json" - - setup do - data = File.read!(@fixture_path) - %{data: data} - end - - describe "name/0" do - test "returns source name" do - assert MonicaSource.name() == "Monica CRM" - end - end - - describe "file_types/0" do - test "returns accepted file types" do - assert MonicaSource.file_types() == [".json"] - end - end - - describe "supports_api?/0" do - test "returns true" do - 
assert MonicaSource.supports_api?() - end - end - - describe "validate_file/1" do - test "validates a proper Monica export", %{data: data} do - assert {:ok, _} = MonicaSource.validate_file(data) - end - - test "rejects invalid JSON" do - assert {:error, _} = MonicaSource.validate_file("not json") - end - - test "rejects JSON missing required keys" do - assert {:error, _} = MonicaSource.validate_file(Jason.encode!(%{foo: "bar"})) - end - end - - describe "parse_summary/1" do - test "returns entity counts", %{data: data} do - assert {:ok, summary} = MonicaSource.parse_summary(data) - assert summary.contacts == 2 - assert summary.relationships == 1 - assert summary.photos == 1 - end - end -end -``` - -- [ ] **Step 3: Run tests to verify they fail** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/monica_test.exs -v` -Expected: FAIL — module not found. - -- [ ] **Step 4: Implement Monica source skeleton** - -Create `lib/kith/imports/sources/monica.ex`: - -```elixir -defmodule Kith.Imports.Sources.Monica do - @moduledoc """ - Monica CRM import source. Parses JSON export files and imports contacts - with all associated data. Supports optional API photo sync. - """ - - @behaviour Kith.Imports.Source - - require Logger - - alias Kith.Imports - - @pet_species_map %{ - "Dog" => "dog", "Cat" => "cat", "Bird" => "bird", "Fish" => "fish", - "Reptile" => "reptile", "Rabbit" => "rabbit", "Hamster" => "hamster" - } - - @impl true - def name, do: "Monica CRM" - - @impl true - def file_types, do: [".json"] - - @impl true - def supports_api?, do: true - - @impl true - def validate_file(data) do - with {:ok, parsed} <- Jason.decode(data), - true <- is_map(parsed), - true <- Map.has_key?(parsed, "contacts"), - true <- Map.has_key?(parsed, "account") do - {:ok, parsed} - else - _ -> {:error, "Invalid Monica CRM export file. 
Expected JSON with 'contacts' and 'account' keys."} - end - end - - @impl true - def parse_summary(data) do - with {:ok, parsed} <- Jason.decode(data) do - contacts = get_in(parsed, ["contacts", "data"]) || [] - relationships = get_in(parsed, ["relationships", "data"]) || [] - - photos = - contacts - |> Enum.flat_map(fn c -> get_in(c, ["photos", "data"]) || [] end) - |> length() - - notes = - contacts - |> Enum.flat_map(fn c -> get_in(c, ["notes", "data"]) || [] end) - |> length() - - {:ok, %{ - contacts: length(contacts), - relationships: length(relationships), - photos: photos, - notes: notes - }} - end - end - - @impl true - def import(account_id, user_id, data, opts) do - import_record = opts[:import] - - with {:ok, parsed} <- Jason.decode(data) do - contacts_data = get_in(parsed, ["contacts", "data"]) || [] - relationships_data = get_in(parsed, ["relationships", "data"]) || [] - total = length(contacts_data) - topic = "import:#{account_id}" - broadcast_interval = max(1, div(total, 50)) - - # Phase 1: Reference data - gender_map = import_reference_genders(account_id, contacts_data) - tag_map = import_reference_tags(account_id, contacts_data) - cft_map = import_reference_contact_field_types(account_id, contacts_data) - atc_map = import_reference_activity_type_categories(account_id, contacts_data) - - # Phase 2 & 3: Contacts + children (including activities with cross-contact dedup) - # processed_activities is a MapSet tracking activity UUIDs already created in this run - {contact_map, summary, _processed_activities} = - contacts_data - |> Enum.with_index(1) - |> Enum.reduce({%{}, init_summary(), MapSet.new()}, fn {contact_data, idx}, {cmap, acc, proc_acts} -> - # Check cancellation - if import_record && rem(idx, 10) == 0 do - refreshed = Imports.get_import!(import_record.id) - if refreshed.status == "cancelled", do: throw(:cancelled) - end - - case import_single_contact(account_id, user_id, contact_data, import_record, %{ - gender_map: gender_map, - tag_map: 
tag_map, - cft_map: cft_map, - atc_map: atc_map, - processed_activities: proc_acts - }) do - {:ok, contact, new_proc_acts} -> - new_cmap = Map.put(cmap, contact_data["uuid"], contact.id) - new_acc = %{acc | contacts: acc.contacts + 1} - - if rem(idx, broadcast_interval) == 0 || idx == total do - Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_progress, %{current: idx, total: total}}) - end - - {new_cmap, new_acc, new_proc_acts} - - {:skip, reason} -> - Logger.info("Skipped contact #{contact_data["uuid"]}: #{reason}") - {cmap, %{acc | skipped: acc.skipped + 1}, proc_acts} - - {:error, reason} -> - Logger.warning("Failed to import contact #{contact_data["uuid"]}: #{inspect(reason)}") - {cmap, add_error(acc, "#{contact_data["first_name"]} #{contact_data["last_name"]}: #{inspect(reason)}"), proc_acts} - end - end) - - # Phase 4: Cross-contact references - import_relationships(account_id, relationships_data, contact_map, import_record) - import_first_met_through_links(account_id, contacts_data, contact_map) - - # Finalize summary — count notes from import_records (more accurate than in-loop counting) - notes_count = if import_record do - import_record.id - |> Imports.count_import_records_by_type("note") - else - 0 - end - - {:ok, %{summary | notes: notes_count}} - end - catch - :cancelled -> {:ok, init_summary()} - end - - # --- API callbacks --- - - @impl true - def test_connection(%{url: url, api_key: api_key}) do - case Req.get("#{url}/api/me", headers: [{"Authorization", "Bearer #{api_key}"}]) do - {:ok, %{status: 200}} -> :ok - {:ok, %{status: status}} -> {:error, "API returned status #{status}"} - {:error, reason} -> {:error, "Connection failed: #{inspect(reason)}"} - end - end - - @impl true - def fetch_photo(%{url: url, api_key: api_key}, photo_uuid) do - case Req.get("#{url}/api/photos/#{photo_uuid}", - headers: [{"Authorization", "Bearer #{api_key}"}]) do - {:ok, %{status: 200, body: body}} -> {:ok, body} - {:ok, %{status: 429}} -> {:error, 
:rate_limited} - {:ok, %{status: status}} -> {:error, "HTTP #{status}"} - {:error, reason} -> {:error, reason} - end - end - - @impl true - def api_supplement_options do - [ - %{key: :photos, label: "Sync photos", description: "Download contact photos via API"}, - %{key: :first_met_details, label: "Fetch \"How we met\" details", - description: "first_met_where and first_met_additional_info (not in JSON export)"} - ] - end - - @impl true - def fetch_supplement(%{url: url, api_key: api_key}, contact_source_id, :first_met_details) do - case Req.get("#{url}/api/contacts/#{contact_source_id}", - headers: [{"Authorization", "Bearer #{api_key}"}]) do - {:ok, %{status: 200, body: body}} -> - data = get_in(body, ["data"]) || body - {:ok, %{ - first_met_where: data["first_met_where"], - first_met_additional_info: data["first_met_additional_information"] - }} - {:ok, %{status: 429}} -> {:error, :rate_limited} - {:ok, %{status: status}} -> {:error, "HTTP #{status}"} - {:error, reason} -> {:error, reason} - end - end - - # --- Private: Phase 1 — Reference Data --- - - defp import_reference_genders(account_id, contacts_data) do - contacts_data - |> Enum.map(&get_in(&1, ["gender", "data"])) - |> Enum.reject(&is_nil/1) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, fn gender_data, acc -> - case find_or_create_gender(account_id, gender_data["name"]) do - {:ok, gender} -> Map.put(acc, gender_data["uuid"], gender.id) - _ -> acc - end - end) - end - - defp find_or_create_gender(account_id, name) do - alias Kith.Contacts.Gender - alias Kith.Repo - import Ecto.Query - - case Repo.one(from g in Gender, where: g.name == ^name and (is_nil(g.account_id) or g.account_id == ^account_id)) do - nil -> Kith.Contacts.create_gender(account_id, %{name: name}) - gender -> {:ok, gender} - end - end - - defp import_reference_tags(account_id, contacts_data) do - contacts_data - |> Enum.flat_map(fn c -> get_in(c, ["tags", "data"]) || [] end) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, 
fn tag_data, acc -> - case find_or_create_tag(account_id, tag_data["name"]) do - {:ok, tag} -> Map.put(acc, tag_data["uuid"], tag.id) - _ -> acc - end - end) - end - - defp find_or_create_tag(account_id, name) do - alias Kith.Contacts.Tag - alias Kith.Repo - import Ecto.Query - - case Repo.one(from t in Tag, where: t.account_id == ^account_id and t.name == ^name) do - nil -> Kith.Contacts.create_tag(account_id, %{name: name}) - tag -> {:ok, tag} - end - end - - defp import_reference_contact_field_types(account_id, contacts_data) do - contacts_data - |> Enum.flat_map(fn c -> get_in(c, ["contact_fields", "data"]) || [] end) - |> Enum.map(&get_in(&1, ["contact_field_type", "data"])) - |> Enum.reject(&is_nil/1) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, fn cft_data, acc -> - case find_or_create_contact_field_type(account_id, cft_data["name"]) do - {:ok, cft} -> Map.put(acc, cft_data["uuid"], cft.id) - _ -> acc - end - end) - end - - defp find_or_create_contact_field_type(account_id, name) do - alias Kith.Contacts.ContactFieldType - alias Kith.Repo - import Ecto.Query - - case Repo.one(from cft in ContactFieldType, where: cft.name == ^name and (is_nil(cft.account_id) or cft.account_id == ^account_id)) do - nil -> Kith.Contacts.create_contact_field_type(account_id, %{name: name}) - cft -> {:ok, cft} - end - end - - defp import_reference_activity_type_categories(account_id, contacts_data) do - contacts_data - |> Enum.flat_map(fn c -> get_in(c, ["activities", "data"]) || [] end) - |> Enum.map(&get_in(&1, ["activity_type_category", "data"])) - |> Enum.reject(&is_nil/1) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, fn atc_data, acc -> - case find_or_create_activity_type_category(account_id, atc_data["name"]) do - {:ok, atc} -> Map.put(acc, atc_data["uuid"], atc.id) - _ -> acc - end - end) - end - - defp find_or_create_activity_type_category(account_id, name) do - alias Kith.Contacts.ActivityTypeCategory - alias Kith.Repo - import Ecto.Query - - case 
Repo.one(from atc in ActivityTypeCategory, where: atc.name == ^name and (is_nil(atc.account_id) or atc.account_id == ^account_id)) do - nil -> Kith.Contacts.create_activity_type_category(account_id, %{name: name}) - atc -> {:ok, atc} - end - end - - # --- Private: Phase 2 — Single Contact Import --- - - # Returns {:ok, contact, updated_processed_activities} | {:skip, reason} | {:error, reason} - defp import_single_contact(account_id, user_id, contact_data, import_record, ref_maps) do - uuid = contact_data["uuid"] - proc_acts = ref_maps.processed_activities - - # Check for existing import record - existing = if import_record, do: Imports.find_import_record(account_id, "monica", "contact", uuid) - - case existing do - %{local_entity_id: local_id} -> - # Re-import: check if soft-deleted - case Kith.Repo.get(Kith.Contacts.Contact, local_id) do - %{deleted_at: deleted_at} when not is_nil(deleted_at) -> - {:skip, "previously deleted in Kith, not restoring"} - nil -> - do_import_contact(account_id, user_id, contact_data, import_record, ref_maps) - _contact -> - do_upsert_contact(account_id, user_id, local_id, contact_data, import_record, ref_maps) - end - nil -> - do_import_contact(account_id, user_id, contact_data, import_record, ref_maps) - end - end - - defp do_import_contact(account_id, user_id, contact_data, import_record, ref_maps) do - attrs = map_contact_attrs(contact_data, ref_maps) - - case Kith.Contacts.create_contact(account_id, attrs) do - {:ok, contact} -> - new_proc_acts = import_contact_children(contact, user_id, contact_data, import_record, ref_maps) - import_contact_tags(contact, contact_data, ref_maps.tag_map) - - if import_record do - Imports.record_imported_entity(import_record, "contact", contact_data["uuid"], "contact", contact.id) - end - - {:ok, contact, new_proc_acts} - - {:error, changeset} -> - {:error, changeset} - end - end - - defp do_upsert_contact(account_id, user_id, local_id, contact_data, import_record, ref_maps) do - contact = 
Kith.Repo.get!(Kith.Contacts.Contact, local_id) - attrs = map_contact_attrs(contact_data, ref_maps) - - case Kith.Contacts.update_contact(contact, attrs) do - {:ok, contact} -> - new_proc_acts = import_contact_children(contact, user_id, contact_data, import_record, ref_maps) - import_contact_tags(contact, contact_data, ref_maps.tag_map) - - if import_record do - Imports.record_imported_entity(import_record, "contact", contact_data["uuid"], "contact", contact.id) - end - - {:ok, contact, new_proc_acts} - - {:error, changeset} -> - {:error, changeset} - end - end - - defp map_contact_attrs(contact_data, ref_maps) do - gender_id = if gender = get_in(contact_data, ["gender", "data"]) do - Map.get(ref_maps.gender_map, gender["uuid"]) - end - - birthdate_info = parse_special_date(get_in(contact_data, ["birthdate", "data"])) - first_met_info = parse_special_date(get_in(contact_data, ["first_met_date", "data"])) - - %{ - first_name: contact_data["first_name"], - last_name: contact_data["last_name"], - middle_name: contact_data["middle_name"], - nickname: contact_data["nickname"], - description: contact_data["description"], - company: contact_data["company"], - occupation: contact_data["job"], - favorite: contact_data["is_starred"] || false, - is_archived: contact_data["is_active"] == false, - deceased: contact_data["is_dead"] || false, - gender_id: gender_id, - birthdate: birthdate_info.date, - birthdate_year_unknown: birthdate_info.year_unknown, - first_met_at: first_met_info.date, - first_met_year_unknown: first_met_info.year_unknown - } - |> Enum.reject(fn {_k, v} -> is_nil(v) end) - |> Map.new() - end - - defp parse_special_date(nil), do: %{date: nil, year_unknown: false} - defp parse_special_date(%{"date" => nil}), do: %{date: nil, year_unknown: false} - defp parse_special_date(%{"date" => date_str, "is_year_unknown" => year_unknown} = data) do - case Date.from_iso8601(date_str) do - {:ok, date} -> - if year_unknown && !data["is_age_based"] do - # Store with sentinel 
year 1, flag as unknown - %{date: %{date | year: 1}, year_unknown: true} - else - %{date: date, year_unknown: false} - end - _ -> - %{date: nil, year_unknown: false} - end - end - defp parse_special_date(_), do: %{date: nil, year_unknown: false} - - # --- Private: Phase 3 — Contact Children --- - - # Returns updated processed_activities MapSet - defp import_contact_children(contact, user_id, contact_data, import_record, ref_maps) do - import_contact_fields(contact, contact_data, ref_maps.cft_map, import_record) - import_addresses(contact, contact_data, import_record) - import_notes(contact, user_id, contact_data, import_record) - import_reminders(contact, user_id, contact_data, import_record) - import_pets(contact, contact_data, import_record) - import_photos(contact, contact_data, import_record) - import_activities(contact, user_id, contact_data, import_record, ref_maps.processed_activities, ref_maps.atc_map) - end - - defp import_contact_fields(contact, contact_data, cft_map, import_record) do - for cf <- get_in(contact_data, ["contact_fields", "data"]) || [] do - cft_uuid = get_in(cf, ["contact_field_type", "data", "uuid"]) - cft_id = Map.get(cft_map, cft_uuid) - - if cft_id do - case Kith.Contacts.create_contact_field(contact, %{ - "value" => cf["value"], - "contact_field_type_id" => cft_id - }) do - {:ok, field} -> - if import_record, do: Imports.record_imported_entity(import_record, "contact_field", cf["uuid"], "contact_field", field.id) - {:error, reason} -> - Logger.warning("Failed to import contact field #{cf["uuid"]}: #{inspect(reason)}") - end - end - end - end - - defp import_addresses(contact, contact_data, import_record) do - for addr <- get_in(contact_data, ["addresses", "data"]) || [] do - case Kith.Contacts.create_address(contact, %{ - "line1" => addr["street"], - "city" => addr["city"], - "province" => addr["province"], - "postal_code" => addr["postal_code"], - "country" => addr["country"] - }) do - {:ok, address} -> - if import_record, do: 
Imports.record_imported_entity(import_record, "address", addr["uuid"], "address", address.id) - {:error, reason} -> - Logger.warning("Failed to import address #{addr["uuid"]}: #{inspect(reason)}") - end - end - end - - defp import_notes(contact, user_id, contact_data, import_record) do - for note <- get_in(contact_data, ["notes", "data"]) || [] do - case Kith.Contacts.create_note(contact, user_id, %{"body" => note["body"]}) do - {:ok, created_note} -> - if import_record, do: Imports.record_imported_entity(import_record, "note", note["uuid"], "note", created_note.id) - {:error, reason} -> - Logger.warning("Failed to import note #{note["uuid"]}: #{inspect(reason)}") - end - end - end - - defp import_reminders(contact, user_id, contact_data, import_record) do - for reminder <- get_in(contact_data, ["reminders", "data"]) || [] do - attrs = %{ - type: "one_time", - title: reminder["title"] || "Imported reminder", - next_reminder_date: parse_date_string(reminder["next_expected_date"]), - contact_id: contact.id - } - - if attrs.next_reminder_date do - case Kith.Reminders.create_reminder(contact.account_id, user_id, attrs) do - {:ok, created} -> - if import_record, do: Imports.record_imported_entity(import_record, "reminder", reminder["uuid"], "reminder", created.id) - {:error, reason} -> - Logger.warning("Failed to import reminder #{reminder["uuid"]}: #{inspect(reason)}") - end - end - end - end - - defp import_pets(contact, contact_data, import_record) do - for pet <- get_in(contact_data, ["pets", "data"]) || [] do - category_name = get_in(pet, ["pet_category", "data", "name"]) || "other" - species = Map.get(@pet_species_map, category_name, "other") - - case Kith.Pets.create_pet(contact.account_id, %{ - name: pet["name"] || "Unnamed", - species: species, - contact_id: contact.id - }) do - {:ok, created_pet} -> - if import_record, do: Imports.record_imported_entity(import_record, "pet", pet["uuid"], "pet", created_pet.id) - {:error, reason} -> - Logger.warning("Failed to 
import pet #{pet["uuid"]}: #{inspect(reason)}") - end - end - end - - defp import_photos(contact, contact_data, import_record) do - for photo <- get_in(contact_data, ["photos", "data"]) || [] do - case Kith.Contacts.create_photo(contact, %{ - "file_name" => photo["file_name"] || "photo.jpg", - "storage_key" => "pending_sync:#{photo["uuid"]}", - "file_size" => 0, - "content_type" => "image/jpeg" - }) do - {:ok, created_photo} -> - if import_record, do: Imports.record_imported_entity(import_record, "photo", photo["uuid"], "photo", created_photo.id) - {:error, reason} -> - Logger.warning("Failed to import photo #{photo["uuid"]}: #{inspect(reason)}") - end - end - end - - # Returns updated processed_activities MapSet. - # Activities can be shared across contacts — deduplicate by UUID. - # On first encounter: create the activity + join table entry. - # On subsequent contacts referencing the same UUID: add only the join table entry. - # On resume after cancellation: check import_records first (MapSet starts empty). 
- defp import_activities(contact, user_id, contact_data, import_record, processed_activities, atc_map) do - activities = get_in(contact_data, ["activities", "data"]) || [] - - Enum.reduce(activities, processed_activities, fn activity_data, proc_acts -> - uuid = activity_data["uuid"] - already_in_run = MapSet.member?(proc_acts, uuid) - - # On resume: check import_records if not in this run's MapSet - already_in_db = if !already_in_run && import_record do - Imports.find_import_record(contact.account_id, "monica", "activity", uuid) != nil - else - false - end - - cond do - already_in_run || already_in_db -> - # Activity already created — just add the join table entry - existing_rec = Imports.find_import_record(contact.account_id, "monica", "activity", uuid) - if existing_rec do - Kith.Repo.insert_all("activity_contacts", - [%{activity_id: existing_rec.local_entity_id, contact_id: contact.id}], - on_conflict: :nothing - ) - end - proc_acts - - true -> - # First encounter — create the activity with type category lookup - atc_uuid = get_in(activity_data, ["activity_type_category", "data", "uuid"]) - atc_id = if atc_uuid, do: Map.get(atc_map, atc_uuid) - - attrs = %{ - "title" => activity_data["title"] || "Imported activity", - "description" => activity_data["description"], - "occurred_at" => parse_datetime(activity_data["occurred_at"]) || DateTime.utc_now(), - "activity_type_category_id" => atc_id - } - - case Kith.Activities.create_activity(contact.account_id, attrs, [contact.id]) do - {:ok, %{activity: activity}} -> - if import_record do - Imports.record_imported_entity(import_record, "activity", uuid, "activity", activity.id) - end - MapSet.put(proc_acts, uuid) - - {:error, _reason} -> - Logger.warning("Failed to import activity #{uuid}") - proc_acts - end - end - end) - end - - defp parse_datetime(nil), do: nil - defp parse_datetime(str) do - case DateTime.from_iso8601(str) do - {:ok, dt, _offset} -> dt - _ -> nil - end - end - - defp import_contact_tags(contact, 
contact_data, tag_map) do - for tag_data <- get_in(contact_data, ["tags", "data"]) || [] do - tag_id = Map.get(tag_map, tag_data["uuid"]) - if tag_id do - Kith.Repo.insert_all("contact_tags", - [%{contact_id: contact.id, tag_id: tag_id}], - on_conflict: :nothing - ) - end - end - end - - # --- Private: Phase 4 — Cross-Contact References --- - - defp import_relationships(account_id, relationships_data, contact_map, import_record) do - for rel <- relationships_data do - contact_uuid = get_in(rel, ["contact_is", "data", "uuid"]) - related_uuid = get_in(rel, ["of_contact", "data", "uuid"]) - contact_id = Map.get(contact_map, contact_uuid) - related_id = Map.get(contact_map, related_uuid) - - if contact_id && related_id do - rt_name = get_in(rel, ["relationship_type", "data", "name"]) || "Friend" - case find_or_create_relationship_type(account_id, rt_name, get_in(rel, ["relationship_type", "data"])) do - {:ok, rt} -> - contact = %Kith.Contacts.Contact{id: contact_id, account_id: account_id} - case Kith.Contacts.create_relationship(contact, %{ - "related_contact_id" => related_id, - "relationship_type_id" => rt.id - }) do - {:ok, relationship} -> - if import_record, do: Imports.record_imported_entity(import_record, "relationship", rel["uuid"], "relationship", relationship.id) - {:error, reason} -> - Logger.warning("Failed to import relationship #{rel["uuid"]}: #{inspect(reason)}") - end - _ -> :ok - end - else - failed = if is_nil(contact_id), do: contact_uuid, else: related_uuid - Logger.warning("Skipping relationship #{rel["uuid"]}: contact #{failed} was not imported") - end - end - end - - defp find_or_create_relationship_type(account_id, name, data) do - alias Kith.Contacts.RelationshipType - alias Kith.Repo - import Ecto.Query - - reverse_name = (data && data["reverse_name"]) || name - - case Repo.one(from rt in RelationshipType, where: rt.name == ^name and (is_nil(rt.account_id) or rt.account_id == ^account_id)) do - nil -> 
Kith.Contacts.create_relationship_type(account_id, %{name: name, reverse_name: reverse_name}) - rt -> {:ok, rt} - end - end - - defp import_first_met_through_links(account_id, contacts_data, contact_map) do - for contact_data <- contacts_data do - through_uuid = get_in(contact_data, ["first_met_through", "data", "uuid"]) - contact_id = Map.get(contact_map, contact_data["uuid"]) - - if through_uuid && contact_id do - through_id = Map.get(contact_map, through_uuid) - if through_id do - contact = Kith.Repo.get!(Kith.Contacts.Contact, contact_id) - Kith.Contacts.update_contact(contact, %{first_met_through_id: through_id}) - else - Logger.warning("first_met_through #{through_uuid} not found for contact #{contact_data["uuid"]}") - end - end - end - end - - # --- Helpers --- - - defp init_summary do - %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []} - end - - defp add_error(acc, msg) do - errors = if length(acc.errors) < 50, do: acc.errors ++ [msg], else: acc.errors - %{acc | skipped: acc.skipped + 1, error_count: acc.error_count + 1, errors: errors} - end - - defp parse_date_string(nil), do: nil - defp parse_date_string(str) do - case Date.from_iso8601(str) do - {:ok, date} -> date - _ -> nil - end - end -end -``` - -- [ ] **Step 5: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/monica_test.exs -v` -Expected: All PASS - -- [ ] **Step 6: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/sources/monica.ex test/kith/imports/sources/monica_test.exs test/support/fixtures/monica_export.json -git commit -m "feat: add Monica CRM import source with full data mapping" -``` - ---- - -### Task 10: Monica source — Integration test (full import) - -**Files:** -- Modify: `test/kith/imports/sources/monica_test.exs` - -- [ ] **Step 1: Write integration test for full import** - -Add to `test/kith/imports/sources/monica_test.exs`: - -```elixir - describe "import/4" do - setup do - 
seed_reference_data!() - user = user_fixture() - %{user: user, account_id: user.account_id} - end - - test "imports contacts with all children", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - - assert {:ok, summary} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - assert summary.contacts == 2 - - # Verify contacts exist - contacts = Kith.Contacts.list_contacts(account_id) - assert length(contacts) == 2 - - jane = Enum.find(contacts, &(&1.first_name == "Jane")) - assert jane.last_name == "Doe" - assert jane.middle_name == "Marie" - assert jane.occupation == "Engineer" - assert jane.favorite == true - end - - test "imports contact children (notes, addresses, pets)", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - - contacts = Kith.Contacts.list_contacts(account_id) - jane = Enum.find(contacts, &(&1.first_name == "Jane")) - - notes = Kith.Contacts.list_notes(jane.id, user.id) - assert length(notes) == 1 - - pets = Kith.Pets.list_pets(account_id, jane.id) - assert length(pets) == 1 - assert hd(pets).species == "dog" - end - - test "creates import_records for deduplication", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - - rec = Kith.Imports.find_import_record(account_id, "monica", "contact", "contact-uuid-1") - assert rec != nil - end - - test "handles re-import (upsert)", %{data: data, account_id: account_id, user: user} do - import_job1 = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job1}) - - # Complete first import so we can create a second - 
Kith.Imports.update_import_status(import_job1, "completed") - - import_job2 = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, summary} = MonicaSource.import(account_id, user.id, data, %{import: import_job2}) - - # Should still have 2 contacts (upserted, not duplicated) - contacts = Kith.Contacts.list_contacts(account_id) - assert length(contacts) == 2 - assert summary.contacts == 2 - end - - test "resolves first_met_through cross-references", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - - contacts = Kith.Contacts.list_contacts(account_id) - john = Enum.find(contacts, &(&1.first_name == "John")) - jane = Enum.find(contacts, &(&1.first_name == "Jane")) - - reloaded = Kith.Repo.get!(Kith.Contacts.Contact, john.id) - assert reloaded.first_met_through_id == jane.id - end - end -``` - -Add required imports at the top: - -```elixir - import Kith.AccountsFixtures - import Kith.ContactsFixtures - import Kith.ImportsFixtures -``` - -- [ ] **Step 2: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/monica_test.exs -v` -Expected: All PASS. Debug any failures — these exercise the full import pipeline. - -- [ ] **Step 3: Run full test suite** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All pass. 
- -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add test/kith/imports/sources/monica_test.exs -git commit -m "test: add Monica source integration tests for full import pipeline" -``` - ---- - -### Task 11: PhotoSyncWorker - -**Files:** -- Create: `lib/kith/workers/photo_sync_worker.ex` -- Create: `test/kith/workers/photo_sync_worker_test.exs` - -- [ ] **Step 1: Write failing test** - -Create `test/kith/workers/photo_sync_worker_test.exs`: - -```elixir -defmodule Kith.Workers.PhotoSyncWorkerTest do - use Kith.DataCase, async: true - use Oban.Testing, repo: Kith.Repo - - alias Kith.Workers.PhotoSyncWorker - - import Kith.AccountsFixtures - import Kith.ContactsFixtures - import Kith.ImportsFixtures - - describe "perform/1" do - test "discards when import not found" do - assert {:discard, _} = perform_job(PhotoSyncWorker, %{ - import_id: 999_999, - photo_id: 1, - source_photo_id: "uuid" - }) - end - end -end -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/photo_sync_worker_test.exs -v` - -- [ ] **Step 3: Implement PhotoSyncWorker** - -Create `lib/kith/workers/photo_sync_worker.ex`: - -```elixir -defmodule Kith.Workers.PhotoSyncWorker do - @moduledoc """ - Oban worker that downloads a single photo from an external source API - and stores it in Kith.Storage. Independent per-photo jobs with staggered scheduling. 
- """ - - use Oban.Worker, queue: :photo_sync, max_attempts: 3 - - require Logger - - alias Kith.Imports - alias Kith.Contacts.Photo - alias Kith.Repo - - @impl Oban.Worker - def perform(%Oban.Job{ - args: %{"import_id" => import_id, "photo_id" => photo_id, "source_photo_id" => source_photo_id}, - attempt: attempt, - max_attempts: max_attempts - }) do - with {:import, %{} = import} <- {:import, Imports.get_import(import_id)}, - {:photo, %Photo{} = photo} <- {:photo, Repo.get(Photo, photo_id)}, - {:source, {:ok, source_mod}} <- {:source, Imports.resolve_source(import.source)} do - - # Check if import was cancelled - if import.status == "cancelled", do: throw(:cancelled) - - # Check storage limit - case Kith.Storage.check_storage_limit(import.account_id, 0) do - :ok -> :ok - {:error, _} -> - Logger.warning("Storage limit reached for account #{import.account_id}, discarding photo #{photo_id}") - Repo.delete(photo) - throw(:discard) - end - - credential = %{url: import.api_url, api_key: import.api_key_encrypted} - - case source_mod.fetch_photo(credential, source_photo_id) do - {:ok, binary} -> - storage_key = Kith.Storage.generate_key(import.account_id, "photos", photo.file_name) - {:ok, _} = Kith.Storage.upload_binary(binary, storage_key) - - photo - |> Ecto.Changeset.change(%{ - storage_key: storage_key, - file_size: byte_size(binary) - }) - |> Repo.update!() - - maybe_cleanup_api_key(import) - :ok - - {:error, :rate_limited} -> - {:snooze, 60} - - {:error, reason} -> - Logger.warning("Photo sync failed for #{source_photo_id}: #{inspect(reason)}") - - # On final attempt: delete the Photo record so the contact doesn't have - # a permanently broken pending_sync: reference - if attempt >= max_attempts do - Repo.delete(photo) - Logger.warning("Deleted photo #{photo_id} after #{max_attempts} failed attempts") - end - - {:error, reason} - end - else - {:import, nil} -> {:discard, "Import not found"} - {:photo, nil} -> {:discard, "Photo not found"} - {:source, {:error, _}} 
-> {:discard, "Unknown source"} - end - catch - :cancelled -> {:discard, "Import cancelled"} - :discard -> {:discard, "Storage limit reached"} - end - - @impl Oban.Worker - def timeout(_job), do: :timer.minutes(5) - - defp maybe_cleanup_api_key(import) do - if Imports.pending_async_jobs_count(import.id) <= 1 do - Imports.wipe_api_key(import) - end - end -end -``` - -- [ ] **Step 4: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/photo_sync_worker_test.exs -v` -Expected: PASS - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/photo_sync_worker.ex test/kith/workers/photo_sync_worker_test.exs -git commit -m "feat: add PhotoSyncWorker for async photo downloads" -``` - ---- - -### Task 12: ApiSupplementWorker - -**Files:** -- Create: `lib/kith/workers/api_supplement_worker.ex` -- Create: `test/kith/workers/api_supplement_worker_test.exs` - -- [ ] **Step 1: Write failing test** - -Create `test/kith/workers/api_supplement_worker_test.exs`: - -```elixir -defmodule Kith.Workers.ApiSupplementWorkerTest do - use Kith.DataCase, async: true - use Oban.Testing, repo: Kith.Repo - - alias Kith.Workers.ApiSupplementWorker - - describe "perform/1" do - test "discards when import not found" do - assert {:discard, _} = perform_job(ApiSupplementWorker, %{ - import_id: 999_999, - contact_id: 1, - source_contact_id: "uuid", - key: "first_met_details" - }) - end - end -end -``` - -- [ ] **Step 2: Implement ApiSupplementWorker** - -Create `lib/kith/workers/api_supplement_worker.ex`: - -```elixir -defmodule Kith.Workers.ApiSupplementWorker do - @moduledoc """ - Oban worker that fetches supplementary data from a source API. - Currently handles first_met_details (first_met_where, first_met_additional_info). 
- """ - - use Oban.Worker, queue: :api_supplement, max_attempts: 3 - - require Logger - - alias Kith.Imports - alias Kith.Contacts.Contact - alias Kith.Repo - - @impl Oban.Worker - def perform(%Oban.Job{args: %{ - "import_id" => import_id, - "contact_id" => contact_id, - "source_contact_id" => source_contact_id, - "key" => key - }}) do - key_atom = String.to_existing_atom(key) - - with {:import, %{} = import} <- {:import, Imports.get_import(import_id)}, - {:contact, %Contact{} = contact} <- {:contact, Repo.get(Contact, contact_id)}, - {:source, {:ok, source_mod}} <- {:source, Imports.resolve_source(import.source)} do - - if import.status == "cancelled", do: throw(:cancelled) - - credential = %{url: import.api_url, api_key: import.api_key_encrypted} - - case source_mod.fetch_supplement(credential, source_contact_id, key_atom) do - {:ok, data} -> - attrs = Map.take(data, [:first_met_where, :first_met_additional_info]) - Kith.Contacts.update_contact(contact, attrs) - maybe_cleanup_api_key(import) - :ok - - {:error, :rate_limited} -> - {:snooze, 60} - - {:error, reason} -> - Logger.warning("API supplement failed for contact #{source_contact_id}: #{inspect(reason)}") - {:error, reason} - end - else - {:import, nil} -> {:discard, "Import not found"} - {:contact, nil} -> {:discard, "Contact not found"} - {:source, {:error, _}} -> {:discard, "Unknown source"} - end - catch - :cancelled -> {:discard, "Import cancelled"} - end - - defp maybe_cleanup_api_key(import) do - if Imports.pending_async_jobs_count(import.id) <= 1 do - Imports.wipe_api_key(import) - end - end -end -``` - -- [ ] **Step 3: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/api_supplement_worker_test.exs -v` -Expected: PASS - -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/api_supplement_worker.ex test/kith/workers/api_supplement_worker_test.exs -git commit -m "feat: add ApiSupplementWorker for fetching first-met 
details" -``` - ---- - -### Task 13: ImportFileCleanupWorker - -**Files:** -- Create: `lib/kith/workers/import_file_cleanup_worker.ex` - -- [ ] **Step 1: Implement the cleanup worker** - -Create `lib/kith/workers/import_file_cleanup_worker.ex`: - -```elixir -defmodule Kith.Workers.ImportFileCleanupWorker do - @moduledoc """ - Periodic Oban cron job that deletes import files older than 30 days. - Runs weekly (Sunday 5 AM). - """ - - use Oban.Worker, queue: :default, max_attempts: 1 - - require Logger - - import Ecto.Query - alias Kith.Repo - alias Kith.Imports.Import - - @retention_days 30 - - @impl Oban.Worker - def perform(_job) do - cutoff = DateTime.utc_now() |> DateTime.add(-@retention_days * 86_400, :second) - - imports = - Import - |> where([i], i.status in ["completed", "failed", "cancelled"]) - |> where([i], not is_nil(i.file_storage_key)) - |> where([i], i.completed_at < ^cutoff or (is_nil(i.completed_at) and i.updated_at < ^cutoff)) - |> Repo.all() - - Enum.each(imports, fn import -> - case Kith.Storage.delete(import.file_storage_key) do - :ok -> - import - |> Ecto.Changeset.change(file_storage_key: nil) - |> Repo.update!() - Logger.info("Cleaned up import file for import #{import.id}") - - {:error, reason} -> - Logger.warning("Failed to delete import file #{import.file_storage_key}: #{inspect(reason)}") - end - end) - - :ok - end -end -``` - -- [ ] **Step 2: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/import_file_cleanup_worker.ex -git commit -m "feat: add ImportFileCleanupWorker for 30-day file retention" -``` - ---- - -### Task 14: ImportWizardLive — LiveView with source selection - -**Files:** -- Create: `lib/kith_web/live/import_wizard_live.ex` -- Modify: `lib/kith_web/router.ex` (update route to point to new LiveView) - -- [ ] **Step 1: Create the ImportWizardLive** - -Create `lib/kith_web/live/import_wizard_live.ex`: - -```elixir -defmodule KithWeb.ImportWizardLive do - use KithWeb, :live_view - - alias 
Kith.Policy - alias Kith.Imports - - import KithWeb.SettingsLive.SettingsLayout - - @max_file_size 50 * 1024 * 1024 - - @impl true - def mount(_params, _session, socket) do - {:ok, - socket - |> assign(:page_title, "Import Contacts") - |> assign(:step, :source_selection) - |> assign(:source, nil) - |> assign(:importing, false) - |> assign(:progress, nil) - |> assign(:results, nil) - |> assign(:summary, nil) - |> assign(:import_job, nil) - |> assign(:api_connected, false) - |> assign(:api_options, %{}) - |> allow_upload(:import_file, - accept: ~w(.vcf .json), - max_file_size: @max_file_size, - max_entries: 1 - )} - end - - @impl true - def handle_params(_params, _uri, socket) do - scope = socket.assigns.current_scope - - unless Policy.can?(scope.user, :create, :import) do - {:noreply, - socket - |> put_flash(:error, "You do not have permission to import contacts.") - |> push_navigate(to: ~p"/")} - else - if connected?(socket) do - Phoenix.PubSub.subscribe(Kith.PubSub, "import:#{scope.account.id}") - end - - # Check for active import - case Imports.get_active_import(scope.account.id) do - %{} = import_job -> - {:noreply, socket |> assign(:step, :progress) |> assign(:import_job, import_job) |> assign(:importing, true)} - nil -> - {:noreply, socket} - end - end - end - - @impl true - def handle_event("select_source", %{"source" => source}, socket) do - {:noreply, assign(socket, :source, source)} - end - - def handle_event("validate", _params, socket) do - {:noreply, socket} - end - - def handle_event("upload_and_validate", _params, socket) do - scope = socket.assigns.current_scope - source = socket.assigns.source - - results = - consume_uploaded_entries(socket, :import_file, fn %{path: path}, entry -> - data = File.read!(path) - - with {:ok, source_mod} <- Imports.resolve_source(source), - {:ok, _} <- source_mod.validate_file(data), - {:ok, summary} <- source_mod.parse_summary(data) do - # Store file - storage_key = "imports/pending/#{entry.client_name}" - {:ok, _} = 
Kith.Storage.upload_binary(data, storage_key) - {:ok, {summary, storage_key, entry.client_name, byte_size(data)}} - else - {:error, reason} -> {:ok, {:error, reason}} - end - end) - - case List.first(results) do - {summary, storage_key, file_name, file_size} -> - {:noreply, - socket - |> assign(:step, :confirmation) - |> assign(:summary, summary) - |> assign(:file_storage_key, storage_key) - |> assign(:file_name, file_name) - |> assign(:file_size, file_size)} - - {:error, reason} -> - {:noreply, put_flash(socket, :error, reason)} - - nil -> - {:noreply, put_flash(socket, :error, "No file uploaded.")} - end - end - - def handle_event("start_import", _params, socket) do - scope = socket.assigns.current_scope - - attrs = %{ - source: socket.assigns.source, - file_name: socket.assigns.file_name, - file_size: socket.assigns.file_size, - file_storage_key: socket.assigns.file_storage_key, - api_url: socket.assigns[:api_url], - api_key_encrypted: socket.assigns[:api_key], - api_options: socket.assigns.api_options - } - - case Imports.create_import(scope.account.id, scope.user.id, attrs) do - {:ok, import_job} -> - %{import_id: import_job.id} - |> Kith.Workers.ImportSourceWorker.new() - |> Oban.insert() - - {:noreply, - socket - |> assign(:step, :progress) - |> assign(:import_job, import_job) - |> assign(:importing, true)} - - {:error, :import_in_progress} -> - {:noreply, put_flash(socket, :error, "An import is already in progress.")} - - {:error, changeset} -> - {:noreply, put_flash(socket, :error, "Failed to start import: #{inspect(changeset.errors)}")} - end - end - - def handle_event("cancel_import", _params, socket) do - if socket.assigns.import_job do - Imports.cancel_import(socket.assigns.import_job) - end - {:noreply, socket} - end - - def handle_event("test_api_connection", %{"url" => url, "api_key" => api_key}, socket) do - with {:ok, source_mod} <- Imports.resolve_source(socket.assigns.source), - :ok <- source_mod.test_connection(%{url: url, api_key: api_key}) do 
- options = if function_exported?(source_mod, :api_supplement_options, 0) do - source_mod.api_supplement_options() - else - [] - end - - {:noreply, - socket - |> assign(:api_connected, true) - |> assign(:api_url, url) - |> assign(:api_key, api_key) - |> assign(:supplement_options, options)} - else - {:error, reason} -> - {:noreply, - socket - |> assign(:api_connected, false) - |> put_flash(:error, "Connection failed: #{reason}")} - end - end - - def handle_event("toggle_api_option", %{"key" => key}, socket) do - opts = socket.assigns.api_options - key_atom = String.to_existing_atom(key) - new_opts = Map.update(opts, key_atom, true, &(!&1)) - {:noreply, assign(socket, :api_options, new_opts)} - end - - def handle_event("reset", _params, socket) do - {:noreply, - socket - |> assign(:step, :source_selection) - |> assign(:source, nil) - |> assign(:results, nil) - |> assign(:summary, nil) - |> assign(:importing, false) - |> assign(:progress, nil)} - end - - @impl true - def handle_info({:import_progress, progress}, socket) do - {:noreply, assign(socket, :progress, progress)} - end - - def handle_info({:import_complete, results}, socket) do - {:noreply, - socket - |> assign(:importing, false) - |> assign(:step, :complete) - |> assign(:results, results)} - end - - @impl true - def render(assigns) do - ~H""" - - <.settings_shell current_path={@current_path} current_scope={@current_scope}> - - Import Contacts - <:subtitle>Import contacts from vCard or Monica CRM - - - <%!-- Step 1: Source Selection --%> -
-
- - - -
- -
-
-
- <.live_file_input upload={@uploads.import_file} class="hidden" /> -

- Drag and drop a {if @source == "vcard", do: ".vcf", else: ".json"} file here, or - -

-
- -
- {entry.client_name} - {Float.round(entry.client_size / 1024, 1)} KB -
- -

- {upload_error_message(err)} -

- -
- - Validate & Continue - -
-
-
-
- - <%!-- Step 2: Confirmation --%> -
-
-

Import Summary

-
-
-
Contacts
-
{@summary.contacts}
-
-
-
Notes
-
{@summary.notes}
-
-
-
Relationships
-
{@summary.relationships}
-
-
-
Photos
-
{@summary.photos}
-
-
-
- - <%!-- Monica API section --%> -
-
- Connect to Monica API (optional) -
-
-
- - -
-
- - -
- Test Connection -
- -
-

Connected successfully

-
- - -
-
-
-
-
- -
- Start Import - Back -
-
- - <%!-- Step 3: Progress --%> -
-
-
-

- Processing contact {@progress.current} / {@progress.total}... -

-
-
0, do: round(@progress.current / @progress.total * 100), else: 0}%"} - /> -
-
-

Starting import...

- -
- Cancel Import -
-
-
- - <%!-- Step 4: Complete --%> -
-
-

Import Complete

-
-

{@results["contacts"] || @results[:contacts] || 0} contacts imported

-

0} class="text-[var(--color-warning)]"> - {@results["skipped"] || @results[:skipped]} skipped -

-

0} class="text-[var(--color-error)]"> - {@results["error_count"] || @results[:error_count]} errors -

-
-
- <.link navigate={~p"/contacts"} class="text-[var(--color-accent)] hover:underline text-sm">View contacts - -
-
-
- - - """ - end - - defp upload_error_message(:too_large), do: "File is too large (max 50 MB)" - defp upload_error_message(:not_accepted), do: "Only .vcf and .json files are accepted" - defp upload_error_message(:too_many_files), do: "Only one file at a time" - defp upload_error_message(other), do: "Upload error: #{inspect(other)}" -end -``` - -- [ ] **Step 2: Update the router** - -In `lib/kith_web/router.ex`, find the existing import route (likely `live "/settings/import", SettingsLive.Import`) and replace with: - -```elixir -live "/settings/import", ImportWizardLive -``` - -- [ ] **Step 3: Verify it compiles** - -Run: `cd /Users/basharqassis/projects/kith && mix compile --warnings-as-errors` -Expected: Compiles without errors. - -- [ ] **Step 4: Run existing tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All pass. Some existing import tests may need updating if they reference `SettingsLive.Import` — update them to use `ImportWizardLive`. - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith_web/live/import_wizard_live.ex lib/kith_web/router.ex -git commit -m "feat: add ImportWizardLive with multi-source import wizard" -``` - ---- - -### Task 15: Enqueue async jobs after Monica import - -**Files:** -- Modify: `lib/kith/workers/import_source_worker.ex` - -After the Monica source completes `import/4`, the `ImportSourceWorker` needs to enqueue `PhotoSyncWorker` and `ApiSupplementWorker` jobs based on `import.api_options`. 
- -- [ ] **Step 1: Add post-import job scheduling to ImportSourceWorker** - -In `lib/kith/workers/import_source_worker.ex`, after the `source_mod.import/4` call succeeds, add: - -```elixir - # Enqueue async jobs for photo sync and API supplements - if import.api_options do - enqueue_async_jobs(import) - end -``` - -Add private function: - -```elixir - defp enqueue_async_jobs(%{api_url: nil}), do: :ok - defp enqueue_async_jobs(%{api_key_encrypted: nil}), do: :ok - defp enqueue_async_jobs(import) do - import_records = Kith.Imports.list_import_records(import.id) - - # Photo sync jobs - if import.api_options["photos"] || import.api_options[:photos] do - photo_records = Enum.filter(import_records, &(&1.source_entity_type == "photo")) - - photo_records - |> Enum.with_index() - |> Enum.each(fn {rec, idx} -> - batch = div(idx, 50) - delay = batch * 60 - - %{import_id: import.id, photo_id: rec.local_entity_id, source_photo_id: rec.source_entity_id} - |> Kith.Workers.PhotoSyncWorker.new(scheduled_at: DateTime.add(DateTime.utc_now(), delay, :second)) - |> Oban.insert() - end) - end - - # API supplement jobs — only for contacts that had first_met_date in the export. - # Re-read the file to determine which contacts need supplement data. - # This avoids storing per-contact flags and keeps import_records generic. 
- if import.api_options["first_met_details"] || import.api_options[:first_met_details] do - contacts_with_first_met = case Kith.Storage.read(import.file_storage_key) do - {:ok, data} -> - case Jason.decode(data) do - {:ok, parsed} -> - (get_in(parsed, ["contacts", "data"]) || []) - |> Enum.filter(fn c -> - date = get_in(c, ["first_met_date", "data", "date"]) - date != nil - end) - |> Enum.map(& &1["uuid"]) - |> MapSet.new() - _ -> MapSet.new() - end - _ -> MapSet.new() - end - - contact_records = - import_records - |> Enum.filter(&(&1.source_entity_type == "contact")) - |> Enum.filter(&MapSet.member?(contacts_with_first_met, &1.source_entity_id)) - - contact_records - |> Enum.with_index() - |> Enum.each(fn {rec, idx} -> - batch = div(idx, 50) - delay = batch * 60 - - %{ - import_id: import.id, - contact_id: rec.local_entity_id, - source_contact_id: rec.source_entity_id, - key: "first_met_details" - } - |> Kith.Workers.ApiSupplementWorker.new(scheduled_at: DateTime.add(DateTime.utc_now(), delay, :second)) - |> Oban.insert() - end) - end - end -``` - -- [ ] **Step 2: Add list_import_records to Imports context** - -In `lib/kith/imports.ex`, add: - -```elixir - def list_import_records(import_id) do - ImportRecord - |> where([r], r.import_id == ^import_id) - |> Repo.all() - end - - def count_import_records_by_type(import_id, entity_type) do - ImportRecord - |> where([r], r.import_id == ^import_id) - |> where([r], r.source_entity_type == ^entity_type) - |> Repo.aggregate(:count) - end -``` - -- [ ] **Step 3: Run full test suite** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All pass. 
- -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/import_source_worker.ex lib/kith/imports.ex -git commit -m "feat: enqueue photo sync and API supplement jobs after import" -``` - ---- - -### Task 16: Final verification - -- [ ] **Step 1: Run full test suite** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All tests pass. - -- [ ] **Step 2: Verify compilation with no warnings** - -Run: `cd /Users/basharqassis/projects/kith && mix compile --warnings-as-errors` -Expected: Clean compilation. - -- [ ] **Step 3: Verify migrations** - -Run: `cd /Users/basharqassis/projects/kith && mix ecto.rollback -n 2 && mix ecto.migrate` -Expected: Both migrations are reversible. - -- [ ] **Step 4: Manual smoke test** - -Start the server: `cd /Users/basharqassis/projects/kith && mix phx.server` -Navigate to `/settings/import`. Verify: -- Source selection (vCard/Monica tabs) renders -- File upload works for both types -- Validation shows summary -- VCard import runs end-to-end - -- [ ] **Step 5: Final commit if needed** - -```bash -cd /Users/basharqassis/projects/kith -git status -# Review and commit any remaining changes -``` diff --git a/docs/superpowers/specs/2026-03-21-extensible-import-system-design.md b/docs/superpowers/specs/2026-03-21-extensible-import-system-design.md deleted file mode 100644 index 94ef3fc..0000000 --- a/docs/superpowers/specs/2026-03-21-extensible-import-system-design.md +++ /dev/null @@ -1,408 +0,0 @@ -# Extensible Import System with Monica CRM Support - -**Date:** 2026-03-21 -**Status:** Approved - -## Overview - -Build an extensible import framework for Kith that supports multiple data sources (VCF, Monica CRM, future platforms). The first new source is Monica CRM, importing contacts and all associated data from a JSON export file, with optional photo sync via Monica's REST API. 
- -**Dependencies:** -- [Contact "First Met" Fields & Schema Additions](2026-03-21-contact-first-met-fields-design.md) — must be implemented first; adds `middle_name`, `first_met_at`, `first_met_where`, `first_met_through_id`, `first_met_additional_info`, `first_met_year_unknown`, and `birthdate_year_unknown` to the Contact schema. - -Core principles: -- Kith's schema stays clean — no source-specific fields on core tables -- Import tracking via a generic `import_records` table for source ID → local ID mapping -- Behaviour-based source plugins for extensibility -- Per-contact changeset transactions for granular error reporting -- UI-driven import wizard with real-time progress - -## Database Schema - -### `imports` table - -Tracks each import job. - -| Column | Type | Notes | -|---|---|---| -| id | bigint PK | | -| account_id | references accounts | | -| user_id | references users | | -| source | string | "monica", "vcard", etc. | -| status | string | pending, processing, completed, failed, cancelled | -| file_name | string | | -| file_size | integer | | -| file_storage_key | string | reference to file in Kith.Storage | -| api_url | string | nullable, for photo sync | -| api_key_encrypted | binary | nullable, use `Kith.Vault.EncryptedBinary` Ecto type (auto-encrypts at rest via Cloak, same pattern as `Account.immich_api_key`) | -| api_options | map | nullable, typed as `%{photos: boolean(), first_met_details: boolean()}` — keys match `api_supplement_options()` keys; validated on create | -| summary | map | `%{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: [...]}` — matches `import_summary` type; `errors` capped at 50 entries, `error_count` has true total | -| started_at | utc_datetime | | -| completed_at | utc_datetime | | -| timestamps | | | - -### `import_records` table - -Maps source system IDs to Kith IDs. Keeps all source-specific IDs out of Kith's core schemas. 
Used for resolving cross-entity references (e.g., `first_met_through` UUID → local contact ID) and for identifying previously-imported entities on re-import. - -| Column | Type | Notes | -|---|---|---| -| id | bigint PK | | -| account_id | references accounts | | -| import_id | references imports | Set on first import, updated to latest import_id on re-import | -| source | string | "monica", "vcard", etc. | -| source_entity_type | string | "contact", "note", etc. | -| source_entity_id | string | UUID from source system | -| local_entity_type | string | "contact", "note", etc. | -| local_entity_id | bigint | Kith's DB id | -| timestamps | | | - -**Unique index:** `[account_id, source, source_entity_type, source_entity_id]` - -**Scope note:** This index deduplicates within a single source system per account. The same real-world entity imported from two different sources (e.g., VCard and Monica) will create two `import_records` entries — this is intentional; cross-source deduplication is a separate concern handled by content-level duplicate detection. - -## Import Framework - -### File Storage - -Uploaded files are stored via `Kith.Storage` under `imports/{import_id}/` and referenced by storage key in the `imports` table. The Oban worker receives only the `import_id` — never raw file data in job args (Oban args are JSONB with practical size limits). The worker loads the file from storage at runtime. - -The `imports` table includes a `file_storage_key` column for this reference. - -**File size expectation:** The `Source.import/4` callback receives the entire file as a binary. Monica JSON exports are typically 1–50 MB for most accounts. For the expected range this is fine; if a source could produce files >100 MB, it should implement streaming internally. The `ImportSourceWorker` loads the file from storage into memory before calling the source. - -### Concurrent Import Guard - -Two-layer guard: - -1. 
**Database constraint:** Add a unique partial index on `imports (account_id) WHERE status IN ('pending', 'processing')`. This prevents race conditions where two concurrent requests both pass the application-level check. - -2. **Application check:** `Kith.Imports.create_import/3` queries for an existing active import and returns `{:error, :import_in_progress}` if found. The UI disables the "Start Import" button when an import is active. - -Concurrent imports won't corrupt data (upserts are idempotent), but the guard prevents photo sync jobs from competing for API rate limits. - -### Source Behaviour - -```elixir -defmodule Kith.Imports.Source do - @type opts :: map() - @type credential :: %{url: String.t(), api_key: String.t()} - @type import_summary :: %{ - contacts: non_neg_integer(), - notes: non_neg_integer(), - skipped: non_neg_integer(), - error_count: non_neg_integer(), - errors: [String.t()] # capped at 50 entries; error_count has the true total - } - - @callback name() :: String.t() - @callback file_types() :: [String.t()] - # validate_file: structural check only (correct format, required keys present) - @callback validate_file(binary()) :: {:ok, map()} | {:error, String.t()} - # parse_summary: deeper parse returning entity counts for the confirmation screen - @callback parse_summary(binary()) :: {:ok, map()} | {:error, String.t()} - @callback import(account_id :: integer(), user_id :: integer(), data :: binary(), opts()) :: - {:ok, import_summary()} | {:error, term()} - @callback supports_api?() :: boolean() - - # Optional callbacks — only required when supports_api?() returns true - @callback test_connection(credential()) :: :ok | {:error, String.t()} - @callback fetch_photo(credential(), resource_id :: String.t()) :: - {:ok, binary()} | {:error, term()} - # Returns list of supplementary data types the API can provide beyond the file export - @callback api_supplement_options() :: [%{key: atom(), label: String.t(), description: String.t()}] - @callback 
fetch_supplement(credential(), contact_source_id :: String.t(), key :: atom()) :: - {:ok, map()} | {:error, term()} - - @optional_callbacks [test_connection: 1, fetch_photo: 2, api_supplement_options: 0, fetch_supplement: 3] -end -``` - -### Cancellation - -Import jobs support cancellation. The worker checks a `cancelled` flag on the import record between each contact. The UI shows a "Cancel Import" button during processing. On cancel: -1. Set `imports.status` to `cancelled` -2. Worker checks status before each contact, stops if cancelled -3. Already-imported contacts remain (import is resumable) -4. Photo sync jobs for cancelled imports are discarded via `Oban.cancel_all_jobs/1` - -### Source Implementations - -- `Kith.Imports.Sources.VCard` — wraps existing `Kith.VCard.Parser` logic -- `Kith.Imports.Sources.Monica` — new, parses JSON export + API photo sync - -### Context Module - -`Kith.Imports` — manages import jobs, resolves source modules, handles `import_records` lookups. - -Key functions: -- `create_import/3` — create an import job record -- `find_import_record/4` — look up existing record by source + entity type + entity id -- `record_imported_entity/5` — upsert an import_record (create or update import_id) -- `resolve_source/1` — map source string to module - -### Generic Worker - -`Kith.Workers.ImportSourceWorker` — Oban worker that: -1. Loads the import job -2. Resolves the source module -3. Calls `source.import/4` -4. Broadcasts progress via PubSub -5. Updates import job status and summary - -Replaces the existing `ImportWorker` for new imports. - -## Monica Source — Data Mapping - -### Processing Order (dependency chain) - -**Scope note:** The Monica JSON export contains: contacts, contact_fields, addresses, notes, reminders, pets, photos, activities, and relationships. It does NOT contain gifts, debts, calls, life_events, or conversations — those are Kith-specific features not present in Monica. 
- -**Creator/Author assignment:** Many Kith schemas require `creator_id` or `author_id` (Note, Reminder, Activity, etc.). During import, these are set to the `user_id` of the user who initiated the import. - -**Phase 1 — Reference data** (no dependencies): -- Genders → `Kith.Contacts.Gender` (find-or-create by name) -- Contact field types → `Kith.Contacts.ContactFieldType` (find-or-create by name) -- Relationship types → `Kith.Contacts.RelationshipType` (find-or-create by name) -- Activity type categories → `Kith.Contacts.ActivityTypeCategory` (find-or-create by name, needed for activities) -- Tags → `Kith.Contacts.Tag` (find-or-create by name, scoped to account) -- Pet categories → mapped to Kith's `species` enum: - -| Monica pet_category | Kith species | -|---|---| -| Dog | dog | -| Cat | cat | -| Bird | bird | -| Fish | fish | -| Reptile | reptile | -| Rabbit | rabbit | -| Hamster | hamster | -| (all others) | other | - -**Phase 2 — Contacts** (depends on: genders): - -| Monica Property | Kith Field | Notes | -|---|---|---| -| first_name | first_name | direct | -| last_name | last_name | direct | -| middle_name | middle_name | direct | -| nickname | nickname | direct | -| company | company | direct | -| job | occupation | rename | -| is_starred | favorite | rename | -| is_active: false | is_archived: true | inverted | -| is_dead | deceased | rename | -| description | description | direct | -| first_met_date (nested special_date object) | first_met_at | extract `.date` from the nested object; see Partial Date Handling below | -| first_met_through (UUID) | first_met_through_id | resolve via import_records after all contacts imported (Phase 4, alongside relationships) | -| first_met_where | first_met_where | NOT in JSON export. Fetched via API supplement (`GET /api/contacts/{id}`) if user enables "Fetch how we met details" option. | -| first_met_additional_info | first_met_additional_info | NOT in JSON export. Fetched via API supplement (same call as above). 
| -| gender (UUID) | gender_id | via import_records lookup | -| birthdate (nested special_date object) | birthdate | Same structure as `first_met_date` — extract `.date`; see Partial Date Handling below | -| tags (UUID array) | tags | find-or-create tags by name (account-scoped), then insert join table rows | - -**Phase 3 — Contact children** (depends on: contacts, reference data): - -Each is nested inside its parent contact in the JSON. - -- `contact_field` → `Kith.Contacts.ContactField` (type UUID → contact_field_type_id via lookup) -- `address` → `Kith.Contacts.Address` (Monica splits address/place — flatten into Kith's single address schema) -- `note` → `Kith.Contacts.Note` -- `reminder` → `Kith.Reminders.Reminder` -- `pet` → `Kith.Contacts.Pet` (pet_category → species enum mapping) -- `photo` → `Kith.Contacts.Photo` (metadata only; `storage_key` set to a `"pending_sync:{source_photo_uuid}"` placeholder; file downloaded in Phase 5. Photo records with `pending_sync:` prefix are treated as unsynced: the `Photo` context module should expose a `Photo.pending_sync?/1` helper that pattern-matches the prefix. The UI uses this to show a placeholder/spinner instead of calling `Storage.url/1` on the pending key. The API omits the `url` field for pending photos.) -- `activity` → `Kith.Activities.Activity` (with `activity_type_category_id` via lookup; activities shared across multiple contacts: deduplicate by UUID — on first encounter, create the activity and its join table entry; on subsequent contacts referencing the same activity UUID, add only the join table entry). The worker maintains a `MapSet` of processed activity UUIDs in memory during the import to track which activities have been created. - -**Resumability note:** On a resumed import (after cancellation), the in-memory `MapSet` starts empty. The worker must first check `import_records` for existing activity mappings before attempting insert. 
If an `import_record` exists for an activity UUID, skip creation and insert only the join table entry. This makes the `MapSet` an optimization (avoids repeated DB lookups within a single run), not a source of truth. - -**Phase 4 — Cross-contact references** (depends on: contacts, relationship types): - -Relationships (top-level in the JSON): -- Each references two contact UUIDs (`contact_is`, `of_contact`) and a relationship type -- Look up both contacts via `import_records` -- Look up relationship type -- Create `Kith.Contacts.Relationship` - -First-met-through links: -- For contacts with a `first_met_through` UUID, look up the referenced contact via `import_records` -- Update the contact's `first_met_through_id` -- If the referenced contact was not imported, log a warning and leave null - -**Phase 5 — Photo files** (async, depends on: photo records from phase 3): - -Handled by separate `PhotoSyncWorker` jobs. See Photo Sync section. - -### Partial Date Handling - -Monica's `birthdate` and `first_met_date` are nested `special_date` objects with `is_year_unknown` and `is_age_based` flags. Kith's `birthdate` and `first_met_at` are `:date` columns that require a full year+month+day. - -**Schema change required:** Add `birthdate_year_unknown` (boolean, default false) to `Kith.Contacts.Contact`. When Monica provides a date with `is_year_unknown: true`, store the date using a sentinel year (year 1) and set `birthdate_year_unknown: true`. The UI and API should omit the year when this flag is set. - -Same approach for `first_met_at` — add `first_met_year_unknown` (boolean, default false) to the Contact schema. This field is included in the First Met Fields migration. - -When `is_age_based` is true, Monica computed the birthdate from an entered age — treat the year as approximate but known (import as a normal date, don't set the unknown flag). - -### Per-Contact Flow - -``` -For each contact in JSON: - 1. Check import status — if cancelled, stop processing - 2. 
Look up import_records for [account, "monica", "contact", contact.uuid] - 3. If found: - a. Check if local contact is soft-deleted (deleted_at set) - → skip, log "previously deleted in Kith, not restoring" - b. Otherwise → upsert contact + re-import children in Ecto.Multi - 4. If not found → insert contact + all children in Ecto.Multi - 5. Upsert import_record with current import_id - 6. Broadcast progress via PubSub — every `max(1, total ÷ 50)` contacts (adaptive: frequent enough for small imports, not excessive for large ones) - 7. Log result with contact name for debugging - 8. On changeset error → log detailed error (capped at 50 in summary), continue to next contact -``` - -### Relationship Edge Cases - -Phase 4 imports relationships after all contacts. If one of the two referenced contacts failed to import (changeset error in Phase 2), the relationship is skipped with a warning log: "Skipping relationship {type} between {uuid_a} and {uuid_b}: contact {failed_uuid} was not imported." - -## Photo Sync - -### Rate Limiting - -Monica defaults to 60 requests/minute per API key. - -**Approach:** Each photo is an independent Oban job with staggered scheduling. - -- After main import completes, enqueue one `PhotoSyncWorker` job per photo -- Jobs are scheduled with staggered `scheduled_at` timestamps: batches of 50 with 60-second gaps -- Each job is independent — a retry only re-downloads that single photo, never the batch - -### PhotoSyncWorker - -`Kith.Workers.PhotoSyncWorker` — Oban worker, queue: `:photo_sync` - -**Config requirement:** Add `photo_sync: 5` to Oban queues in `config/config.exs`. - -Per job: -1. Load the photo record and import record -2. Check `Kith.Storage.check_storage_limit/2` — if account is at capacity, mark photo as failed and return `:discard` -3. Call `GET {monica_url}/api/photos/{source_photo_id}` with Bearer token -4. Download binary → store via `Kith.Storage` -5. Update `Kith.Contacts.Photo` record with stored file path -6. 
On HTTP 429 → return `{:snooze, 60}` (Oban reschedules after 60s, does NOT reprocess batch) -7. On max retries exhausted → delete the Photo record (contact becomes photoless rather than having a permanently broken reference) -8. On other errors → Oban retries with backoff (max 3 attempts) - -### API Supplement Worker - -`Kith.Workers.ApiSupplementWorker` — Oban worker, queue: `:api_supplement` - -**Config requirement:** Add `api_supplement: 3` to Oban queues in `config/config.exs`. - -Handles all non-photo API fetches (first_met details, future supplement types). One job per contact **that has a `first_met_date` in the JSON export** — contacts without any first-met data are skipped (significantly reduces API calls). Staggered like photo sync (batches of 50, 60-second gaps). - -Per job: -1. Load the import record and contact -2. Call `GET {monica_url}/api/contacts/{source_contact_id}` with Bearer token -3. Extract `first_met_where` and `first_met_additional_information` from the response -4. Update the Kith contact record -5. On HTTP 429 → `{:snooze, 60}` -6. On other errors → Oban retries with backoff (max 3 attempts) - -The worker checks `api_options` on the import to determine which fields to fetch. If only `first_met_details` is selected (no photos), only this worker runs. If both are selected, both workers run concurrently with independent rate limiting. - -### Progress - -Photo sync and API supplement progress are tracked separately from the main import: -- Import summary includes `photos_total`, `photos_synced`, `supplements_total`, `supplements_synced` counters -- PubSub broadcasts progress for the UI on topic `"import:#{account_id}"` - -### Post-Import Cleanup - -**File cleanup:** Import files stored in `imports/{import_id}/` are retained for 30 days after import completion, then deleted. 
Add a periodic Oban cron job (`ImportFileCleanupWorker`, queue: `:default`, weekly schedule `"0 5 * * 0"`) that queries for completed/failed imports older than 30 days with a non-null `file_storage_key`, deletes the file from Storage, and nullifies the `file_storage_key`. - -**API key lifecycle:** When all async jobs for an import are complete (photo sync + API supplement), wipe `api_key_encrypted` from the imports record. The `ImportSourceWorker` checks after the main import; the last completing `PhotoSyncWorker` or `ApiSupplementWorker` also checks. A simple approach: after each async job completes, query for remaining pending jobs for that import — if zero remain, nullify the API key. - -**Failed photo cleanup:** When a `PhotoSyncWorker` job exhausts all 3 retry attempts, delete the `Kith.Contacts.Photo` record entirely. The contact simply has no photo rather than a permanently broken `pending_sync:` reference. This is handled in the worker's `max_attempts` exceeded callback. - -## Import Wizard UI - -### Location - -Replaces the existing import UI at `KithWeb.SettingsLive.Import` (`/settings/import`). The new `ImportWizardLive` handles multiple source types and is mounted at the same route. 
- -### Flow - -**Step 1 — Source selection:** -- Tabs or radio: "vCard (.vcf)" | "Monica CRM" -- Selecting a source shows its specific form - -**Step 2 — Monica form:** -- File upload (accepts `.json`) -- On upload: validate JSON structure (check `version`, `app_version`, `account.data`) -- Show summary: "Found 851 contacts, 26 relationships, 313 photos" -- Optional expandable section: "Connect to Monica API" - - Monica URL field - - API key field - - "Test Connection" button → hits `/api/me`, shows inline success/failure - - On successful connection, show checkboxes for API-supplemented data (from `api_supplement_options/0`): - - [x] Sync photos (313 found) - - [x] Fetch "How we met" details (first_met_where, first_met_additional_info) - - Future sources can add their own options here - - Checkboxes are only shown after a successful connection test - - Selected options are stored on the `imports` record as `api_options` (map) - -**Step 3 — Confirmation:** -- Summary table of what will be imported -- On re-import: "247 new contacts, 604 existing (will be updated)" -- "Start Import" button - -**Step 4 — Progress (LiveView):** -- Progress bar: "Processing contact 142/851..." -- Running counters: imported / updated / skipped / errors -- Expandable error log with specific failures -- On main import completion: summary card with totals -- If API options enabled, secondary progress bars that continue after main import: - - "Syncing photos: 42/313" (if photos selected) - - "Fetching details: 100/851" (if first_met_details selected) - -### Implementation - -`ImportWizardLive` LiveView with source-specific components: -- `MonicaImportComponent` — handles Monica-specific form, validation, summary -- `VcardImportComponent` — wraps existing VCF import UI - -PubSub updates from workers drive real-time progress — same pattern as existing `ImportWorker`. 
- -## VCard Refactoring - -Wrap existing VCard import into the new framework: - -- `Kith.Imports.Sources.VCard` implements `Source` behaviour -- Internally delegates to existing `Kith.VCard.Parser` -- VCard imports also write to `import_records` for consistency -- Existing `ImportWorker` is deprecated; new imports use `ImportSourceWorker` -- Old worker remains for any in-flight jobs to complete - -**Data flow:** The `ImportSourceWorker` loads the file from `Kith.Storage` using `file_storage_key`, reads it into a binary, and passes it to `source.import/4`. The VCard Source receives the binary and delegates to `Kith.VCard.Parser.parse/1` (same input format as today). This means the upload step must store the file via `Kith.Storage` before enqueuing the Oban job — the current `ImportWorker` pattern of passing `file_data` in job args is not carried over. - -**Existing imports:** Contacts previously imported via the old `ImportWorker` have no `import_records` entries. The first VCard import under the new system treats all contacts as new — existing `contact_exists?/2` duplicate detection (email/name match) is not carried into the new framework. Users who re-import an old VCard may see duplicates; this is acceptable as a one-time migration cost and can be resolved via the existing duplicate detection feature (`DuplicateDetectionWorker`). 
- -## File Structure - -``` -lib/kith/imports.ex # Context module -lib/kith/imports/source.ex # Behaviour definition -lib/kith/imports/import.ex # Import schema (job tracking) -lib/kith/imports/import_record.ex # ImportRecord schema (dedup) -lib/kith/imports/sources/monica.ex # Monica source implementation -lib/kith/imports/sources/vcard.ex # VCard source (wraps existing parser) -lib/kith/workers/import_source_worker.ex # Generic import Oban worker -lib/kith/workers/photo_sync_worker.ex # Photo download Oban worker -lib/kith/workers/api_supplement_worker.ex # API data supplement Oban worker -lib/kith/workers/import_file_cleanup_worker.ex # Periodic cleanup of import files (30-day retention) - -lib/kith_web/live/import_wizard_live.ex # Import wizard LiveView -lib/kith_web/live/components/monica_import_component.ex -lib/kith_web/live/components/vcard_import_component.ex - -priv/repo/migrations/TIMESTAMP_create_imports_and_import_records.exs -``` diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex index 5714d25..292ff82 100644 --- a/lib/kith/workers/monica_api_crawl_worker.ex +++ b/lib/kith/workers/monica_api_crawl_worker.ex @@ -2,9 +2,11 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do @moduledoc """ Oban worker that crawls a Monica CRM API instance and imports all contacts. - Single long-running job that paginates through the contacts API, imports - contacts with all embedded data, resolves cross-references, and optionally - imports photos. + Paginates through the contacts API, imports contacts with all embedded data, + and resolves cross-references. When the user opts into photos via + `api_options["photos"]`, this worker enqueues `MonicaPhotoSyncWorker` after + the main crawl completes — photo import runs as a separate job so the main + import status reflects only contact work. Connection is validated in the import wizard before this job is enqueued. 
""" From d5f5fd9773e8cfcb6b507fead3468713b00ad5da Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 22:01:53 +0300 Subject: [PATCH 10/58] docs: add design spec for account reset completeness fix Spec captures the photo-sync-after-reset bug, its root cause (orphaned import_records pointing at deleted contacts), and the decomposition of AccountResetWorker into a thin orchestrator plus one Cleanup module per data domain. Covers module layout, order-of-operations, account-scoped Oban job cancellation, error handling, and the test plan including a mandatory cross-account isolation check on every Cleanup module. --- ...05-15-account-reset-completeness-design.md | 494 ++++++++++++++++++ 1 file changed, 494 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md diff --git a/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md b/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md new file mode 100644 index 0000000..dc5f317 --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md @@ -0,0 +1,494 @@ +# Account Reset Completeness + +**Date:** 2026-05-15 +**Status:** Approved +**Scope:** Personal CRM (`kith`) + +## Context + +On dev, a Monica re-import after an account reset failed photo sync. The Monica +photo sync worker (`Kith.Workers.MonicaPhotoSyncWorker`) reported "contact is +deleted" for photos whose `contact.id` matched contacts that should no longer +exist. Root cause: the current `Kith.Workers.AccountResetWorker` hard-deletes +contacts (via CASCADE) but does **not** delete the polymorphic mapping rows in +`import_records`, nor the parent `imports` rows. On re-import: + +1. `MonicaApi.crawl/5` looks up `find_import_record(account, "monica_api", "contact", source_id)` +2. Finds a stale row from the prior import, pointing at a now-deleted `local_entity_id` +3. 
`handle_existing_contact` calls `Repo.get(Contacts.Contact, local_id)` → `nil` → + the function falls through to `do_create_api_contact` which creates a new contact + and inserts a second `import_record` for the same `(account_id, source, source_entity_type, source_entity_id)` tuple +4. The unique constraint on that tuple raises, OR the photo sync subsequently calls + `Repo.one` against the same lookup and crashes on `Ecto.MultipleResultsError` + +The bug surfaced as photo sync silently failing, but the underlying issue is +that the reset is incomplete in multiple dimensions, not just imports. + +### Other tables left orphaned by the current reset + +| Table | Today's behavior | Should be | +|---|---|---| +| `imports` | Untouched | Wiped | +| `import_records` | Untouched | Wiped | +| `conversations` | Untouched | Wiped (CASCADE → `messages`) | +| `journal_entries` | Untouched | Wiped | +| `tasks` | Untouched | Wiped | +| `reminders` (records) | Oban jobs cancelled; records remain | Wiped (CASCADE → `reminder_rules`, `reminder_instances`) | +| Reference data (genders, types) | Preserved | Preserved (no change) | +| `account_invitations` | Preserved | Preserved (no change) | + +### In-flight Oban jobs are also a hazard + +If a `MonicaApiCrawlWorker` or `MonicaPhotoSyncWorker` is running when reset +starts, it keeps inserting rows after the wipe. The current reset only cancels +reminder jobs (`cancel_reminder_jobs/1`). It must also cancel pending/scheduled +import-related jobs — but **only those belonging to the resetting account**, so +no other account's work is touched. + +## Goals + +1. After `AccountResetWorker` completes, no account-scoped data for the target + account remains beyond reference data (genders, relationship_types, + contact_field_types, etc.) and `account_invitations`. +2. A subsequent re-import (Monica API or vCard) for the same account succeeds + without seeing stale `import_records` from prior runs. +3. 
The reset cancels all in-flight import-related Oban jobs for the target + account before wiping data, eliminating the mid-flight write race. +4. Every cleanup operation is account-scoped. Running reset on account A does + not affect any row, file, or Oban job belonging to account B. +5. The fix does not turn `AccountResetWorker` into a god-module. Each domain's + cleanup lives next to that domain. + +## Non-goals + +- Preserving import history after reset. "Completely wipe" means the `imports` + rows go too. The Oban job record (state, completed_at) is the audit trail. +- Reference data preservation changes. Genders, relationship_types, etc. + continue to be preserved (current behavior). +- Hardening the photo sync worker against stale state as a belt-and-suspenders + defense. With reset cancelling jobs and wiping `import_records`, the worker + cannot see stale references. If a future bug bypasses reset, that's a + separate fix. +- Multi-tenant data-isolation review across the rest of the codebase. This + spec only addresses the reset path. + +## Out of scope + +- Soft-delete of accounts themselves (the `accounts` row stays). +- User accounts (`users` table). Reset clears data, not auth. +- Custom contact_field_types or other reference data the user has added — + preserved per the recommendation in the brainstorming session. +- Adding a DB-level FK to `import_records.local_entity_id`. The polymorphic + mapping pattern is intentional. + +## Design + +### Module decomposition + +The worker becomes pure orchestration. Each domain's cleanup module lives in +that domain's namespace. No `@behaviour` ceremony — a function-naming +convention (`wipe_for_account/1` returning `:ok`) is sufficient for one +consumer. 
+ +``` +lib/kith/ +├── audit_logs/cleanup.ex # NEW — wipe audit_logs +├── contacts/cleanup.ex # NEW — hard-delete contacts (CASCADE) +├── contacts/tags_and_activities_cleanup.ex # NEW — account-scoped tags + activities +├── conversations/cleanup.ex # NEW — wipe conversations (CASCADE → messages) +├── imports/cleanup.ex # NEW — wipe imports + import_records +├── imports/job_cancellation.ex # NEW — cancel pending Oban jobs for THIS account's imports +├── journal/cleanup.ex # NEW — wipe journal_entries +├── reminders/cleanup.ex # NEW — cancel reminder Oban jobs + wipe reminders (CASCADE) +├── storage/account_cleanup.ex # NEW — delete photo + document + import-upload files +├── tasks/cleanup.ex # NEW — wipe tasks +└── workers/account_reset_worker.ex # REFACTOR — orchestrator only (~40 LoC) +``` + +Each cleanup module exposes a single function: + +```elixir +defmodule Kith.Imports.Cleanup do + @moduledoc "Wipes all import history for a single account." + + alias Kith.{Imports.Import, Imports.ImportRecord, Repo} + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {records, _} = Repo.delete_all(from(r in ImportRecord, where: r.account_id == ^account_id)) + {imports, _} = Repo.delete_all(from(i in Import, where: i.account_id == ^account_id)) + Logger.info("[Imports.Cleanup] wiped #{records} records + #{imports} imports for account #{account_id}") + :ok + end +end +``` + +The worker: + +```elixir +defmodule Kith.Workers.AccountResetWorker do + use Oban.Worker, + queue: :default, + max_attempts: 3, + unique: [period: 300, fields: [:args], keys: [:account_id]] + + require Logger + + alias Kith.{AuditLogs, Contacts, Conversations, Imports, Journal, Reminders, Storage, Tasks} + + @cleaners [ + Imports.JobCancellation, + Storage.AccountCleanup, + Contacts.Cleanup, + Imports.Cleanup, + Conversations.Cleanup, + Reminders.Cleanup, + Tasks.Cleanup, + Journal.Cleanup, + 
Contacts.TagsAndActivitiesCleanup, + AuditLogs.Cleanup + ] + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"account_id" => account_id}}) do + Logger.metadata(account_id: account_id, worker: "AccountReset") + Logger.info("[AccountReset] starting reset for account #{account_id}") + write_initiated_audit_log(account_id) + + Enum.each(@cleaners, fn cleaner -> + Logger.info("[AccountReset] running #{inspect(cleaner)}") + :ok = cleaner.wipe_for_account(account_id) + end) + + Logger.info("[AccountReset] completed reset for account #{account_id}") + :ok + end + + defp write_initiated_audit_log(account_id) do + Kith.AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{reason: "Account data reset initiated"} + }) + end +end +``` + +### Data flow & order-of-operations + +The ordering is load-bearing: + +1. **`Imports.JobCancellation`** — must run FIRST. Otherwise a running + `MonicaApiCrawlWorker` keeps inserting rows after the wipe. +2. **`Storage.AccountCleanup`** — must run BEFORE `Contacts.Cleanup`. Contact + CASCADE deletes the `photos` and `documents` rows; once those rows are gone, + we can no longer iterate their `storage_key` values to delete files. Also + sweeps `imports.file_storage_key` for uploaded vCards. +3. **`Contacts.Cleanup`** — hard-deletes contacts; CASCADE removes addresses, + contact_fields, photos rows, documents rows, notes, debts, gifts, pets, + emotions, relationships, calls, life_events, duplicate_candidates, + immich_candidates. +4. **`Imports.Cleanup`** — wipes `import_records` then `imports`. Runs AFTER + contacts so `local_entity_id` references are already dangling — we just + sweep the whole table for this account, no coordination needed. +5. **`Conversations.Cleanup`** — wipes conversations; CASCADE removes messages. +6. 
**`Reminders.Cleanup`** — first cancels reminder Oban jobs (matching the + existing pattern, scoped to this account), then deletes reminders; CASCADE + removes reminder_rules, reminder_instances. +7. **`Tasks.Cleanup`** — wipes tasks. +8. **`Journal.Cleanup`** — wipes journal_entries. +9. **`Contacts.TagsAndActivitiesCleanup`** — wipes the account-scoped `tags` + and `activities` tables (no contact FK, so not cleared by step 3). +10. **`AuditLogs.Cleanup`** — runs LAST. The "account_data_reset" audit log + written at start needs to live until the reset completes; wiping it earlier + would erase the audit trail of the reset itself. + +### Account-scoped Oban job cancellation + +`Kith.Imports.JobCancellation.wipe_for_account/1` cancels jobs by querying +`Oban.Job` directly with account-scoped filters: + +```elixir +defmodule Kith.Imports.JobCancellation do + @moduledoc """ + Cancels all available/scheduled/retryable/executing Oban jobs for a single account's imports. + Scoping rule: only this account's import_ids and account_id are matched. + No other account's jobs are touched.
+ """ + + alias Kith.{Imports.Import, Repo} + import Ecto.Query + require Logger + + @import_workers ~w[ + Elixir.Kith.Workers.MonicaApiCrawlWorker + Elixir.Kith.Workers.MonicaPhotoSyncWorker + Elixir.Kith.Workers.MonicaDocumentImportWorker + Elixir.Kith.Workers.ImportSourceWorker + ] + + @cancellable_states ~w[available scheduled retryable executing] + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + import_ids = account_import_ids(account_id) + + import_cancelled = cancel_jobs_by_import_id(import_ids) + account_cancelled = cancel_jobs_by_account_id(account_id) + + Logger.info( + "[Imports.JobCancellation] cancelled #{import_cancelled} import job(s) + " <> + "#{account_cancelled} account-scoped job(s) for account #{account_id}" + ) + + :ok + end + + defp account_import_ids(account_id) do + Repo.all(from(i in Import, where: i.account_id == ^account_id, select: i.id)) + end + + defp cancel_jobs_by_import_id([]), do: 0 + + defp cancel_jobs_by_import_id(import_ids) do + jobs = + Repo.all( + from(j in Oban.Job, + where: j.worker in ^@import_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'import_id')::int", j.args) in ^import_ids + ) + ) + + Enum.each(jobs, &Oban.cancel_job/1) + length(jobs) + end + + defp cancel_jobs_by_account_id(account_id) do + jobs = + Repo.all( + from(j in Oban.Job, + where: j.worker == "Elixir.Kith.Workers.DuplicateDetectionWorker", + where: j.state in ^@cancellable_states, + where: fragment("(?->>'account_id')::int", j.args) == ^account_id + ) + ) + + Enum.each(jobs, &Oban.cancel_job/1) + length(jobs) + end +end +``` + +Two key properties: +- **Account-scoped**: every WHERE clause filters by `account_id` (directly or + transitively via `import_id IN account's imports`). +- **State filter**: only jobs in cancellable states are touched. Completed and + cancelled jobs are left alone. 
+ +### Error handling + +- Each cleanup module returns `:ok` on success, raises on unexpected failure. +- The worker's `Enum.each` propagates the raise. Oban catches, marks the job + `:retryable`, and retries per backoff (`max_attempts: 3`). +- After 3 attempts, the job moves to `:discarded`. The Oban Web dashboard + surfaces this to admins. The audit log written at the start is still present + (since `AuditLogs.Cleanup` is last) → user/admin can see the reset was + attempted. +- Bulk deletes are NOT wrapped in `Ecto.Multi`. Each `Repo.delete_all` is its + own transaction; large accounts don't fight for a single long-held lock. +- Cleanup operations are inherently idempotent (deleting from an empty table + succeeds with `{0, nil}`). Retries are safe. + +### Storage delete: the one warn-and-continue path + +Storage operations can fail benignly (S3 already deleted, network blip). The +existing pattern is preserved: + +```elixir +defp safe_delete_file(nil), do: :ok + +defp safe_delete_file(key) do + case Kith.Storage.delete(key) do + :ok -> :ok + + {:error, reason} -> + Logger.warning("[Storage.AccountCleanup] failed to delete #{key}: #{inspect(reason)}") + :ok + end +end +``` + +This is `:ok` because storage objects are recoverable separately (S3 lifecycle, +manual sweep) and don't affect data integrity. + +### Observability + +Logger metadata: every cleanup logs with `account_id` and `worker` in +`Logger.metadata`, plus a `[Module.Name]` prefix in the message body. Sample: + +``` +[AccountReset] starting reset for account 42 +[AccountReset] running Kith.Imports.JobCancellation +[Imports.JobCancellation] cancelled 3 import job(s) + 1 account-scoped job(s) for account 42 +[AccountReset] running Kith.Storage.AccountCleanup +[Storage.AccountCleanup] deleted 47 photo files + 12 document files + 2 import uploads for account 42 +[AccountReset] running Kith.Contacts.Cleanup +[Contacts.Cleanup] hard-deleted 423 contacts (CASCADE) for account 42 +... 
+[AccountReset] completed reset for account 42 +``` + +The structured `account_id` metadata reaches log search and Sentry as a tag, +not just a substring in the message. + +## Testing + +### Per-module unit tests + +Every Cleanup module gets `test/kith/<domain>/cleanup_test.exs` with the same +shape: + +- Fixture data for the target account AND a control account +- Call `wipe_for_account(target_account_id)` +- Assert: target rows are zero; control rows are unchanged + +The **control-account untouched assertion is mandatory** in every test — it's +the contract that protects against cross-account leakage. + +### `Imports.JobCancellation` test + +Uses `Oban.Testing`. Inserts pending jobs for both accounts (matching all four +`@import_workers` plus `DuplicateDetectionWorker`). After +`wipe_for_account(target)`: + +- Target's jobs: state `"cancelled"` +- Other account's jobs: state `"available"` (unchanged) +- Completed jobs (state `"completed"`) for the target: also unchanged (we only + cancel still-cancellable states) + +### Regression test for the user-reported bug + +`test/kith/workers/account_reset_worker_test.exs` gets the actual scenario +that broke on dev: + +```elixir +test "re-import after reset can sync photos without finding stale import_records", ctx do + # Initial import: creates contact + import_record for Monica id 964 + import_a = import_fixture(ctx.account, ctx.user_id, %{source: "monica_api"}) + contact_a = contact_fixture(ctx.account) + {:ok, _} = Imports.record_imported_entity(import_a, "contact", "964", "contact", contact_a.id) + + # Full reset + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.account}) + + # Target account fully wiped + assert count_for(Contacts.Contact, ctx.account) == 0 + assert count_for(Imports.Import, ctx.account) == 0 + assert count_for(Imports.ImportRecord, ctx.account) == 0 + + # Re-import: new contact + new import_record for the same Monica id 964 + import_b = import_fixture(ctx.account, ctx.user_id, %{source:
"monica_api"}) + contact_b = contact_fixture(ctx.account) + {:ok, _} = Imports.record_imported_entity(import_b, "contact", "964", "contact", contact_b.id) + + # The photo sync lookup that previously found stale data now resolves to the new contact + assert %{local_entity_id: local_id} = + Imports.find_import_record(ctx.account, "monica_api", "contact", "964") + + assert local_id == contact_b.id +end +``` + +### Cross-account isolation test on the worker + +Snapshot-based: populate two accounts with data across every wiped domain, +reset one, assert the other's snapshot is bit-identical. + +```elixir +defp data_snapshot(account_id) do + %{ + contacts: count_for(Contacts.Contact, account_id), + imports: count_for(Imports.Import, account_id), + import_records: count_for(Imports.ImportRecord, account_id), + conversations: count_for(Conversations.Conversation, account_id), + tasks: count_for(Tasks.Task, account_id), + journal_entries: count_for(Journal.Entry, account_id), + reminders: count_for(Reminders.Reminder, account_id), + tags: count_for(Contacts.Tag, account_id), + activities: count_for(Activities.Activity, account_id), + audit_logs: count_for(AuditLogs.AuditLog, account_id) + } +end +``` + +Every new domain we wipe in the future adds one line to `data_snapshot/1` — +forgetting will cause the isolation test to fail loudly. + +### Idempotency tests + +Every Cleanup module: call `wipe_for_account/1` twice in a row; assert second +call returns `:ok` with zero counts (or whatever the second-call shape is). +Cheap, catches any assumption that the table has data. + +### What's NOT tested + +- Oban retry semantics — rely on the library's own coverage. +- Storage backend internals — `Kith.Storage.Local` and `Kith.Storage.S3` have + their own tests; `safe_delete_file/1`'s warn-on-error path is small enough + to verify by reading. + +## Migration / backwards compatibility + +No DB migrations required. All changes are at the Elixir module layer. 
+ +Existing accounts in any state work with the new worker — including accounts +that already have orphaned `import_records` from prior resets. The next reset +will sweep them. + +## Files changed + +| File | Change | +|---|---| +| `lib/kith/audit_logs/cleanup.ex` | NEW | +| `lib/kith/contacts/cleanup.ex` | NEW | +| `lib/kith/contacts/tags_and_activities_cleanup.ex` | NEW | +| `lib/kith/conversations/cleanup.ex` | NEW | +| `lib/kith/imports/cleanup.ex` | NEW | +| `lib/kith/imports/job_cancellation.ex` | NEW | +| `lib/kith/journal/cleanup.ex` | NEW | +| `lib/kith/reminders/cleanup.ex` | NEW | +| `lib/kith/storage/account_cleanup.ex` | NEW | +| `lib/kith/tasks/cleanup.ex` | NEW | +| `lib/kith/workers/account_reset_worker.ex` | REFACTOR — orchestrator only | +| `test/kith/audit_logs/cleanup_test.exs` | NEW | +| `test/kith/contacts/cleanup_test.exs` | NEW | +| `test/kith/contacts/tags_and_activities_cleanup_test.exs` | NEW | +| `test/kith/conversations/cleanup_test.exs` | NEW | +| `test/kith/imports/cleanup_test.exs` | NEW | +| `test/kith/imports/job_cancellation_test.exs` | NEW | +| `test/kith/journal/cleanup_test.exs` | NEW | +| `test/kith/reminders/cleanup_test.exs` | NEW | +| `test/kith/storage/account_cleanup_test.exs` | NEW | +| `test/kith/tasks/cleanup_test.exs` | NEW | +| `test/kith/workers/account_reset_worker_test.exs` | EXTEND — add regression + isolation tests | + +## Verification + +1. `mix test` — 0 failures. +2. `mix quality` — clean (format + credo + sobelow + dialyzer). +3. Manual on dev: import Monica account, trigger reset via Settings → Account, + re-import the same Monica account, confirm photo sync now succeeds. +4. `tail -f log/dev.log | grep '\[AccountReset\|Cleanup\|JobCancellation\]'` + shows the structured per-step progress. + +## References + +- SOLID for Elixir standards: `07-Documentation/Standards/solid-principles-elixir.md` + (vault). 
Specifically §SRP-module ("god module" anti-pattern) and §OCP-decision-tree + for the function-naming-convention vs. behaviour trade-off. +- The bug surfaced in Monica re-import photo sync; root cause is the + `import_records.local_entity_id` polymorphic mapping with no DB-level FK. From e7bcea21dc558ea65c16771937dff1cdde94f9f0 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 22:33:45 +0300 Subject: [PATCH 11/58] docs: add implementation plan for account-reset-completeness + spec refinement Plan decomposes the work into 13 TDD tasks, each producing one cleanup module (or the worker refactor) per commit. Spec refinement: split the proposed TagsAndActivitiesCleanup into Contacts.Cleanup (handles tags as part of the contacts axis) and Activities.Cleanup (its own context), aligning with the SOLID-elixir SRP guidance flagged in the brainstorming pass. --- .../2026-05-15-account-reset-completeness.md | 1964 +++++++++++++++++ ...05-15-account-reset-completeness-design.md | 20 +- 2 files changed, 1976 insertions(+), 8 deletions(-) create mode 100644 docs/superpowers/plans/2026-05-15-account-reset-completeness.md diff --git a/docs/superpowers/plans/2026-05-15-account-reset-completeness.md b/docs/superpowers/plans/2026-05-15-account-reset-completeness.md new file mode 100644 index 0000000..bdf127d --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-account-reset-completeness.md @@ -0,0 +1,1964 @@ +# Account Reset Completeness Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Make `Kith.Workers.AccountResetWorker` fully wipe a single account's data (imports, conversations, journal, tasks, reminders, plus existing contacts/tags/activities/audit), cancel its in-flight Oban jobs first, while leaving every other account untouched. + +**Architecture:** The worker becomes a thin orchestrator that iterates over an ordered list of per-domain `Cleanup` modules. Each cleanup module exposes `wipe_for_account(account_id) :: :ok` and lives next to its domain (`Kith.Imports.Cleanup`, `Kith.Conversations.Cleanup`, etc.). Account scoping is enforced inside each cleanup with a `where: x.account_id == ^account_id` clause. In-flight Oban job cancellation queries `Oban.Job` directly with account-scoped filters (`import_id IN account's imports` / `account_id == this_account`). + +**Tech Stack:** Elixir, Phoenix, Ecto, Oban, PostgreSQL. Test framework: ExUnit + Oban.Testing. + +**Spec:** `docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md` + +**Worktree:** Work happens in the existing branch `fix/duplicate-detection` at `/Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection`. Each task is one commit; push at the end. 
+ +--- + +## File structure (locked-in decomposition) + +**New files:** + +| Path | Responsibility | +|---|---| +| `lib/kith/imports/cleanup.ex` | Wipe `imports` + `import_records` for one account | +| `lib/kith/imports/job_cancellation.ex` | Cancel Oban jobs whose `args.import_id ∈ account's imports` (+ DuplicateDetection by `account_id`) | +| `lib/kith/conversations/cleanup.ex` | Wipe `conversations` (CASCADE → `messages`) | +| `lib/kith/journal/cleanup.ex` | Wipe `journal_entries` | +| `lib/kith/tasks/cleanup.ex` | Wipe `tasks` | +| `lib/kith/reminders/cleanup.ex` | Cancel reminder Oban jobs + wipe `reminders` (CASCADE → `reminder_rules` + `reminder_instances`) | +| `lib/kith/storage/account_cleanup.ex` | Delete photo + document + import-upload files | +| `lib/kith/contacts/cleanup.ex` | Hard-delete `contacts` (CASCADE) + wipe `tags` | +| `lib/kith/activities/cleanup.ex` | Wipe `activities` | +| `lib/kith/audit_logs/cleanup.ex` | Wipe `audit_logs` | + +**Refactored:** + +| Path | Change | +|---|---| +| `lib/kith/workers/account_reset_worker.ex` | Replace per-domain private helpers with an ordered `@cleaners` list and `Enum.each` orchestration | + +**New tests:** one per cleanup module, plus regression + isolation tests on the worker. + +--- + +## Task ordering rationale + +Each task delivers a new cleanup module + tests in one commit. Tasks 1–10 do NOT modify `AccountResetWorker` — they just create the new modules. Task 11 wires the worker to use them, in one commit, with the old private helpers removed. Task 12 adds the user-reported regression test plus the cross-account isolation test on the worker. + +This ordering means each task is independently reviewable, the worker change is one atomic commit, and the bug isn't half-fixed at any commit boundary. 
+ +--- + +## Task 1: `Kith.Imports.Cleanup` + +**Files:** +- Create: `lib/kith/imports/cleanup.ex` +- Create: `test/kith/imports/cleanup_test.exs` + +This is the most bug-critical module — the user's photo sync failure traces directly to orphaned `import_records`. Do it first so end-to-end testing on dev can validate the fix early. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/imports/cleanup_test.exs`: + +```elixir +defmodule Kith.Imports.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Imports + alias Kith.Imports.{Cleanup, Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes imports + import_records for target account; leaves other account untouched", ctx do + target_import = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(ctx.other_account, ctx.other_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + {:ok, _} = Imports.record_imported_entity(target_import, "contact", "1", "contact", 999) + {:ok, _} = Imports.record_imported_entity(other_import, "contact", "1", "contact", 999) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Import, ctx.target_account) == 0 + assert count_for(ImportRecord, ctx.target_account) == 0 + + # Control account untouched + assert count_for(Import, ctx.other_account) == 1 + assert count_for(ImportRecord, ctx.other_account) == 1 + end + + test "is idempotent on an account with no import data", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = 
Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/imports/cleanup_test.exs +``` + +Expected: both tests fail with `UndefinedFunctionError` — `Kith.Imports.Cleanup` does not exist yet (Elixir only warns about the missing module at compile time). + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/imports/cleanup.ex`: + +```elixir +defmodule Kith.Imports.Cleanup do + @moduledoc """ + Wipes all import history for a single account. + + Deletes `import_records` first then `imports`. Both tables are scoped by + `account_id` directly. Called by `Kith.Workers.AccountResetWorker`. + + Must run AFTER `Kith.Imports.JobCancellation` and + `Kith.Storage.AccountCleanup`: both read this account's `imports` rows + (for job `import_id`s and upload `file_storage_key`s), which are gone + once this module has run. + """ + + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {records, _} = + Repo.delete_all(from(r in ImportRecord, where: r.account_id == ^account_id)) + + {imports, _} = + Repo.delete_all(from(i in Import, where: i.account_id == ^account_id)) + + Logger.info( + "[Imports.Cleanup] wiped #{records} record(s) + #{imports} import(s) for account #{account_id}" + ) + + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/imports/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/imports/cleanup.ex test/kith/imports/cleanup_test.exs +git commit -m "feat: add Kith.Imports.Cleanup for account-scoped import wipe" +``` + +--- + +## Task 2: `Kith.Imports.JobCancellation` + +**Files:** +- Create: `lib/kith/imports/job_cancellation.ex` +- Create: `test/kith/imports/job_cancellation_test.exs` + +Cancels available/scheduled/retryable/executing Oban jobs for this account's imports. 
Matches by `args.import_id IN (account's imports)` for the four import-worker classes, plus `args.account_id == this_account` for `DuplicateDetectionWorker`. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/imports/job_cancellation_test.exs`: + +```elixir +defmodule Kith.Imports.JobCancellationTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Imports.JobCancellation + alias Kith.Repo + alias Kith.Workers.{DuplicateDetectionWorker, MonicaPhotoSyncWorker} + + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + target_import = + import_fixture(target.account_id, target.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(other.account_id, other.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + %{ + target_account: target.account_id, + target_import: target_import, + other_account: other.account_id, + other_import: other_import + } + end + + test "cancels target account's import jobs; leaves other account's jobs alone", ctx do + {:ok, target_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + {:ok, other_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.other_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_photo_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_photo_job.id).state == "available" + end + + test "cancels DuplicateDetectionWorker jobs by account_id", ctx do + {:ok, target_dup_job} = + Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.target_account})) + + {:ok, other_dup_job} = + 
Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.other_account})) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_dup_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_dup_job.id).state == "available" + end + + test "is a no-op when account has no jobs", ctx do + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + end + + test "ignores jobs already in 'completed' state", ctx do + {:ok, completed_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + # Manually mark as completed + completed_job + |> Ecto.Changeset.change(state: "completed", completed_at: DateTime.utc_now()) + |> Repo.update!() + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + # Completed jobs are NOT touched + assert Repo.get!(Oban.Job, completed_job.id).state == "completed" + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/imports/job_cancellation_test.exs +``` + +Expected: compile error — `Kith.Imports.JobCancellation` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/imports/job_cancellation.ex`: + +```elixir +defmodule Kith.Imports.JobCancellation do + @moduledoc """ + Cancels all pending/scheduled/retryable/executing Oban jobs that belong to a + single account's imports. + + Scoping rule: only jobs whose args reference this account (directly via + `account_id` or transitively via `import_id` belonging to one of this + account's imports) are touched. No other account's jobs are affected. 
+ """ + + alias Kith.Imports.Import + alias Kith.Repo + + import Ecto.Query + require Logger + + # NOTE(review): `Kith.Workers.ImportWorker` is not listed — confirm whether its + # jobs carry an `import_id` and should be cancelled here as well. + @import_workers ~w[ + Elixir.Kith.Workers.MonicaApiCrawlWorker + Elixir.Kith.Workers.MonicaPhotoSyncWorker + Elixir.Kith.Workers.MonicaDocumentImportWorker + Elixir.Kith.Workers.ImportSourceWorker + ] + + @cancellable_states ~w[available scheduled retryable executing] + + @doc """ + Cancels every cancellable Oban job tied to `account_id` — import-worker jobs + matched through the account's import ids, plus `DuplicateDetectionWorker` + jobs matched by `account_id` — and logs how many were cancelled. + + Always returns `:ok`. + """ + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + import_ids = account_import_ids(account_id) + import_cancelled = cancel_jobs_by_import_id(import_ids) + account_cancelled = cancel_jobs_by_account_id(account_id) + + Logger.info( + "[Imports.JobCancellation] cancelled #{import_cancelled} import job(s) + " <> + "#{account_cancelled} account-scoped job(s) for account #{account_id}" + ) + + :ok + end + + # Ids of every import row owned by the account. + defp account_import_ids(account_id) do + Repo.all(from(i in Import, where: i.account_id == ^account_id, select: i.id)) + end + + # No imports means no import jobs can match; skip the query entirely. + defp cancel_jobs_by_import_id([]), do: 0 + + defp cancel_jobs_by_import_id(import_ids) do + # Cast to bigint rather than int: an int4 cast raises "integer out of range" + # for ids above 2_147_483_647, and ids may be bigint (Ecto's default + # primary-key type). + cancel_matching( + from(j in Oban.Job, + where: j.worker in ^@import_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'import_id')::bigint", j.args) in ^import_ids + ) + ) + end + + defp cancel_jobs_by_account_id(account_id) do + cancel_matching( + from(j in Oban.Job, + where: j.worker == "Elixir.Kith.Workers.DuplicateDetectionWorker", + where: j.state in ^@cancellable_states, + where: fragment("(?->>'account_id')::bigint", j.args) == ^account_id + ) + ) + end + + # Cancels every job matched by `query` in a single call instead of loading the + # rows and cancelling them one by one; returns the number of cancelled jobs. + defp cancel_matching(query) do + {:ok, cancelled} = Oban.cancel_all_jobs(query) + cancelled + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/imports/job_cancellation_test.exs +``` + +Expected: 4 tests, 0 failures. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/imports/job_cancellation.ex test/kith/imports/job_cancellation_test.exs +git commit -m "feat: add Kith.Imports.JobCancellation for account-scoped Oban cancel" +``` + +--- + +## Task 3: `Kith.Storage.AccountCleanup` + +**Files:** +- Create: `lib/kith/storage/account_cleanup.ex` +- Create: `test/kith/storage/account_cleanup_test.exs` + +Iterates storage keys for the account's photos, documents, and import uploads, calls `Kith.Storage.delete/1` on each. Logs warnings on failure but never raises (storage failures must not abort the reset). + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/storage/account_cleanup_test.exs`: + +```elixir +defmodule Kith.Storage.AccountCleanupTest do + use Kith.DataCase, async: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Storage + alias Kith.Storage.AccountCleanup + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "deletes target account's photo + import-upload files; leaves other account's files alone", + ctx do + {target_photo_key, _} = upload_and_attach_photo!(ctx.target_account) + {other_photo_key, _} = upload_and_attach_photo!(ctx.other_account) + + target_upload_key = upload_import_file!(ctx.target_account, ctx.target_user) + other_upload_key = upload_import_file!(ctx.other_account, ctx.other_user) + + assert {:ok, _} = Storage.read(target_photo_key) + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(target_upload_key) + assert {:ok, _} = Storage.read(other_upload_key) + + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + + assert {:error, _} = Storage.read(target_photo_key) + assert {:error, _} = Storage.read(target_upload_key) + + # Control 
account untouched + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(other_upload_key) + end + + test "is a no-op when account has no files", ctx do + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + end + + defp upload_and_attach_photo!(account_id) do + contact = contact_fixture(account_id) + binary = <<0xFF, 0xD8, 0xFF, 0xE0>> + key = Storage.generate_key(account_id, "photos", "test.jpg") + {:ok, _} = Storage.upload_binary(binary, key) + + {:ok, photo} = + Contacts.create_photo(contact, %{ + "file_name" => "test.jpg", + "storage_key" => key, + "file_size" => byte_size(binary), + "content_type" => "image/jpeg" + }) + + {key, photo} + end + + defp upload_import_file!(account_id, user_id) do + key = Storage.generate_key(account_id, "imports", "export.vcf") + {:ok, _} = Storage.upload_binary("BEGIN:VCARD\nEND:VCARD\n", key) + + {:ok, _} = + Imports.create_import(account_id, user_id, %{ + source: "vcard", + file_name: "export.vcf", + file_size: 22, + file_storage_key: key + }) + + key + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/storage/account_cleanup_test.exs +``` + +Expected: compile error — `Kith.Storage.AccountCleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/storage/account_cleanup.ex`: + +```elixir +defmodule Kith.Storage.AccountCleanup do + @moduledoc """ + Deletes physical storage objects (photos, documents, import upload files) + for a single account. + + Storage failures (S3 already-deleted, network blip) are logged at `:warning` + but never raise — they must not abort the surrounding account reset. + Storage objects are recoverable separately (S3 lifecycle, manual sweep) + and don't affect data integrity. + + Must run BEFORE `Kith.Contacts.Cleanup` — once contacts are hard-deleted, + the `photos` and `documents` rows are CASCADE-deleted and we can no longer + iterate their `storage_key` values. 
+ """ + + alias Kith.Contacts.{Contact, Document, Photo} + alias Kith.Imports.Import + alias Kith.{Repo, Storage} + + import Ecto.Query + require Logger + + @doc """ + Attempts to delete every photo, document and import-upload object stored for + `account_id` and logs how many were actually removed. + + Returns `:ok` even when `Storage.delete/1` reports an error for some keys. + """ + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + photo_count = delete_keys(photo_keys(account_id)) + document_count = delete_keys(document_keys(account_id)) + upload_count = delete_keys(import_upload_keys(account_id)) + + Logger.info( + "[Storage.AccountCleanup] deleted #{photo_count} photo file(s) + " <> + "#{document_count} document file(s) + #{upload_count} import upload(s) " <> + "for account #{account_id}" + ) + + :ok + end + + # Storage keys of photos attached to the account's contacts. + defp photo_keys(account_id) do + Repo.all( + from(p in Photo, + join: c in Contact, + on: p.contact_id == c.id, + where: c.account_id == ^account_id, + select: p.storage_key + ) + ) + end + + # Storage keys of documents attached to the account's contacts. + defp document_keys(account_id) do + Repo.all( + from(d in Document, + join: c in Contact, + on: d.contact_id == c.id, + where: c.account_id == ^account_id, + select: d.storage_key + ) + ) + end + + # Reads `imports` rows, so this module must also run before anything that + # deletes them (e.g. `Kith.Imports.Cleanup`). + defp import_upload_keys(account_id) do + Repo.all( + from(i in Import, + where: i.account_id == ^account_id, + where: not is_nil(i.file_storage_key), + select: i.file_storage_key + ) + ) + end + + # Tries every key and returns how many objects were actually deleted, so the + # summary log does not count `nil` keys or failed deletions. + defp delete_keys(keys), do: Enum.count(keys, &deleted?/1) + + defp deleted?(nil), do: false + + defp deleted?(key) do + case Storage.delete(key) do + :ok -> + true + + {:error, reason} -> + # Best effort: log and carry on so one bad object cannot abort the reset. + Logger.warning("[Storage.AccountCleanup] failed to delete #{key}: #{inspect(reason)}") + false + end + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/storage/account_cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/storage/account_cleanup.ex test/kith/storage/account_cleanup_test.exs +git commit -m "feat: add Kith.Storage.AccountCleanup for account-scoped file wipe" +``` + +--- + +## Task 4: `Kith.Contacts.Cleanup` + +**Files:** +- Create: `lib/kith/contacts/cleanup.ex` +- Create: `test/kith/contacts/cleanup_test.exs` + +Hard-deletes contacts (FK CASCADE handles addresses, contact_fields, photos rows, documents rows, notes, debts, gifts, pets, emotions, relationships, calls, life_events, duplicate_candidates, immich_candidates). Also wipes `tags` (account-scoped, no contact FK). Tags share the contacts axis-of-change so they're colocated. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/contacts/cleanup_test.exs`: + +```elixir +defmodule Kith.Contacts.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Contacts.{Cleanup, Contact, Tag} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "hard-deletes contacts + tags for target account; leaves other account untouched", ctx do + contact_fixture(ctx.target_account) + contact_fixture(ctx.target_account) + contact_fixture(ctx.other_account) + + Repo.insert!(%Tag{account_id: ctx.target_account, name: "target-tag"}) + Repo.insert!(%Tag{account_id: ctx.other_account, name: "other-tag"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Contact, ctx.target_account) == 0 + assert count_for(Tag, ctx.target_account) == 0 + + assert count_for(Contact, ctx.other_account) == 1 + assert count_for(Tag, ctx.other_account) == 1 + end + + test "ignores soft-deleted vs not — hard-deletes both", ctx do + active = contact_fixture(ctx.target_account) + soft = contact_fixture(ctx.target_account) + + soft + |> 
Ecto.Changeset.change(deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)) + |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + refute Repo.get(Contact, active.id) + refute Repo.get(Contact, soft.id) + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/contacts/cleanup_test.exs +``` + +Expected: compile error — `Kith.Contacts.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/contacts/cleanup.ex`: + +```elixir +defmodule Kith.Contacts.Cleanup do + @moduledoc """ + Hard-deletes all contacts (and CASCADE sub-entities) and account-scoped + tags for a single account. + + Sub-entities cleared via FK CASCADE: addresses, contact_fields, photos + (rows), documents (rows), notes, debts, gifts, pets, emotions, + relationships, calls, life_events, duplicate_candidates, immich_candidates. + + Note: `Kith.Storage.AccountCleanup` MUST run before this module so that + photo/document storage_keys can be enumerated before their rows are wiped. + + Tags are wiped here (not in a separate module) because they share the + contacts axis-of-change and have no other purpose. 
+ """ + + alias Kith.Contacts.{Contact, Tag} + alias Kith.Repo + + import Ecto.Query + require Logger + + @batch_size 200 + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + contacts_deleted = delete_contacts_in_batches(account_id, 0) + + {tags_deleted, _} = + Repo.delete_all(from(t in Tag, where: t.account_id == ^account_id)) + + Logger.info( + "[Contacts.Cleanup] hard-deleted #{contacts_deleted} contact(s) + " <> + "#{tags_deleted} tag(s) for account #{account_id}" + ) + + :ok + end + + defp delete_contacts_in_batches(account_id, acc) do + ids = + Repo.all( + from(c in Contact, + where: c.account_id == ^account_id, + select: c.id, + limit: @batch_size + ) + ) + + case ids do + [] -> + acc + + _ -> + {deleted, _} = Repo.delete_all(from(c in Contact, where: c.id in ^ids)) + delete_contacts_in_batches(account_id, acc + deleted) + end + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/contacts/cleanup_test.exs +``` + +Expected: 3 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/contacts/cleanup.ex test/kith/contacts/cleanup_test.exs +git commit -m "feat: add Kith.Contacts.Cleanup for account-scoped contacts+tags wipe" +``` + +--- + +## Task 5: `Kith.Conversations.Cleanup` + +**Files:** +- Create: `lib/kith/conversations/cleanup.ex` +- Create: `test/kith/conversations/cleanup_test.exs` + +Wipes `conversations` rows; CASCADE removes `messages`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/conversations/cleanup_test.exs`: + +```elixir +defmodule Kith.Conversations.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Conversations.{Cleanup, Conversation, Message} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes conversations (CASCADE messages) for target; leaves other untouched", ctx do + target_conv = insert_conversation!(ctx.target_account, ctx.target_user, ctx.target_contact.id) + other_conv = insert_conversation!(ctx.other_account, ctx.other_user, ctx.other_contact.id) + + insert_message!(target_conv.id, ctx.target_account) + insert_message!(other_conv.id, ctx.other_account) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Conversation, ctx.target_account) == 0 + assert count_for(Message, ctx.target_account) == 0 + + assert count_for(Conversation, ctx.other_account) == 1 + assert count_for(Message, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp insert_conversation!(account_id, user_id, contact_id) do + Repo.insert!(%Conversation{ + account_id: account_id, + creator_id: user_id, + contact_id: contact_id, + subject: "test", + platform: "other", + status: "active", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + end + + defp insert_message!(conversation_id, account_id) do + Repo.insert!(%Message{ + account_id: account_id, 
+ conversation_id: conversation_id, + body: "hi", + direction: "outgoing", + sent_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: If the `Conversation` or `Message` schema fields shown above don't match the actual schema (check `lib/kith/conversations/conversation.ex` and `lib/kith/conversations/message.ex`), adjust the test inserts to satisfy the schema. Required fields per the conversation schema reading are `account_id`, `creator_id`, `contact_id`, `subject`, `occurred_at`. Required for messages: `conversation_id`, `body`, `sent_at`. Read the schemas if any insert fails. + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/conversations/cleanup_test.exs +``` + +Expected: compile error — `Kith.Conversations.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/conversations/cleanup.ex`: + +```elixir +defmodule Kith.Conversations.Cleanup do + @moduledoc """ + Wipes all conversations for a single account. FK CASCADE removes the + associated `messages` rows. + """ + + alias Kith.Conversations.Conversation + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(c in Conversation, where: c.account_id == ^account_id)) + + Logger.info("[Conversations.Cleanup] wiped #{count} conversation(s) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/conversations/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +If the insert step fails because of schema mismatch, read `lib/kith/conversations/conversation.ex` and `lib/kith/conversations/message.ex`, fix the test setup, and re-run. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/conversations/cleanup.ex test/kith/conversations/cleanup_test.exs +git commit -m "feat: add Kith.Conversations.Cleanup for account-scoped conversation wipe" +``` + +--- + +## Task 6: `Kith.Journal.Cleanup` + +**Files:** +- Create: `lib/kith/journal/cleanup.ex` +- Create: `test/kith/journal/cleanup_test.exs` + +Wipes `journal_entries`. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/journal/cleanup_test.exs`: + +```elixir +defmodule Kith.Journal.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Journal + alias Kith.Journal.{Cleanup, Entry} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes journal entries for target account only", ctx do + {:ok, _} = + Journal.create_entry(ctx.target_account, ctx.target_user, %{ + "content" => "target", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + {:ok, _} = + Journal.create_entry(ctx.other_account, ctx.other_user, %{ + "content" => "other", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Entry, ctx.target_account) == 0 + assert count_for(Entry, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: `Journal.create_entry/3` may accept atom or string-keyed attrs. If the test fails on map shape, read `lib/kith/journal.ex:47` for the signature and adjust. 
+ +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/journal/cleanup_test.exs +``` + +Expected: compile error — `Kith.Journal.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/journal/cleanup.ex`: + +```elixir +defmodule Kith.Journal.Cleanup do + @moduledoc """ + Wipes all journal entries for a single account. + """ + + alias Kith.Journal.Entry + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(e in Entry, where: e.account_id == ^account_id)) + + Logger.info("[Journal.Cleanup] wiped #{count} journal entr(ies) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/journal/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/journal/cleanup.ex test/kith/journal/cleanup_test.exs +git commit -m "feat: add Kith.Journal.Cleanup for account-scoped journal wipe" +``` + +--- + +## Task 7: `Kith.Tasks.Cleanup` + +**Files:** +- Create: `lib/kith/tasks/cleanup.ex` +- Create: `test/kith/tasks/cleanup_test.exs` + +Wipes `tasks`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/tasks/cleanup_test.exs`: + +```elixir +defmodule Kith.Tasks.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Repo + alias Kith.Tasks + alias Kith.Tasks.{Cleanup, Task} + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes tasks for target account only", ctx do + {:ok, _} = Tasks.create_task(ctx.target_account, ctx.target_user, %{"title" => "target task"}) + {:ok, _} = Tasks.create_task(ctx.other_account, ctx.other_user, %{"title" => "other task"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Task, ctx.target_account) == 0 + assert count_for(Task, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/tasks/cleanup_test.exs +``` + +Expected: compile error — `Kith.Tasks.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/tasks/cleanup.ex`: + +```elixir +defmodule Kith.Tasks.Cleanup do + @moduledoc """ + Wipes all tasks for a single account. 
+ """ + + alias Kith.Repo + alias Kith.Tasks.Task + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(t in Task, where: t.account_id == ^account_id)) + + Logger.info("[Tasks.Cleanup] wiped #{count} task(s) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/tasks/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/tasks/cleanup.ex test/kith/tasks/cleanup_test.exs +git commit -m "feat: add Kith.Tasks.Cleanup for account-scoped task wipe" +``` + +--- + +## Task 8: `Kith.Reminders.Cleanup` + +**Files:** +- Create: `lib/kith/reminders/cleanup.ex` +- Create: `test/kith/reminders/cleanup_test.exs` + +Cancels Oban jobs tracked in `reminders.enqueued_oban_job_ids` (matching the existing `cancel_reminder_jobs/1` pattern from the current worker), then deletes reminders; CASCADE removes `reminder_rules` and `reminder_instances`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/reminders/cleanup_test.exs`: + +```elixir +defmodule Kith.Reminders.CleanupTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Reminders.{Cleanup, Reminder, ReminderInstance, ReminderRule} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes reminders + CASCADE rules/instances for target only", ctx do + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + other_reminder = reminder_fixture(ctx.other_account, ctx.other_contact.id, ctx.other_user) + + _target_instance = reminder_instance_fixture(target_reminder) + _other_instance = reminder_instance_fixture(other_reminder) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Reminder, ctx.target_account) == 0 + # rules + instances reference reminder_id, so we count them via the join: + assert count_orphans(ReminderRule, [target_reminder.id]) == 0 + assert count_orphans(ReminderInstance, [target_reminder.id]) == 0 + + assert count_for(Reminder, ctx.other_account) == 1 + end + + test "cancels Oban jobs tracked on the target's reminders", ctx do + # Insert a real Oban job and attach its id to a reminder + {:ok, job} = + Oban.insert(Kith.Workers.ReminderNotificationWorker.new(%{"reminder_instance_id" => 0})) + + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + + target_reminder + |> Ecto.Changeset.change(enqueued_oban_job_ids: [job.id]) 
+ |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, job.id).state == "cancelled" + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end + + defp count_orphans(schema, reminder_ids) do + Repo.aggregate(from(s in schema, where: s.reminder_id in ^reminder_ids), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/reminders/cleanup_test.exs +``` + +Expected: all three tests fail with `UndefinedFunctionError` — `Kith.Reminders.Cleanup` does not exist yet (Elixir only warns about the missing module at compile time). + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/reminders/cleanup.ex`: + +```elixir +defmodule Kith.Reminders.Cleanup do + @moduledoc """ + Cancels all Oban jobs tracked on the account's reminders, then deletes + the reminders. FK CASCADE removes `reminder_rules` and `reminder_instances`. + """ + + alias Kith.Reminders.Reminder + alias Kith.Repo + + import Ecto.Query + require Logger + + @doc """ + Cancels the Oban jobs listed in `enqueued_oban_job_ids` on each of the + account's reminders, deletes those reminders, and logs the deleted count. + + Always returns `:ok`. + """ + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + cancel_oban_jobs_for_account(account_id) + + {count, _} = + Repo.delete_all(from(r in Reminder, where: r.account_id == ^account_id)) + + Logger.info("[Reminders.Cleanup] wiped #{count} reminder(s) for account #{account_id}") + :ok + end + + # Each reminder yields one list of job ids; flatten them into a single list + # and drop `nil` entries before cancelling. + defp cancel_oban_jobs_for_account(account_id) do + tracked_job_ids = + from(r in Reminder, + where: r.account_id == ^account_id, + select: r.enqueued_oban_job_ids + ) + + tracked_job_ids + |> Repo.all() + |> List.flatten() + |> Enum.reject(&is_nil/1) + |> Enum.each(&Oban.cancel_job/1) + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/reminders/cleanup_test.exs +``` + +Expected: 3 tests, 0 failures. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/reminders/cleanup.ex test/kith/reminders/cleanup_test.exs +git commit -m "feat: add Kith.Reminders.Cleanup for account-scoped reminder wipe" +``` + +--- + +## Task 9: `Kith.Activities.Cleanup` + +**Files:** +- Create: `lib/kith/activities/cleanup.ex` +- Create: `test/kith/activities/cleanup_test.exs` + +Wipes `activities` (account-scoped). No contact FK, so this isn't cleared by `Contacts.Cleanup`. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/activities/cleanup_test.exs`: + +```elixir +defmodule Kith.Activities.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Activities.{Activity, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes activities for target account only", ctx do + Repo.insert!(%Activity{ + account_id: ctx.target_account, + summary: "target activity", + happened_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Activity{ + account_id: ctx.other_account, + summary: "other activity", + happened_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Activity, ctx.target_account) == 0 + assert count_for(Activity, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: If the `Activity` schema requires different fields (read `lib/kith/activities/activity.ex` if the insert fails), adjust the test setup. 
+ +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/activities/cleanup_test.exs +``` + +Expected: compile error — `Kith.Activities.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/activities/cleanup.ex`: + +```elixir +defmodule Kith.Activities.Cleanup do + @moduledoc """ + Wipes all account-scoped activities for a single account. Activities have + no contact FK so they are not cleared by `Kith.Contacts.Cleanup`'s CASCADE. + """ + + alias Kith.Activities.Activity + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in Activity, where: a.account_id == ^account_id)) + + Logger.info("[Activities.Cleanup] wiped #{count} activit(ies) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/activities/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/activities/cleanup.ex test/kith/activities/cleanup_test.exs +git commit -m "feat: add Kith.Activities.Cleanup for account-scoped activity wipe" +``` + +--- + +## Task 10: `Kith.AuditLogs.Cleanup` + +**Files:** +- Create: `lib/kith/audit_logs/cleanup.ex` +- Create: `test/kith/audit_logs/cleanup_test.exs` + +Wipes `audit_logs`. Runs LAST in the worker pipeline so the "account_data_reset" audit log written at start lives until cleanup is done. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/audit_logs/cleanup_test.exs`: + +```elixir +defmodule Kith.AuditLogs.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.AuditLogs + alias Kith.AuditLogs.{AuditLog, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes audit logs for target account only", ctx do + {:ok, _} = + AuditLogs.create_audit_log(ctx.target_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + {:ok, _} = + AuditLogs.create_audit_log(ctx.other_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(AuditLog, ctx.target_account) == 0 + assert count_for(AuditLog, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/audit_logs/cleanup_test.exs +``` + +Expected: compile error — `Kith.AuditLogs.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/audit_logs/cleanup.ex`: + +```elixir +defmodule Kith.AuditLogs.Cleanup do + @moduledoc """ + Wipes all audit logs for a single account. Runs LAST in the reset pipeline + so the "account_data_reset" log written at the start of the worker lives + until the rest of cleanup completes. 
+ """ + + alias Kith.AuditLogs.AuditLog + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in AuditLog, where: a.account_id == ^account_id)) + + Logger.info("[AuditLogs.Cleanup] wiped #{count} audit log(s) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/audit_logs/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/audit_logs/cleanup.ex test/kith/audit_logs/cleanup_test.exs +git commit -m "feat: add Kith.AuditLogs.Cleanup for account-scoped audit-log wipe" +``` + +--- + +## Task 11: Refactor `AccountResetWorker` to orchestrator + +**Files:** +- Modify: `lib/kith/workers/account_reset_worker.ex` (full rewrite of the worker body) + +Replace all per-domain private helpers with the ordered `@cleaners` list. Worker becomes ~40 LoC. + +- [ ] **Step 1: Replace the entire worker file content** + +Open `lib/kith/workers/account_reset_worker.ex` and replace the full content with: + +```elixir +defmodule Kith.Workers.AccountResetWorker do + @moduledoc """ + Resets a single account's data by orchestrating per-domain cleanup modules. + + Wipes everything the account owns except reference data (genders, + relationship_types, contact_field_types, etc.) and account_invitations. + Operations are scoped to the target account; no other account is affected. + + Each `@cleaners` module exposes `wipe_for_account(account_id) :: :ok`. + Order is load-bearing — see `docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md`. 
+ """ + + use Oban.Worker, + queue: :default, + max_attempts: 3, + unique: [period: 300, fields: [:args], keys: [:account_id]] + + alias Kith.{Activities, AuditLogs, Contacts, Conversations, Imports, Journal, + Reminders, Storage, Tasks} + + require Logger + + @cleaners [ + Imports.JobCancellation, + Storage.AccountCleanup, + Contacts.Cleanup, + Imports.Cleanup, + Conversations.Cleanup, + Reminders.Cleanup, + Tasks.Cleanup, + Journal.Cleanup, + Activities.Cleanup, + AuditLogs.Cleanup + ] + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"account_id" => account_id}}) do + Logger.metadata(account_id: account_id, worker: "AccountReset") + Logger.info("[AccountReset] starting reset for account #{account_id}") + write_initiated_audit_log(account_id) + + Enum.each(@cleaners, fn cleaner -> + Logger.info("[AccountReset] running #{inspect(cleaner)}") + :ok = cleaner.wipe_for_account(account_id) + end) + + Logger.info("[AccountReset] completed reset for account #{account_id}") + :ok + end + + defp write_initiated_audit_log(account_id) do + AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{reason: "Account data reset initiated"} + }) + end +end +``` + +- [ ] **Step 2: Run the existing worker test to ensure no regression** + +```bash +mix test test/kith_web/live/settings_live/account_live_test.exs +``` + +The existing test only asserts that the job is enqueued (no behavior assertions on cleanup). Expected: 0 failures. + +- [ ] **Step 3: Run the full test suite to catch incidental breakage** + +```bash +mix test +``` + +Expected: all tests pass. The 9 new cleanup modules + the worker are now exercised together. 
+ +- [ ] **Step 4: Run `mix format` to normalize the rewritten file** + +```bash +mix format +``` + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/workers/account_reset_worker.ex +git commit -m "refactor: AccountResetWorker becomes orchestrator over per-domain Cleanup modules" +``` + +--- + +## Task 12: Regression + cross-account isolation tests on the worker + +**Files:** +- Modify: `test/kith/workers/account_reset_worker_test.exs` (create if it doesn't exist; the existing coverage is in `account_live_test.exs`) + +Adds the user-reported scenario (re-import-after-reset succeeds) and a snapshot-based cross-account isolation test. + +- [ ] **Step 1: Check whether the test file already exists** + +```bash +ls test/kith/workers/account_reset_worker_test.exs 2>/dev/null && echo "exists" || echo "missing" +``` + +If "missing", create it from scratch with the content below. If "exists", open the file and add the two new `describe` blocks (plus the helpers, aliases, and imports they need) alongside the existing ones — ExUnit does not allow nested `describe` blocks — preserving any existing tests.
+ +- [ ] **Step 2: Write the file (or append the tests)** + +Full file content (use this if creating, or merge the tests if appending): + +```elixir +defmodule Kith.Workers.AccountResetWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Activities.Activity + alias Kith.AuditLogs.AuditLog + alias Kith.Contacts.{Contact, Tag} + alias Kith.Conversations.Conversation + alias Kith.Imports + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Journal.Entry + alias Kith.Reminders.Reminder + alias Kith.Repo + alias Kith.Tasks.Task, as: TaskSchema + alias Kith.Workers.AccountResetWorker + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + describe "perform/1 — regression: re-import after reset" do + test "re-import for same Monica contact id resolves to new local contact (no stale import_records)", + ctx do + # Initial import: contact + import_record for Monica id 964 + import_a = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + contact_a = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_a, "contact", "964", "contact", contact_a.id) + + # Run reset + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target account fully wiped + assert count(Contact, ctx.target_account) == 0 + assert count(Import, ctx.target_account) == 0 + assert count(ImportRecord, ctx.target_account) == 0 + + # Re-import: new contact + new import_record for the same Monica id + import_b = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: 
"https://monica.test", + api_key_encrypted: "k" + }) + + contact_b = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_b, "contact", "964", "contact", contact_b.id) + + # The photo-sync lookup that previously found stale data now resolves correctly + assert %{local_entity_id: local_id} = + Imports.find_import_record(ctx.target_account, "monica_api", "contact", "964") + + assert local_id == contact_b.id + end + end + + describe "perform/1 — cross-account isolation" do + test "resetting account A does not touch any data in account B", ctx do + target_contact = populate_data!(ctx.target_account, ctx.target_user) + _other_contact = populate_data!(ctx.other_account, ctx.other_user) + + before_other = snapshot(ctx.other_account) + + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target wiped across every domain + assert empty?(ctx.target_account) + + # Other account is bit-identical to before + assert snapshot(ctx.other_account) == before_other + + # Sanity: target_contact is gone, other account still has its contact + refute Repo.get(Contact, target_contact.id) + end + end + + defp populate_data!(account_id, user_id) do + contact = contact_fixture(account_id) + + {:ok, _} = + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + |> then(&Imports.record_imported_entity(&1, "contact", "1", "contact", contact.id)) + + Repo.insert!(%Tag{account_id: account_id, name: "t"}) + + Repo.insert!(%Activity{ + account_id: account_id, + summary: "a", + happened_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%TaskSchema{ + account_id: account_id, + creator_id: user_id, + title: "x" + }) + + Repo.insert!(%Entry{ + account_id: account_id, + author_id: user_id, + content: "c", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Conversation{ + account_id: account_id, + 
creator_id: user_id, + contact_id: contact.id, + subject: "s", + platform: "other", + status: "active", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + _reminder = reminder_fixture(account_id, contact.id, user_id) + + {:ok, _} = + Kith.AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "test", + event: "account_data_reset", + metadata: %{} + }) + + contact + end + + defp snapshot(account_id) do + %{ + contacts: count(Contact, account_id), + imports: count(Import, account_id), + import_records: count(ImportRecord, account_id), + conversations: count(Conversation, account_id), + tasks: count(TaskSchema, account_id), + journal_entries: count(Entry, account_id), + reminders: count(Reminder, account_id), + tags: count(Tag, account_id), + activities: count(Activity, account_id), + audit_logs: count(AuditLog, account_id) + } + end + + defp empty?(account_id) do + snapshot(account_id) == + %{ + contacts: 0, + imports: 0, + import_records: 0, + conversations: 0, + tasks: 0, + journal_entries: 0, + reminders: 0, + tags: 0, + activities: 0, + audit_logs: 0 + } + end + + defp count(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: If `populate_data!` fails to insert any record due to schema mismatch (e.g. an `Activity` requires `kind` or `actor_id`), read the schema file (`lib/kith/activities/activity.ex` etc.) and add the missing required fields. The shape above is based on the moduledoc reading — adjust as needed. + +- [ ] **Step 3: Run the new tests** + +```bash +mix test test/kith/workers/account_reset_worker_test.exs +``` + +Expected: 2 tests, 0 failures. If a schema insert fails, fix the populate_data! helper and re-run. + +- [ ] **Step 4: Run the FULL test suite** + +```bash +mix test +``` + +Expected: all tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add test/kith/workers/account_reset_worker_test.exs +git commit -m "test: add regression + cross-account isolation tests for AccountResetWorker" +``` + +--- + +## Task 13: Final verification + push + +- [ ] **Step 1: Verify the full quality pipeline** + +```bash +mix quality +``` + +Expected: compile + format + credo + sobelow + dialyzer all clean. The pre-commit hook will have caught most issues already, but run once explicitly. + +- [ ] **Step 2: Confirm no stale references to deleted private helpers in the worker** + +```bash +grep -n "delete_contacts_in_batches\|delete_tags\|delete_activities\|delete_audit_logs\|delete_stored_files\|cancel_reminder_jobs" lib/kith/workers/account_reset_worker.ex +``` + +Expected: no matches (all moved into cleanup modules). + +- [ ] **Step 3: Push the branch** + +```bash +git push +``` + +- [ ] **Step 4: Manual verification on dev (operator step)** + +The implementing engineer should report this step to the operator: + +> On the dev environment: +> 1. Run a Monica API import with the "Import photos" option checked. +> 2. Trigger account reset via Settings → Account. +> 3. Re-run the same Monica API import with photos. +> 4. Confirm `MonicaPhotoSyncWorker` completes successfully (no "contact is deleted" errors). +> 5. Run `tail -f log/dev.log | grep -E '\[[A-Za-z.]*(AccountReset|Cleanup|JobCancellation)\]'` — it should show the structured per-step progress. + +If the manual test surfaces an issue, file it as a follow-up — the spec's automated tests (regression + isolation) should have caught any structural breakage.
+ +--- + +## Spec coverage check (skill-required self-review) + +Each spec requirement → corresponding task: + +| Spec requirement | Tasks | +|---|---| +| Wipe `imports` + `import_records` | Task 1 | +| Cancel in-flight Oban jobs (import_id + account_id scoped) | Task 2 | +| Wipe stored files (photos, documents, import uploads) | Task 3 | +| Wipe contacts (CASCADE) + tags | Task 4 | +| Wipe conversations (CASCADE → messages) | Task 5 | +| Wipe journal_entries | Task 6 | +| Wipe tasks | Task 7 | +| Wipe reminders + cancel their Oban jobs (CASCADE → rules, instances) | Task 8 | +| Wipe activities | Task 9 | +| Wipe audit_logs (last) | Task 10 | +| Worker becomes orchestrator; old helpers removed | Task 11 | +| Regression test for user-reported bug | Task 12 | +| Cross-account isolation test on worker | Task 12 | +| Every cleanup module has a "control account untouched" assertion | Tasks 1–10 | +| Idempotency assertion in every cleanup module | Tasks 1–10 | +| Order: jobs → files → contacts → imports → conversations → reminders → tasks → journal → activities → audit | Task 11 (`@cleaners` list) | +| `safe_delete_file/1` warn-and-continue (no raise on storage errors) | Task 3 | + +All requirements covered. diff --git a/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md b/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md index dc5f317..fc1b43e 100644 --- a/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md +++ b/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md @@ -92,9 +92,9 @@ consumer. 
``` lib/kith/ +├── activities/cleanup.ex # NEW — wipe account-scoped activities ├── audit_logs/cleanup.ex # NEW — wipe audit_logs -├── contacts/cleanup.ex # NEW — hard-delete contacts (CASCADE) -├── contacts/tags_and_activities_cleanup.ex # NEW — account-scoped tags + activities +├── contacts/cleanup.ex # NEW — hard-delete contacts (CASCADE) + tags ├── conversations/cleanup.ex # NEW — wipe conversations (CASCADE → messages) ├── imports/cleanup.ex # NEW — wipe imports + import_records ├── imports/job_cancellation.ex # NEW — cancel pending Oban jobs for THIS account's imports @@ -105,6 +105,10 @@ lib/kith/ └── workers/account_reset_worker.ex # REFACTOR — orchestrator only (~40 LoC) ``` +(Note: `tags` is wiped inside `Contacts.Cleanup` because it shares the +contacts axis-of-change. `activities` is its own context and gets its own +module per SOLID-elixir's SRP-module guidance.) + Each cleanup module exposes a single function: ```elixir @@ -147,7 +151,7 @@ defmodule Kith.Workers.AccountResetWorker do Reminders.Cleanup, Tasks.Cleanup, Journal.Cleanup, - Contacts.TagsAndActivitiesCleanup, + Activities.Cleanup, AuditLogs.Cleanup ] @@ -200,8 +204,8 @@ The ordering is load-bearing: removes reminder_rules, reminder_instances. 7. **`Tasks.Cleanup`** — wipes tasks. 8. **`Journal.Cleanup`** — wipes journal_entries. -9. **`Contacts.TagsAndActivitiesCleanup`** — wipes the account-scoped `tags` - and `activities` tables (no contact FK, so not cleared by step 3). +9. **`Activities.Cleanup`** — wipes account-scoped `activities` (no contact FK). + (Note: `tags` is wiped inside `Contacts.Cleanup` at step 3.) 10. **`AuditLogs.Cleanup`** — runs LAST. The "account_data_reset" audit log written at start needs to live until the reset completes; wiping it earlier would erase the audit trail of the reset itself. @@ -453,9 +457,9 @@ will sweep them. 
| File | Change | |---|---| +| `lib/kith/activities/cleanup.ex` | NEW | | `lib/kith/audit_logs/cleanup.ex` | NEW | -| `lib/kith/contacts/cleanup.ex` | NEW | -| `lib/kith/contacts/tags_and_activities_cleanup.ex` | NEW | +| `lib/kith/contacts/cleanup.ex` | NEW (handles contacts + tags) | | `lib/kith/conversations/cleanup.ex` | NEW | | `lib/kith/imports/cleanup.ex` | NEW | | `lib/kith/imports/job_cancellation.ex` | NEW | @@ -464,9 +468,9 @@ will sweep them. | `lib/kith/storage/account_cleanup.ex` | NEW | | `lib/kith/tasks/cleanup.ex` | NEW | | `lib/kith/workers/account_reset_worker.ex` | REFACTOR — orchestrator only | +| `test/kith/activities/cleanup_test.exs` | NEW | | `test/kith/audit_logs/cleanup_test.exs` | NEW | | `test/kith/contacts/cleanup_test.exs` | NEW | -| `test/kith/contacts/tags_and_activities_cleanup_test.exs` | NEW | | `test/kith/conversations/cleanup_test.exs` | NEW | | `test/kith/imports/cleanup_test.exs` | NEW | | `test/kith/imports/job_cancellation_test.exs` | NEW | From 9f3022ae5c216f379a239babb9eecead48d51b1c Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 22:38:44 +0300 Subject: [PATCH 12/58] feat: add Kith.Imports.Cleanup for account-scoped import wipe --- lib/kith/imports/cleanup.ex | 29 +++++++++++++++ test/kith/imports/cleanup_test.exs | 60 ++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 lib/kith/imports/cleanup.ex create mode 100644 test/kith/imports/cleanup_test.exs diff --git a/lib/kith/imports/cleanup.ex b/lib/kith/imports/cleanup.ex new file mode 100644 index 0000000..d4263ce --- /dev/null +++ b/lib/kith/imports/cleanup.ex @@ -0,0 +1,29 @@ +defmodule Kith.Imports.Cleanup do + @moduledoc """ + Wipes all import history for a single account. + + Deletes `import_records` first then `imports`. Both tables are scoped by + `account_id` directly. Called by `Kith.Workers.AccountResetWorker`. 
+ """ + + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {records, _} = + Repo.delete_all(from(r in ImportRecord, where: r.account_id == ^account_id)) + + {imports, _} = + Repo.delete_all(from(i in Import, where: i.account_id == ^account_id)) + + Logger.info( + "[Imports.Cleanup] wiped #{records} record(s) + #{imports} import(s) for account #{account_id}" + ) + + :ok + end +end diff --git a/test/kith/imports/cleanup_test.exs b/test/kith/imports/cleanup_test.exs new file mode 100644 index 0000000..8308beb --- /dev/null +++ b/test/kith/imports/cleanup_test.exs @@ -0,0 +1,60 @@ +defmodule Kith.Imports.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Imports + alias Kith.Imports.{Cleanup, Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes imports + import_records for target account; leaves other account untouched", ctx do + target_import = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(ctx.other_account, ctx.other_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + {:ok, _} = Imports.record_imported_entity(target_import, "contact", "1", "contact", 999) + {:ok, _} = Imports.record_imported_entity(other_import, "contact", "1", "contact", 999) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Import, ctx.target_account) == 0 + assert count_for(ImportRecord, ctx.target_account) == 0 + + # Control account untouched 
+ assert count_for(Import, ctx.other_account) == 1 + assert count_for(ImportRecord, ctx.other_account) == 1 + end + + test "is idempotent on an account with no import data", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From 07a3d55f6592eaa292bebb69858da0bb26ae77e8 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 22:52:35 +0300 Subject: [PATCH 13/58] feat: add Kith.Imports.JobCancellation for account-scoped Oban cancel --- lib/kith/imports/job_cancellation.ex | 79 +++++++++++++ test/kith/imports/job_cancellation_test.exs | 125 ++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 lib/kith/imports/job_cancellation.ex create mode 100644 test/kith/imports/job_cancellation_test.exs diff --git a/lib/kith/imports/job_cancellation.ex b/lib/kith/imports/job_cancellation.ex new file mode 100644 index 0000000..6eaff42 --- /dev/null +++ b/lib/kith/imports/job_cancellation.ex @@ -0,0 +1,79 @@ +defmodule Kith.Imports.JobCancellation do + @moduledoc """ + Cancels all pending/scheduled/retryable/executing Oban jobs that belong to a + single account's imports. + + Scoping rule: only jobs whose args reference this account (directly via + `account_id` or transitively via `import_id` belonging to one of this + account's imports) are touched. No other account's jobs are affected. + + Uses `Oban.cancel_all_jobs/2` which both updates the DB state and signals + any currently-`executing` jobs to terminate via Oban's Notifier (`:pkill`). 
+ """ + + alias Kith.Imports.Import + alias Kith.Repo + + import Ecto.Query + require Logger + + # Workers whose args carry `import_id` — cancelled by import_id ∈ account's imports + @import_id_workers ~w[ + Kith.Workers.MonicaApiCrawlWorker + Kith.Workers.MonicaPhotoSyncWorker + Kith.Workers.MonicaDocumentImportWorker + Kith.Workers.ImportSourceWorker + ] + + # Workers whose args carry `account_id` directly — cancelled by account_id match + @account_id_workers ~w[ + Kith.Workers.ImportWorker + Kith.Workers.DuplicateDetectionWorker + ] + + @cancellable_states ~w[available scheduled retryable executing] + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + import_ids = account_import_ids(account_id) + import_cancelled = cancel_jobs_by_import_id(import_ids) + account_cancelled = cancel_jobs_by_account_id(account_id) + + Logger.info( + "[Imports.JobCancellation] cancelled #{import_cancelled} import-id-scoped job(s) + " <> + "#{account_cancelled} account-id-scoped job(s) for account #{account_id}" + ) + + :ok + end + + defp account_import_ids(account_id) do + Repo.all(from(i in Import, where: i.account_id == ^account_id, select: i.id)) + end + + defp cancel_jobs_by_import_id([]), do: 0 + + defp cancel_jobs_by_import_id(import_ids) do + {:ok, count} = + from(j in Oban.Job, + where: j.worker in ^@import_id_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'import_id')::bigint", j.args) in ^import_ids + ) + |> Oban.cancel_all_jobs() + + count + end + + defp cancel_jobs_by_account_id(account_id) do + {:ok, count} = + from(j in Oban.Job, + where: j.worker in ^@account_id_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'account_id')::bigint", j.args) == ^account_id + ) + |> Oban.cancel_all_jobs() + + count + end +end diff --git a/test/kith/imports/job_cancellation_test.exs b/test/kith/imports/job_cancellation_test.exs new file mode 100644 index 0000000..2878996 --- /dev/null +++ 
b/test/kith/imports/job_cancellation_test.exs @@ -0,0 +1,125 @@ +defmodule Kith.Imports.JobCancellationTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Imports.JobCancellation + alias Kith.Repo + alias Kith.Workers.{DuplicateDetectionWorker, ImportWorker, MonicaPhotoSyncWorker} + + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + target_import = + import_fixture(target.account_id, target.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(other.account_id, other.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + %{ + target_account: target.account_id, + target_import: target_import, + other_account: other.account_id, + other_import: other_import + } + end + + test "cancels target account's import jobs; leaves other account's jobs alone", ctx do + {:ok, target_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + {:ok, other_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.other_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_photo_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_photo_job.id).state == "available" + end + + test "cancels DuplicateDetectionWorker jobs by account_id", ctx do + {:ok, target_dup_job} = + Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.target_account})) + + {:ok, other_dup_job} = + Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.other_account})) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_dup_job.id).state == 
"cancelled" + assert Repo.get!(Oban.Job, other_dup_job.id).state == "available" + end + + test "cancels ImportWorker jobs by account_id", ctx do + {:ok, target_job} = + Oban.insert( + ImportWorker.new(%{ + "account_id" => ctx.target_account, + "user_id" => 1, + "file_data" => "BEGIN:VCARD\nEND:VCARD\n" + }) + ) + + {:ok, other_job} = + Oban.insert( + ImportWorker.new(%{ + "account_id" => ctx.other_account, + "user_id" => 1, + "file_data" => "BEGIN:VCARD\nEND:VCARD\n" + }) + ) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_job.id).state == "available" + end + + test "is a no-op when account has no jobs", ctx do + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + end + + test "ignores jobs already in 'completed' state", ctx do + {:ok, completed_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + # Manually mark as completed + completed_job + |> Ecto.Changeset.change(state: "completed", completed_at: DateTime.utc_now()) + |> Repo.update!() + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + # Completed jobs are NOT touched + assert Repo.get!(Oban.Job, completed_job.id).state == "completed" + end +end From d7e9aa3314cbcf0e2c522f680eb9b75ac93ed819 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:04:35 +0300 Subject: [PATCH 14/58] feat: add Kith.Storage.AccountCleanup for account-scoped file wipe --- lib/kith/storage/account_cleanup.ex | 87 ++++++++++++++++++++ test/kith/storage/account_cleanup_test.exs | 92 ++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 lib/kith/storage/account_cleanup.ex create mode 100644 test/kith/storage/account_cleanup_test.exs diff --git a/lib/kith/storage/account_cleanup.ex 
b/lib/kith/storage/account_cleanup.ex new file mode 100644 index 0000000..0ad5e4e --- /dev/null +++ b/lib/kith/storage/account_cleanup.ex @@ -0,0 +1,87 @@ +defmodule Kith.Storage.AccountCleanup do + @moduledoc """ + Deletes physical storage objects (photos, documents, import upload files) + for a single account. + + Storage failures (S3 already-deleted, network blip) are logged at `:warning` + but never raise — they must not abort the surrounding account reset. + Storage objects are recoverable separately (S3 lifecycle, manual sweep) + and don't affect data integrity. + + Must run BEFORE `Kith.Contacts.Cleanup` — once contacts are hard-deleted, + the `photos` and `documents` rows are CASCADE-deleted and we can no longer + iterate their `storage_key` values. + """ + + alias Kith.Contacts.{Contact, Document, Photo} + alias Kith.Imports.Import + alias Kith.{Repo, Storage} + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + photo_count = delete_keys(photo_keys(account_id)) + document_count = delete_keys(document_keys(account_id)) + upload_count = delete_keys(import_upload_keys(account_id)) + + Logger.info( + "[Storage.AccountCleanup] deleted #{photo_count} photo file(s) + " <> + "#{document_count} document file(s) + #{upload_count} import upload(s) " <> + "for account #{account_id}" + ) + + :ok + end + + defp photo_keys(account_id) do + Repo.all( + from(p in Photo, + join: c in Contact, + on: p.contact_id == c.id, + where: c.account_id == ^account_id, + select: p.storage_key + ) + ) + end + + defp document_keys(account_id) do + Repo.all( + from(d in Document, + join: c in Contact, + on: d.contact_id == c.id, + where: c.account_id == ^account_id, + select: d.storage_key + ) + ) + end + + defp import_upload_keys(account_id) do + Repo.all( + from(i in Import, + where: i.account_id == ^account_id, + where: not is_nil(i.file_storage_key), + select: i.file_storage_key + ) + ) + end + + defp 
delete_keys(keys) do + Enum.each(keys, &safe_delete/1) + length(keys) + end + + defp safe_delete(nil), do: :ok + + defp safe_delete(key) do + case Storage.delete(key) do + :ok -> + :ok + + {:error, reason} -> + Logger.warning("[Storage.AccountCleanup] failed to delete #{key}: #{inspect(reason)}") + :ok + end + end +end diff --git a/test/kith/storage/account_cleanup_test.exs b/test/kith/storage/account_cleanup_test.exs new file mode 100644 index 0000000..c72fb38 --- /dev/null +++ b/test/kith/storage/account_cleanup_test.exs @@ -0,0 +1,92 @@ +defmodule Kith.Storage.AccountCleanupTest do + use Kith.DataCase, async: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Storage + alias Kith.Storage.AccountCleanup + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "deletes target account's photo + import-upload files; leaves other account's files alone", + ctx do + {target_photo_key, _} = upload_and_attach_photo!(ctx.target_account) + {other_photo_key, _} = upload_and_attach_photo!(ctx.other_account) + + target_upload_key = upload_import_file!(ctx.target_account, ctx.target_user) + other_upload_key = upload_import_file!(ctx.other_account, ctx.other_user) + + # Ensure ALL files are cleaned up after the test, regardless of what wipe does. + # Files written via Storage.upload_binary are real disk I/O outside the Ecto sandbox. 
+ on_exit(fn -> + Enum.each( + [target_photo_key, other_photo_key, target_upload_key, other_upload_key], + fn key -> _ = Storage.delete(key) end + ) + end) + + assert {:ok, _} = Storage.read(target_photo_key) + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(target_upload_key) + assert {:ok, _} = Storage.read(other_upload_key) + + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + + assert {:error, _} = Storage.read(target_photo_key) + assert {:error, _} = Storage.read(target_upload_key) + + # Control account untouched + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(other_upload_key) + end + + test "is a no-op when account has no files", ctx do + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + end + + defp upload_and_attach_photo!(account_id) do + contact = contact_fixture(account_id) + binary = <<0xFF, 0xD8, 0xFF, 0xE0>> + key = Storage.generate_key(account_id, "photos", "test.jpg") + {:ok, _} = Storage.upload_binary(binary, key) + + {:ok, photo} = + Contacts.create_photo(contact, %{ + "file_name" => "test.jpg", + "storage_key" => key, + "file_size" => byte_size(binary), + "content_type" => "image/jpeg" + }) + + {key, photo} + end + + defp upload_import_file!(account_id, user_id) do + uuid = Ecto.UUID.generate() + key = "#{account_id}/imports/#{uuid}.vcf" + {:ok, _} = Storage.upload_binary("BEGIN:VCARD\nEND:VCARD\n", key) + + {:ok, _} = + Imports.create_import(account_id, user_id, %{ + source: "vcard", + file_name: "export.vcf", + file_size: 22, + file_storage_key: key + }) + + key + end +end From 7ac3a8b937d639fa3838573eeb33a8881173385b Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:11:19 +0300 Subject: [PATCH 15/58] feat: add Kith.Contacts.Cleanup for account-scoped contacts+tags wipe --- lib/kith/contacts/cleanup.ex | 59 ++++++++++++++++++++++++++++ test/kith/contacts/cleanup_test.exs | 
60 +++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 lib/kith/contacts/cleanup.ex create mode 100644 test/kith/contacts/cleanup_test.exs diff --git a/lib/kith/contacts/cleanup.ex b/lib/kith/contacts/cleanup.ex new file mode 100644 index 0000000..b4ba270 --- /dev/null +++ b/lib/kith/contacts/cleanup.ex @@ -0,0 +1,59 @@ +defmodule Kith.Contacts.Cleanup do + @moduledoc """ + Hard-deletes all contacts (and CASCADE sub-entities) and account-scoped + tags for a single account. + + Sub-entities cleared via FK CASCADE: addresses, contact_fields, photos + (rows), documents (rows), notes, debts, gifts, pets, emotions, + relationships, calls, life_events, duplicate_candidates, immich_candidates. + + Note: `Kith.Storage.AccountCleanup` MUST run before this module so that + photo/document storage_keys can be enumerated before their rows are wiped. + + Tags are wiped here (not in a separate module) because they share the + contacts axis-of-change and have no other purpose. 
+ """ + + alias Kith.Contacts.{Contact, Tag} + alias Kith.Repo + + import Ecto.Query + require Logger + + @batch_size 200 + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + contacts_deleted = delete_contacts_in_batches(account_id, 0) + + {tags_deleted, _} = + Repo.delete_all(from(t in Tag, where: t.account_id == ^account_id)) + + Logger.info( + "[Contacts.Cleanup] hard-deleted #{contacts_deleted} contact(s) + " <> + "#{tags_deleted} tag(s) for account #{account_id}" + ) + + :ok + end + + defp delete_contacts_in_batches(account_id, acc) do + ids = + Repo.all( + from(c in Contact, + where: c.account_id == ^account_id, + select: c.id, + limit: @batch_size + ) + ) + + case ids do + [] -> + acc + + _ -> + {deleted, _} = Repo.delete_all(from(c in Contact, where: c.id in ^ids)) + delete_contacts_in_batches(account_id, acc + deleted) + end + end +end diff --git a/test/kith/contacts/cleanup_test.exs b/test/kith/contacts/cleanup_test.exs new file mode 100644 index 0000000..e113159 --- /dev/null +++ b/test/kith/contacts/cleanup_test.exs @@ -0,0 +1,60 @@ +defmodule Kith.Contacts.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Contacts.{Cleanup, Contact, Tag} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "hard-deletes contacts + tags for target account; leaves other account untouched", ctx do + contact_fixture(ctx.target_account) + contact_fixture(ctx.target_account) + contact_fixture(ctx.other_account) + + Repo.insert!(%Tag{account_id: ctx.target_account, name: "target-tag"}) + Repo.insert!(%Tag{account_id: ctx.other_account, name: "other-tag"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Contact, ctx.target_account) == 0 + assert count_for(Tag, 
ctx.target_account) == 0 + + assert count_for(Contact, ctx.other_account) == 1 + assert count_for(Tag, ctx.other_account) == 1 + end + + test "ignores soft-deleted vs not — hard-deletes both", ctx do + active = contact_fixture(ctx.target_account) + soft = contact_fixture(ctx.target_account) + + soft + |> Ecto.Changeset.change(deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)) + |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + refute Repo.get(Contact, active.id) + refute Repo.get(Contact, soft.id) + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From 7d1e666ffc58fcc042b7fbf62669e22ae04bb004 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:15:47 +0300 Subject: [PATCH 16/58] feat: add Kith.Conversations.Cleanup for account-scoped conversation wipe --- lib/kith/conversations/cleanup.ex | 24 ++++++++ test/kith/conversations/cleanup_test.exs | 72 ++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 lib/kith/conversations/cleanup.ex create mode 100644 test/kith/conversations/cleanup_test.exs diff --git a/lib/kith/conversations/cleanup.ex b/lib/kith/conversations/cleanup.ex new file mode 100644 index 0000000..e5005b2 --- /dev/null +++ b/lib/kith/conversations/cleanup.ex @@ -0,0 +1,24 @@ +defmodule Kith.Conversations.Cleanup do + @moduledoc """ + Wipes all conversations for a single account. FK CASCADE removes the + associated `messages` rows. 
+ """ + + alias Kith.Conversations.Conversation + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(c in Conversation, where: c.account_id == ^account_id)) + + Logger.info( + "[Conversations.Cleanup] wiped #{count} conversation(s) for account #{account_id}" + ) + + :ok + end +end diff --git a/test/kith/conversations/cleanup_test.exs b/test/kith/conversations/cleanup_test.exs new file mode 100644 index 0000000..32a3a9f --- /dev/null +++ b/test/kith/conversations/cleanup_test.exs @@ -0,0 +1,72 @@ +defmodule Kith.Conversations.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Conversations.{Cleanup, Conversation, Message} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes conversations (CASCADE messages) for target; leaves other untouched", ctx do + target_conv = insert_conversation!(ctx.target_account, ctx.target_user, ctx.target_contact.id) + other_conv = insert_conversation!(ctx.other_account, ctx.other_user, ctx.other_contact.id) + + insert_message!(target_conv.id, ctx.target_account) + insert_message!(other_conv.id, ctx.other_account) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Conversation, ctx.target_account) == 0 + assert count_for(Message, ctx.target_account) == 0 + + assert count_for(Conversation, ctx.other_account) == 1 + assert count_for(Message, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert 
:ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp insert_conversation!(account_id, user_id, contact_id) do + Repo.insert!(%Conversation{ + account_id: account_id, + creator_id: user_id, + contact_id: contact_id, + subject: "test", + platform: "other", + status: "active" + }) + end + + defp insert_message!(conversation_id, account_id) do + Repo.insert!(%Message{ + account_id: account_id, + conversation_id: conversation_id, + body: "hi", + direction: "sent", + sent_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From eb72c111265d43b6163dfd99f6c0fe3f4aaceb7c Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:18:54 +0300 Subject: [PATCH 17/58] feat: add Kith.Journal.Cleanup for account-scoped journal wipe --- lib/kith/journal/cleanup.ex | 20 ++++++++++++ test/kith/journal/cleanup_test.exs | 50 ++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 lib/kith/journal/cleanup.ex create mode 100644 test/kith/journal/cleanup_test.exs diff --git a/lib/kith/journal/cleanup.ex b/lib/kith/journal/cleanup.ex new file mode 100644 index 0000000..efe2a41 --- /dev/null +++ b/lib/kith/journal/cleanup.ex @@ -0,0 +1,20 @@ +defmodule Kith.Journal.Cleanup do + @moduledoc """ + Wipes all journal entries for a single account. 
+ """ + + alias Kith.Journal.Entry + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(e in Entry, where: e.account_id == ^account_id)) + + Logger.info("[Journal.Cleanup] wiped #{count} journal entr(ies) for account #{account_id}") + :ok + end +end diff --git a/test/kith/journal/cleanup_test.exs b/test/kith/journal/cleanup_test.exs new file mode 100644 index 0000000..ba06ef1 --- /dev/null +++ b/test/kith/journal/cleanup_test.exs @@ -0,0 +1,50 @@ +defmodule Kith.Journal.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Journal + alias Kith.Journal.{Cleanup, Entry} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes journal entries for target account only", ctx do + {:ok, _} = + Journal.create_entry(ctx.target_account, ctx.target_user, %{ + "content" => "target", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + {:ok, _} = + Journal.create_entry(ctx.other_account, ctx.other_user, %{ + "content" => "other", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Entry, ctx.target_account) == 0 + assert count_for(Entry, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From 55e48c48b6c120603419b9ced3d7c4c0e9b3d9f9 Mon Sep 17 00:00:00 2001 From: Bashar Qassis 
<23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:20:42 +0300 Subject: [PATCH 18/58] feat: add Kith.Tasks.Cleanup for account-scoped task wipe --- lib/kith/tasks/cleanup.ex | 20 ++++++++++++++++ test/kith/tasks/cleanup_test.exs | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 lib/kith/tasks/cleanup.ex create mode 100644 test/kith/tasks/cleanup_test.exs diff --git a/lib/kith/tasks/cleanup.ex b/lib/kith/tasks/cleanup.ex new file mode 100644 index 0000000..64161ef --- /dev/null +++ b/lib/kith/tasks/cleanup.ex @@ -0,0 +1,20 @@ +defmodule Kith.Tasks.Cleanup do + @moduledoc """ + Wipes all tasks for a single account. + """ + + alias Kith.Repo + alias Kith.Tasks.Task + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(t in Task, where: t.account_id == ^account_id)) + + Logger.info("[Tasks.Cleanup] wiped #{count} task(s) for account #{account_id}") + :ok + end +end diff --git a/test/kith/tasks/cleanup_test.exs b/test/kith/tasks/cleanup_test.exs new file mode 100644 index 0000000..3c35fef --- /dev/null +++ b/test/kith/tasks/cleanup_test.exs @@ -0,0 +1,41 @@ +defmodule Kith.Tasks.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Repo + alias Kith.Tasks + alias Kith.Tasks.{Cleanup, Task} + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes tasks for target account only", ctx do + {:ok, _} = Tasks.create_task(ctx.target_account, ctx.target_user, %{"title" => "target task"}) + {:ok, _} = Tasks.create_task(ctx.other_account, ctx.other_user, %{"title" => "other task"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Task, 
ctx.target_account) == 0 + assert count_for(Task, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From c6b7035d064ba9fad0186bcf3f5618782f3487e6 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:23:08 +0300 Subject: [PATCH 19/58] feat: add Kith.Reminders.Cleanup for account-scoped reminder wipe --- lib/kith/reminders/cleanup.ex | 37 ++++++++++++++ test/kith/reminders/cleanup_test.exs | 73 ++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 lib/kith/reminders/cleanup.ex create mode 100644 test/kith/reminders/cleanup_test.exs diff --git a/lib/kith/reminders/cleanup.ex b/lib/kith/reminders/cleanup.ex new file mode 100644 index 0000000..3c53ef2 --- /dev/null +++ b/lib/kith/reminders/cleanup.ex @@ -0,0 +1,37 @@ +defmodule Kith.Reminders.Cleanup do + @moduledoc """ + Cancels all Oban jobs tracked on the account's reminders, then deletes + the reminders. FK CASCADE removes `reminder_rules` and `reminder_instances`. 
+ """ + + alias Kith.Reminders.Reminder + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + cancel_oban_jobs_for_account(account_id) + + {count, _} = + Repo.delete_all(from(r in Reminder, where: r.account_id == ^account_id)) + + Logger.info("[Reminders.Cleanup] wiped #{count} reminder(s) for account #{account_id}") + :ok + end + + defp cancel_oban_jobs_for_account(account_id) do + job_ids = + Repo.all( + from(r in Reminder, + where: r.account_id == ^account_id, + select: r.enqueued_oban_job_ids + ) + ) + |> List.flatten() + |> Enum.reject(&is_nil/1) + + Enum.each(job_ids, &Oban.cancel_job/1) + end +end diff --git a/test/kith/reminders/cleanup_test.exs b/test/kith/reminders/cleanup_test.exs new file mode 100644 index 0000000..91cfc85 --- /dev/null +++ b/test/kith/reminders/cleanup_test.exs @@ -0,0 +1,73 @@ +defmodule Kith.Reminders.CleanupTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Reminders.{Cleanup, Reminder, ReminderInstance} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes reminders + CASCADE rules/instances for target only", ctx do + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + other_reminder = reminder_fixture(ctx.other_account, ctx.other_contact.id, ctx.other_user) + + _target_instance = reminder_instance_fixture(target_reminder) + _other_instance = reminder_instance_fixture(other_reminder) + 
+ assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Reminder, ctx.target_account) == 0 + # ReminderRule is account-scoped (no reminder_id FK); verify it still exists for other account + # ReminderInstance has a reminder_id FK — CASCADE should remove it + assert count_orphans(ReminderInstance, [target_reminder.id]) == 0 + + assert count_for(Reminder, ctx.other_account) == 1 + end + + test "cancels Oban jobs tracked on the target's reminders", ctx do + {:ok, job} = + Oban.insert(Kith.Workers.ReminderNotificationWorker.new(%{"reminder_instance_id" => 0})) + + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + + target_reminder + |> Ecto.Changeset.change(enqueued_oban_job_ids: [job.id]) + |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, job.id).state == "cancelled" + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end + + defp count_orphans(schema, reminder_ids) do + Repo.aggregate(from(s in schema, where: s.reminder_id in ^reminder_ids), :count) + end +end From 63e0bbf99562d4547decd82de71ff270862b778f Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:25:49 +0300 Subject: [PATCH 20/58] feat: add Kith.Activities.Cleanup for account-scoped activity wipe --- lib/kith/activities/cleanup.ex | 21 ++++++++++++ test/kith/activities/cleanup_test.exs | 47 +++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 lib/kith/activities/cleanup.ex create mode 100644 test/kith/activities/cleanup_test.exs diff --git a/lib/kith/activities/cleanup.ex b/lib/kith/activities/cleanup.ex new file mode 100644 index 
0000000..4f4c450 --- /dev/null +++ b/lib/kith/activities/cleanup.ex @@ -0,0 +1,21 @@ +defmodule Kith.Activities.Cleanup do + @moduledoc """ + Wipes all account-scoped activities for a single account. Activities have + no contact FK so they are not cleared by `Kith.Contacts.Cleanup`'s CASCADE. + """ + + alias Kith.Activities.Activity + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in Activity, where: a.account_id == ^account_id)) + + Logger.info("[Activities.Cleanup] wiped #{count} activit(ies) for account #{account_id}") + :ok + end +end diff --git a/test/kith/activities/cleanup_test.exs b/test/kith/activities/cleanup_test.exs new file mode 100644 index 0000000..7524434 --- /dev/null +++ b/test/kith/activities/cleanup_test.exs @@ -0,0 +1,47 @@ +defmodule Kith.Activities.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Activities.{Activity, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes activities for target account only", ctx do + Repo.insert!(%Activity{ + account_id: ctx.target_account, + title: "target activity", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Activity{ + account_id: ctx.other_account, + title: "other activity", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Activity, ctx.target_account) == 0 + assert count_for(Activity, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, 
account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From ab41cf7b5996d1a0b44c1a7bd07380e598bf492e Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:27:30 +0300 Subject: [PATCH 21/58] feat: add Kith.AuditLogs.Cleanup for account-scoped audit-log wipe --- lib/kith/audit_logs/cleanup.ex | 22 ++++++++++++ test/kith/audit_logs/cleanup_test.exs | 52 +++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 lib/kith/audit_logs/cleanup.ex create mode 100644 test/kith/audit_logs/cleanup_test.exs diff --git a/lib/kith/audit_logs/cleanup.ex b/lib/kith/audit_logs/cleanup.ex new file mode 100644 index 0000000..1753f9e --- /dev/null +++ b/lib/kith/audit_logs/cleanup.ex @@ -0,0 +1,22 @@ +defmodule Kith.AuditLogs.Cleanup do + @moduledoc """ + Wipes all audit logs for a single account. Runs LAST in the reset pipeline + so the "account_data_reset" log written at the start of the worker lives + until the rest of cleanup completes. 
+ """ + + alias Kith.AuditLogs.AuditLog + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in AuditLog, where: a.account_id == ^account_id)) + + Logger.info("[AuditLogs.Cleanup] wiped #{count} audit log(s) for account #{account_id}") + :ok + end +end diff --git a/test/kith/audit_logs/cleanup_test.exs b/test/kith/audit_logs/cleanup_test.exs new file mode 100644 index 0000000..d1952da --- /dev/null +++ b/test/kith/audit_logs/cleanup_test.exs @@ -0,0 +1,52 @@ +defmodule Kith.AuditLogs.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.AuditLogs + alias Kith.AuditLogs.{AuditLog, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes audit logs for target account only", ctx do + {:ok, _} = + AuditLogs.create_audit_log(ctx.target_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + {:ok, _} = + AuditLogs.create_audit_log(ctx.other_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(AuditLog, ctx.target_account) == 0 + assert count_for(AuditLog, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From cdd46d4207a04507d3b7dc0f39f660abbd361bed Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 
23:30:48 +0300 Subject: [PATCH 22/58] refactor: AccountResetWorker becomes orchestrator over per-domain Cleanup modules --- lib/kith/workers/account_reset_worker.ex | 145 +++++++---------------- 1 file changed, 44 insertions(+), 101 deletions(-) diff --git a/lib/kith/workers/account_reset_worker.ex b/lib/kith/workers/account_reset_worker.ex index 49630f0..97a1364 100644 --- a/lib/kith/workers/account_reset_worker.ex +++ b/lib/kith/workers/account_reset_worker.ex @@ -1,9 +1,13 @@ defmodule Kith.Workers.AccountResetWorker do @moduledoc """ - Oban worker that resets account data — deletes all contacts and - sub-entities while preserving users, account settings, and reference data. + Resets a single account's data by orchestrating per-domain cleanup modules. - Processes in batches to avoid long-running transactions on large accounts. + Wipes everything the account owns except reference data (genders, + relationship_types, contact_field_types, etc.) and account_invitations. + Operations are scoped to the target account; no other account is affected. + + Each `@cleaners` module exposes `wipe_for_account(account_id) :: :ok`. + Order is load-bearing — see `docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md`. 
""" use Oban.Worker, @@ -11,115 +15,54 @@ defmodule Kith.Workers.AccountResetWorker do max_attempts: 3, unique: [period: 300, fields: [:args], keys: [:account_id]] - import Ecto.Query - alias Kith.Repo + alias Kith.{ + Activities, + AuditLogs, + Contacts, + Conversations, + Imports, + Journal, + Reminders, + Storage, + Tasks + } require Logger - @batch_size 200 + @cleaners [ + Imports.JobCancellation, + Storage.AccountCleanup, + Contacts.Cleanup, + Imports.Cleanup, + Conversations.Cleanup, + Reminders.Cleanup, + Tasks.Cleanup, + Journal.Cleanup, + Activities.Cleanup, + AuditLogs.Cleanup + ] @impl Oban.Worker def perform(%Oban.Job{args: %{"account_id" => account_id}}) do - Logger.info("AccountResetWorker: starting reset for account #{account_id}") + Logger.metadata(account_id: account_id, worker: "AccountReset") + Logger.info("[AccountReset] starting reset for account #{account_id}") + write_initiated_audit_log(account_id) + + Enum.each(@cleaners, fn cleaner -> + Logger.info("[AccountReset] running #{inspect(cleaner)}") + :ok = cleaner.wipe_for_account(account_id) + end) + + Logger.info("[AccountReset] completed reset for account #{account_id}") + :ok + end - Kith.AuditLogs.create_audit_log(account_id, %{ + defp write_initiated_audit_log(account_id) do + AuditLogs.create_audit_log(account_id, %{ user_id: nil, user_name: "system", event: "account_data_reset", metadata: %{reason: "Account data reset initiated"} }) - - # 1. Cancel all Oban reminder jobs for the account - cancel_reminder_jobs(account_id) - - # 2. Delete stored files (photos, documents) - delete_stored_files(account_id) - - # 3. Delete all contacts and sub-entities in batches - delete_contacts_in_batches(account_id) - - # 4. 
Delete orphaned data: tags, activities without contacts - delete_tags(account_id) - delete_activities(account_id) - delete_audit_logs(account_id) - - Logger.info("AccountResetWorker: completed reset for account #{account_id}") - :ok - end - - defp cancel_reminder_jobs(account_id) do - job_ids = - from(r in Kith.Reminders.Reminder, - where: r.account_id == ^account_id, - select: r.enqueued_oban_job_ids - ) - |> Repo.all() - |> List.flatten() - - Enum.each(job_ids, &Oban.cancel_job/1) - end - - defp delete_stored_files(account_id) do - # Delete photo files - from(p in Kith.Contacts.Photo, - join: c in Kith.Contacts.Contact, - on: p.contact_id == c.id, - where: c.account_id == ^account_id, - select: p.storage_key - ) - |> Repo.all() - |> Enum.each(&safe_delete_file/1) - - # Delete document files - from(d in Kith.Contacts.Document, - join: c in Kith.Contacts.Contact, - on: d.contact_id == c.id, - where: c.account_id == ^account_id, - select: d.storage_key - ) - |> Repo.all() - |> Enum.each(&safe_delete_file/1) - end - - defp safe_delete_file(nil), do: :ok - - defp safe_delete_file(key) do - case Kith.Storage.delete(key) do - :ok -> :ok - {:error, reason} -> Logger.warning("Failed to delete file #{key}: #{inspect(reason)}") - end - end - - defp delete_contacts_in_batches(account_id) do - # Hard-delete contacts (bypassing soft-delete) — CASCADE handles sub-entities - contact_ids = - from(c in Kith.Contacts.Contact, - where: c.account_id == ^account_id, - select: c.id, - limit: @batch_size - ) - |> Repo.all() - - if contact_ids != [] do - from(c in Kith.Contacts.Contact, where: c.id in ^contact_ids) - |> Repo.delete_all() - - delete_contacts_in_batches(account_id) - end - end - - defp delete_tags(account_id) do - from(t in Kith.Contacts.Tag, where: t.account_id == ^account_id) - |> Repo.delete_all() - end - - defp delete_activities(account_id) do - from(a in Kith.Activities.Activity, where: a.account_id == ^account_id) - |> Repo.delete_all() - end - - defp 
delete_audit_logs(account_id) do - from(al in Kith.AuditLogs.AuditLog, where: al.account_id == ^account_id) - |> Repo.delete_all() end end From 28516fba6fefef6f277bf49ad6b6e2a1307889ef Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:41:39 +0300 Subject: [PATCH 23/58] test: add regression + cross-account isolation tests for AccountResetWorker --- .../workers/account_reset_worker_test.exs | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 test/kith/workers/account_reset_worker_test.exs diff --git a/test/kith/workers/account_reset_worker_test.exs b/test/kith/workers/account_reset_worker_test.exs new file mode 100644 index 0000000..1a1c5ff --- /dev/null +++ b/test/kith/workers/account_reset_worker_test.exs @@ -0,0 +1,190 @@ +defmodule Kith.Workers.AccountResetWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Activities.Activity + alias Kith.AuditLogs.AuditLog + alias Kith.Contacts.{Contact, Tag} + alias Kith.Conversations.Conversation + alias Kith.Imports + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Journal.Entry + alias Kith.Reminders.Reminder + alias Kith.Repo + alias Kith.Tasks.Task, as: TaskSchema + alias Kith.Workers.AccountResetWorker + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + describe "perform/1 — regression: re-import after reset" do + test "re-import for same Monica contact id resolves to new local contact (no stale import_records)", + ctx do + # Initial import: contact + import_record for Monica id 964 + import_a = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + 
api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + contact_a = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_a, "contact", "964", "contact", contact_a.id) + + # Run reset + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target account fully wiped + assert count(Contact, ctx.target_account) == 0 + assert count(Import, ctx.target_account) == 0 + assert count(ImportRecord, ctx.target_account) == 0 + + # Re-import: new contact + new import_record for the same Monica id + import_b = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + contact_b = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_b, "contact", "964", "contact", contact_b.id) + + # The photo-sync lookup that previously found stale data now resolves correctly + assert %{local_entity_id: local_id} = + Imports.find_import_record(ctx.target_account, "monica_api", "contact", "964") + + assert local_id == contact_b.id + end + end + + describe "perform/1 — cross-account isolation" do + test "resetting account A does not touch any data in account B", ctx do + target_contact = populate_data!(ctx.target_account, ctx.target_user) + _other_contact = populate_data!(ctx.other_account, ctx.other_user) + + before_other = snapshot(ctx.other_account) + + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target wiped across every domain + assert empty?(ctx.target_account) + + # Other account is bit-identical to before + assert snapshot(ctx.other_account) == before_other + + # Sanity: target_contact is gone, other account still has its contact + refute Repo.get(Contact, target_contact.id) + end + end + + defp populate_data!(account_id, user_id) do + contact = contact_fixture(account_id) + + target_import = + import_fixture(account_id, user_id, %{ + source: 
"monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + {:ok, _} = + Imports.record_imported_entity(target_import, "contact", "1", "contact", contact.id) + + Repo.insert!(%Tag{account_id: account_id, name: "t"}) + + Repo.insert!(%Activity{ + account_id: account_id, + title: "a", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%TaskSchema{ + account_id: account_id, + creator_id: user_id, + title: "x" + }) + + Repo.insert!(%Entry{ + account_id: account_id, + author_id: user_id, + content: "c", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Conversation{ + account_id: account_id, + creator_id: user_id, + contact_id: contact.id, + subject: "s", + platform: "other", + status: "active" + }) + + _reminder = reminder_fixture(account_id, contact.id, user_id) + + {:ok, _} = + Kith.AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "test", + event: "account_data_reset", + metadata: %{} + }) + + contact + end + + defp snapshot(account_id) do + %{ + contacts: count(Contact, account_id), + imports: count(Import, account_id), + import_records: count(ImportRecord, account_id), + conversations: count(Conversation, account_id), + tasks: count(TaskSchema, account_id), + journal_entries: count(Entry, account_id), + reminders: count(Reminder, account_id), + tags: count(Tag, account_id), + activities: count(Activity, account_id), + audit_logs: count(AuditLog, account_id) + } + end + + defp empty?(account_id) do + snapshot(account_id) == + %{ + contacts: 0, + imports: 0, + import_records: 0, + conversations: 0, + tasks: 0, + journal_entries: 0, + reminders: 0, + tags: 0, + activities: 0, + audit_logs: 0 + } + end + + defp count(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end From 7b3a355e9f12677bb6f92f703eee82caf0cce072 Mon Sep 17 00:00:00 2001 From: Bashar Qassis 
<23612682+bashar-qassis@users.noreply.github.com> Date: Fri, 15 May 2026 23:45:46 +0300 Subject: [PATCH 24/58] docs: clarify Reminders.Cleanup moduledoc on reminder_rules preservation reminder_rules is account-scoped (not reminder-scoped) and has no FK relationship to reminders, so it's not CASCADE-cleared. Rules are 3 seeded-per-account pre-notification defaults treated as reference data. --- lib/kith/reminders/cleanup.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/kith/reminders/cleanup.ex b/lib/kith/reminders/cleanup.ex index 3c53ef2..03a92b0 100644 --- a/lib/kith/reminders/cleanup.ex +++ b/lib/kith/reminders/cleanup.ex @@ -1,7 +1,11 @@ defmodule Kith.Reminders.Cleanup do @moduledoc """ Cancels all Oban jobs tracked on the account's reminders, then deletes - the reminders. FK CASCADE removes `reminder_rules` and `reminder_instances`. + the reminders. FK CASCADE removes `reminder_instances`. + + Note: `reminder_rules` is intentionally NOT wiped — rules are account-level + pre-notification configuration (3 defaults seeded per account, toggleable + but not deletable per the schema) and are treated as reference data. 
""" alias Kith.Reminders.Reminder From 6af91bfe5a7c421117f7d96316b2c06d8c278e28 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 01:46:11 +0300 Subject: [PATCH 25/58] =?UTF-8?q?fix:=20Monica=20import=20duplicate=20hand?= =?UTF-8?q?ling=20=E2=80=94=20auto-merge=20contract,=20cartesian=20explosi?= =?UTF-8?q?on,=20E.164=20normalization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent bugs combined to make 1000 Monica imports surface as ~6000 entries in the duplicates tab even when "Auto-merge definite duplicates" was checked: Bug C (primary cause of "auto-merge did nothing"): MonicaApiCrawlWorker.build_opts/1 only forwarded "extra_notes" — every other wizard option, including "auto_merge_duplicates", was silently dropped before reaching MonicaApi.crawl/5. Auto-merge was structurally unreachable from the UI; only unit tests calling crawl/5 directly with their own opts had ever exercised it. Now forwards api_options unchanged, preserving the legacy extra_notes default-on semantic. Bug A (primary cause of "6000 entries"): DuplicateDetectionWorker.find_phone_matches/1 joined contact_fields on the digit-normalized value but pre-filtered the *raw* value, so any phone whose digits-stripped form was empty ("+", "()", "-", "N/A") passed the filter, normalized to "", and matched every other zero-digit phone — C(N,2) false candidates. The email side had a smaller analog: no TRIM, no cf2 filter. Now strict equality on canonical values for phone, TRIM(LOWER(...)) plus per-side non-empty filters for email. Bug B (auto-merge predicate too narrow): Within a name group the predicate compared values raw — Monica's CardDAV-sync duplicates (monicahq/monica#6175) escape it on trivial whitespace/casing artifacts. Name key now trims + collapses whitespace; predicate accepts shared email OR phone OR address with normalized comparators; addresses preloaded. 
Bug D (heuristic phone storage): PhoneFormatter.normalize/1 was heuristic — "10 digits stays as-is", "11 digits starting with 1 becomes +1...", "00" IDD prefix unhandled — so the same number written two ways was stored as two different values. Replaced with ex_phone_number (libphonenumber port) producing E.164. normalize/2 takes a default_region for bare numbers; bare input without a region round-trips unchanged. PhoneRenormalizeWorker backfills existing rows once. UX: auto_merge_duplicates wizard default flipped to true. New region picker pre-populated from account.locale, listing every libphonenumber-supported region with localized country names via ex_cldr_territories ∩ ExPhoneNumber.Metadata.get_supported_regions/0. Detection worker phone match simplified to plain equality now that values are canonical at write-time, removing the per-row regex and the filter mismatch that caused the cartesian explosion. Tests: - Boundary test in monica_api_crawl_worker_test that round-trips the wizard flag through build_opts (would have caught Bug C directly). - Cartesian-explosion regression in duplicate_detection_worker_test. - Email TRIM and phone-normalization-on-import in respective files. - phone_formatter_test rewritten for explicit-region semantics. - New phone_renormalize_worker_test (5 tests). - Production libphonenumber metadata in :test (was test-only metadata that diverged from real validation on "555" NANP prefixes). Dialyzer: Added .dialyzer_ignore.exs suppressing two :contract_supertype warnings against Kith.Cldr.Territory — these are emitted from generated code in ex_cldr_territories, not actionable from our side. 1122 tests pass, mix quality clean. 
--- .dialyzer_ignore.exs | 10 + config/config.exs | 4 + config/test.exs | 4 + lib/kith/cldr.ex | 2 +- lib/kith/contacts/phone_formatter.ex | 175 ++++++++++++++---- lib/kith/imports/sources/monica_api.ex | 151 ++++++++++++--- .../workers/duplicate_detection_worker.ex | 24 ++- lib/kith/workers/monica_api_crawl_worker.ex | 15 +- lib/kith/workers/phone_renormalize_worker.ex | 117 ++++++++++++ lib/kith_web/live/import_wizard_live.ex | 106 ++++++++--- mix.exs | 7 +- mix.lock | 2 + test/kith/contacts/phone_formatter_test.exs | 122 ++++++++++-- test/kith/imports/sources/monica_api_test.exs | 42 ++++- .../duplicate_detection_worker_test.exs | 82 +++++++- .../workers/monica_api_crawl_worker_test.exs | 55 ++++++ .../workers/phone_renormalize_worker_test.exs | 132 +++++++++++++ 17 files changed, 930 insertions(+), 120 deletions(-) create mode 100644 .dialyzer_ignore.exs create mode 100644 lib/kith/workers/phone_renormalize_worker.ex create mode 100644 test/kith/workers/phone_renormalize_worker_test.exs diff --git a/.dialyzer_ignore.exs b/.dialyzer_ignore.exs new file mode 100644 index 0000000..5906f2e --- /dev/null +++ b/.dialyzer_ignore.exs @@ -0,0 +1,10 @@ +# Suppress dialyzer warnings produced by generated code in third-party +# libraries. Re-evaluate this list whenever we upgrade deps. + +[ + # ex_cldr_territories generates type specs that are slightly broader than + # the success typing for these zero-arg accessors on Kith.Cldr.Territory. + # Reported as `:contract_supertype` against lib/kith/cldr.ex (the backend + # module that injects the provider). Not actionable from our code. 
+ {"lib/kith/cldr.ex", :contract_supertype} +] diff --git a/config/config.exs b/config/config.exs index 4fb3c28..e93ee72 100644 --- a/config/config.exs +++ b/config/config.exs @@ -10,6 +10,10 @@ import Config # Register .vcf (vCard) MIME type for LiveView uploads config :mime, :types, %{"text/vcard" => ["vcf"], "application/json" => ["json"]} +# Default CLDR backend — required so ex_cldr_territories can resolve +# locale-aware territory data without an explicit per-call backend argument. +config :ex_cldr, default_backend: Kith.Cldr + config :kith, :scopes, user: [ default: true, diff --git a/config/test.exs b/config/test.exs index 9732fc4..215c77e 100644 --- a/config/test.exs +++ b/config/test.exs @@ -26,6 +26,10 @@ config :kith, KithWeb.Endpoint, # Disable Oban in tests (use Oban.Testing) config :kith, Oban, testing: :manual +# Use the production libphonenumber metadata in tests so test-only validation +# rules (NANP "555" prefixes, etc.) don't diverge from real behavior. +config :ex_phone_number, metadata_file: Path.join("resources", "PhoneNumberMetadata.xml") + # Disable PromEx in tests (its Ecto poller conflicts with sandbox ownership) config :kith, Kith.PromEx, disabled: true diff --git a/lib/kith/cldr.ex b/lib/kith/cldr.ex index bb8891b..1da9647 100644 --- a/lib/kith/cldr.ex +++ b/lib/kith/cldr.ex @@ -7,5 +7,5 @@ defmodule Kith.Cldr do use Cldr, locales: ["en", "ar", "fr", "de", "es", "pt", "ja", "zh"], default_locale: "en", - providers: [Cldr.Number, Cldr.DateTime, Cldr.Calendar] + providers: [Cldr.Number, Cldr.DateTime, Cldr.Calendar, Cldr.Territory] end diff --git a/lib/kith/contacts/phone_formatter.ex b/lib/kith/contacts/phone_formatter.ex index 1532732..4517e15 100644 --- a/lib/kith/contacts/phone_formatter.ex +++ b/lib/kith/contacts/phone_formatter.ex @@ -1,62 +1,170 @@ defmodule Kith.Contacts.PhoneFormatter do @moduledoc """ - Phone number normalization and formatting. + Phone number normalization (E.164 for storage) and display formatting. 
- Stores numbers in a normalized form internally (E.164 when possible), - formats for display according to account preference. + Storage form is E.164 when the value can be parsed as a valid international + number — either because it carries a `+` country-code prefix, or because the + caller supplies a `default_region` (ISO 3166-1 alpha-2) for bare numbers. + Unparseable input is returned trimmed-but-otherwise-unchanged so user data + is never silently destroyed. + + Display formatting (`format/2`) reads the account's `phone_format` + preference and renders the stored E.164 value as national/international/raw. """ + alias ExPhoneNumber + + @typedoc "ISO 3166-1 alpha-2 region code or `nil` to skip bare-number parsing." + @type region :: String.t() | nil + @doc """ Normalize a phone number for storage. - Strips non-digit characters (preserving leading +), applies best-effort - country code detection for bare numbers. - - Returns `{:ok, normalized}` or `{:ok, nil}` for blank input. + Equivalent to `normalize/2` with no default region — bare numbers (without + a `+` prefix) are returned trimmed-only. Use the 2-arity form from import + paths that know the user's preferred region. """ - def normalize(nil), do: {:ok, nil} - def normalize(""), do: {:ok, nil} + @spec normalize(String.t() | nil) :: {:ok, String.t() | nil} + def normalize(value), do: normalize(value, nil) + + @doc """ + Normalize a phone number to E.164 for storage. + + * `value` — raw user / import input. + * `default_region` — ISO 3166-1 alpha-2 region (e.g. `"US"`, `"FR"`) used + to parse bare numbers without a `+` prefix. Pass `nil` to leave bare + numbers unchanged (only `+`-prefixed input is parsed). - def normalize(phone) when is_binary(phone) do - stripped = String.trim(phone) + Returns `{:ok, normalized}` where `normalized` is the canonical E.164 form, + the original trimmed string if parsing fails, or `nil` for blank input. 
+ """ + @spec normalize(String.t() | nil, region) :: {:ok, String.t() | nil} + def normalize(nil, _), do: {:ok, nil} + def normalize("", _), do: {:ok, nil} - has_plus = String.starts_with?(stripped, "+") - digits = String.replace(stripped, ~r/[^\d]/, "") + def normalize(value, default_region) when is_binary(value) do + trimmed = String.trim(value) + has_plus = String.starts_with?(trimmed, "+") + region = if has_plus, do: nil, else: default_region cond do - digits == "" -> + trimmed == "" -> {:ok, nil} - has_plus -> - {:ok, "+" <> digits} + not has_plus and is_nil(region) -> + {:ok, trimmed} + + true -> + parse_to_e164(trimmed, region) + end + end + + defp parse_to_e164(trimmed, region) do + # Format-on-parse, not format-on-valid. libphonenumber's `is_valid_number?` + # rejects valid-but-uncommon inputs (NANP "555" test prefixes, recently + # allocated area codes, vanity numbers, region-specific oddities). Users' + # personal-CRM data is exactly that messy; refusing to canonicalize + # parseable-but-not-strictly-valid numbers re-introduces the mixed-storage + # problem detection is supposed to solve. We keep the parse check so that + # truly malformed input (`"garbage"`, `"+"`) round-trips unchanged. + case ExPhoneNumber.parse(trimmed, region) do + {:ok, parsed} -> {:ok, ExPhoneNumber.format(parsed, :e164)} + {:error, _} -> {:ok, trimmed} + end + end + + @doc """ + Map an account `locale` to a best-guess ISO 3166-1 alpha-2 region code. - # Bare 10-digit number — could be many countries, store as-is - String.length(digits) == 10 -> - {:ok, digits} + Returns `nil` when the locale doesn't map cleanly — callers should treat + `nil` as "don't normalize bare numbers" and prompt the user to pick. 
+ """ + @spec region_for_locale(String.t() | nil) :: region + def region_for_locale(nil), do: nil + + def region_for_locale(locale) when is_binary(locale) do + locale + |> String.split(~r/[-_]/) + |> List.first() + |> String.downcase() + |> language_to_region() + end - # US/Canada: 11-digit starting with 1 - String.length(digits) == 11 and String.starts_with?(digits, "1") -> - {:ok, "+" <> digits} + defp language_to_region("en"), do: "US" + defp language_to_region("fr"), do: "FR" + defp language_to_region("de"), do: "DE" + defp language_to_region("es"), do: "ES" + defp language_to_region("it"), do: "IT" + defp language_to_region("pt"), do: "PT" + defp language_to_region("nl"), do: "NL" + defp language_to_region("ru"), do: "RU" + defp language_to_region("ja"), do: "JP" + defp language_to_region("zh"), do: "CN" + defp language_to_region("ko"), do: "KR" + defp language_to_region("ar"), do: "SA" + defp language_to_region(_), do: nil - # International: 7+ digits, assume needs + - String.length(digits) >= 7 -> - {:ok, "+" <> digits} + @doc """ + List every parser-supported region with its localized country name and + calling code, sorted by display name. - # Too short to normalize meaningfully - true -> - {:ok, stripped} + Returns `[{region_code, label}]` — e.g. + `[{"AF", "Afghanistan (+93)"}, {"AL", "Albania (+355)"}, ...]` + + The intersection of `ExPhoneNumber.Metadata.get_supported_regions/0` + (regions the parser can actually handle) and + `Cldr.Territory.country_codes/1` (real ISO 3166-1 countries, not + continents) is computed once per locale and cached via `:persistent_term` + to keep wizard mounts fast. 
+ """ + @spec supported_regions(String.t()) :: [{String.t(), String.t()}] + def supported_regions(locale \\ "en") do + case :persistent_term.get({__MODULE__, :regions, locale}, :miss) do + :miss -> + regions = build_supported_regions(locale) + :persistent_term.put({__MODULE__, :regions, locale}, regions) + regions + + regions -> + regions end end + defp build_supported_regions(locale) do + parser_supported = + ExPhoneNumber.Metadata.get_supported_regions() + |> MapSet.new() + + Cldr.Territory.country_codes(as: :binary) + |> Enum.filter(&MapSet.member?(parser_supported, &1)) + |> Enum.map(&{&1, region_label(&1, locale)}) + |> Enum.sort_by(fn {_code, label} -> label end, :asc) + end + + defp region_label(code, locale) do + name = + case Kith.Cldr.Territory.from_territory_code( + String.to_atom(code), + locale: locale, + style: :standard + ) do + {:ok, localized} -> localized + _ -> code + end + + calling_code = ExPhoneNumber.Metadata.get_country_code_for_region_code(code) + "#{name} (+#{calling_code})" + end + @doc """ - Format a normalized phone number for display. + Format a stored phone number for display according to the account preference. 
## Formats - * `"e164"` — E.164 as-is: `+12345678901` - * `"national"` — US/Canada national: `(234) 567-8901` - * `"international"` — International: `+1 234-567-8901` - * `"raw"` — Return as-is, no formatting + * `"e164"` — E.164 as-is: `+12025550100` + * `"national"` — US/Canada national: `(202) 555-0100` + * `"international"` — International: `+1 202-555-0100` + * `"raw"` — return the stored value unchanged """ def format(nil, _format), do: nil def format(phone, "raw"), do: phone @@ -65,7 +173,6 @@ defmodule Kith.Contacts.PhoneFormatter do def format(phone, "international"), do: format_international(phone) def format(phone, _), do: phone - # US/Canada: +1 followed by 10 digits defp format_national( <<"+"::utf8, ?1, area::binary-size(3), prefix::binary-size(3), line::binary-size(4)>> ) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 2eb8bbe..d3fe5b5 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -27,6 +27,7 @@ defmodule Kith.Imports.Sources.MonicaApi do import Ecto.Query, warn: false alias Kith.Contacts + alias Kith.Contacts.PhoneFormatter alias Kith.Imports alias Kith.Repo alias Kith.Workers.MonicaDocumentImportWorker @@ -81,6 +82,7 @@ defmodule Kith.Imports.Sources.MonicaApi do user_id: user_id, credential: credential, import_job: import_job, + opts: opts, topic: "import:#{account_id}" } @@ -375,7 +377,7 @@ defmodule Kith.Imports.Sources.MonicaApi do defp import_api_contact_children(ctx, contact, api_contact, source_id, ref_data, acc, deferred) do # Contact fields (embedded with ?with=contactfields) - import_api_contact_fields(contact, api_contact, ref_data, ctx.import_job) + import_api_contact_fields(contact, api_contact, ref_data, ctx) # Addresses (embedded directly) import_api_addresses(contact, api_contact, ctx.import_job) @@ -393,21 +395,57 @@ defmodule Kith.Imports.Sources.MonicaApi do {acc, deferred} end - defp import_api_contact_fields(contact, 
api_contact, ref_data, import_job) do + defp import_api_contact_fields(contact, api_contact, ref_data, ctx) do fields = api_contact["contactFields"] || [] Enum.each(fields, fn field -> - import_single_contact_field(contact, field, ref_data, import_job) + import_single_contact_field(contact, field, ref_data, ctx) end) end - defp import_single_contact_field(contact, field, ref_data, import_job) do + defp import_single_contact_field(contact, field, ref_data, ctx) do cft_name = get_in(field, ["contact_field_type", "name"]) cft_id = if cft_name, do: Map.get(ref_data.contact_field_types, cft_name) - value = field["content"] + raw_value = field["content"] + value = normalize_field_value(raw_value, cft_id, ctx.opts) if cft_id && value && !contact_field_duplicate?(contact.id, cft_id, value) do - create_contact_field(contact, field, cft_id, value, import_job) + create_contact_field(contact, field, cft_id, value, ctx.import_job) + end + end + + # Normalize phone fields to E.164 at import time so detection and intra-contact + # dedup do simple equality. Other field types (email, social, etc) pass through. + defp normalize_field_value(nil, _cft_id, _opts), do: nil + + defp normalize_field_value(value, cft_id, opts) when is_binary(value) do + if phone_field_type?(cft_id) do + region = opts["phone_default_region"] + region = if region in [nil, ""], do: nil, else: region + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end + end + + defp phone_field_type?(nil), do: false + + defp phone_field_type?(cft_id) do + case :persistent_term.get({__MODULE__, :phone_cft, cft_id}, :miss) do + :miss -> + result = + Repo.exists?( + from(t in Contacts.ContactFieldType, + where: t.id == ^cft_id and fragment("? 
LIKE 'tel%'", t.protocol) + ) + ) + + :persistent_term.put({__MODULE__, :phone_cft, cft_id}, result) + result + + result -> + result end end @@ -575,21 +613,22 @@ defmodule Kith.Imports.Sources.MonicaApi do ) ) - # Load contacts with contact fields + # Load contacts with contact fields and addresses (addresses participate + # in the broadened definite-duplicate predicate). contacts = Repo.all( from(c in Contacts.Contact, where: c.id in ^import_records and is_nil(c.deleted_at), - preload: [contact_fields: :contact_field_type] + preload: [:addresses, contact_fields: :contact_field_type] ) ) - # Group by normalized name + # Group by trimmed-and-collapsed normalized name. CardDAV-style duplicates + # (monicahq/monica#6175) often differ only in trailing whitespace or + # double-space artifacts, so the trim is load-bearing here. name_groups = contacts - |> Enum.group_by(fn c -> - {String.downcase(c.first_name || ""), String.downcase(c.last_name || "")} - end) + |> Enum.group_by(&name_key/1) |> Enum.filter(fn {_key, group} -> length(group) >= 2 end) merged_ids = MapSet.new() @@ -651,29 +690,99 @@ defmodule Kith.Imports.Sources.MonicaApi do end end + # "Definite duplicate" predicate evaluated only after callers have already + # grouped contacts by normalized {first_name, last_name}. The trimmed-name + # equality is itself a strong identity signal, so within a group a single + # shared {email, phone, address} suffices — this catches CardDAV-shaped + # duplicates (monicahq/monica#6175) where every field is identical, the + # "triple duplicates" case (same name + same email × N), and the original + # narrow case (same name + shared phone or email). 
defp definite_duplicate?(contact_a, contact_b) do - emails_a = extract_values_by_protocol(contact_a, "mailto") - emails_b = extract_values_by_protocol(contact_b, "mailto") + shared_emails?(contact_a, contact_b) or + shared_phones?(contact_a, contact_b) or + shared_addresses?(contact_a, contact_b) + end - phones_a = extract_values_by_protocol(contact_a, "tel") - phones_b = extract_values_by_protocol(contact_b, "tel") + defp shared_emails?(a, b) do + set_a = extract_values_by_protocol(a, "mailto", &normalize_email_value/1) + set_b = extract_values_by_protocol(b, "mailto", &normalize_email_value/1) + intersects?(set_a, set_b) + end - shared_email? = not MapSet.disjoint?(emails_a, emails_b) - shared_phone? = not MapSet.disjoint?(phones_a, phones_b) + defp shared_phones?(a, b) do + # Phone values are E.164-canonical at this point (PhoneFormatter.normalize/2 + # ran during import for `tel`-protocol fields). Comparison is strict equality + # after stripping non-digit characters as a safety net for any pre-existing + # rows that bypassed normalization. + set_a = extract_values_by_protocol(a, "tel", &normalize_phone_digits/1) + set_b = extract_values_by_protocol(b, "tel", &normalize_phone_digits/1) + intersects?(set_a, set_b) + end - shared_email? or shared_phone? 
+ defp shared_addresses?(a, b) do + set_a = address_keys(a) + set_b = address_keys(b) + intersects?(set_a, set_b) end - defp extract_values_by_protocol(contact, protocol_prefix) do + defp extract_values_by_protocol(contact, protocol_prefix, normalizer) do contact.contact_fields |> Enum.filter(fn cf -> cf.contact_field_type && String.starts_with?(cf.contact_field_type.protocol || "", protocol_prefix) end) - |> Enum.map(fn cf -> String.downcase(cf.value || "") end) + |> Enum.map(fn cf -> normalizer.(cf.value) end) + |> Enum.reject(&(&1 in [nil, ""])) + |> MapSet.new() + end + + defp normalize_email_value(nil), do: nil + defp normalize_email_value(v) when is_binary(v), do: v |> String.trim() |> String.downcase() + + defp normalize_phone_digits(nil), do: nil + + defp normalize_phone_digits(v) when is_binary(v) do + case String.replace(v, ~r/[^0-9]/, "") do + "" -> nil + digits -> digits + end + end + + defp address_keys(contact) do + contact.addresses + |> Enum.map(fn a -> + line1 = normalize_address_part(a.line1) + postal = normalize_address_part(a.postal_code) + if line1 != "" and postal != "", do: {line1, postal}, else: nil + end) + |> Enum.reject(&is_nil/1) |> MapSet.new() end + defp normalize_address_part(nil), do: "" + + defp normalize_address_part(v) when is_binary(v) do + v |> String.trim() |> String.downcase() |> String.replace(~r/\s+/, " ") + end + + defp intersects?(a, b) do + if MapSet.size(a) == 0 or MapSet.size(b) == 0 do + false + else + not MapSet.disjoint?(a, b) + end + end + + defp name_key(contact) do + {normalize_name_part(contact.first_name), normalize_name_part(contact.last_name)} + end + + defp normalize_name_part(nil), do: "" + + defp normalize_name_part(v) when is_binary(v) do + v |> String.trim() |> String.downcase() |> String.replace(~r/\s+/, " ") + end + defp update_import_records_after_merge(account_id, import_job, old_contact_id, new_contact_id) do from(ir in Imports.ImportRecord, where: diff --git 
a/lib/kith/workers/duplicate_detection_worker.ex b/lib/kith/workers/duplicate_detection_worker.ex index efea61d..fbbc0bd 100644 --- a/lib/kith/workers/duplicate_detection_worker.ex +++ b/lib/kith/workers/duplicate_detection_worker.ex @@ -119,12 +119,15 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp find_email_matches(account_id) do - # Case-insensitive email match, both fields verified as email type + # Case-insensitive email match on TRIMmed values. Trim is required because + # CardDAV-style imports occasionally leak trailing whitespace; the != '' + # checks on the trimmed form prevent whitespace-only values from forming a + # cartesian product across all such rows. query = from cf1 in ContactField, join: cf2 in ContactField, on: - fragment("LOWER(?)", cf1.value) == fragment("LOWER(?)", cf2.value) and + fragment("LOWER(TRIM(?))", cf1.value) == fragment("LOWER(TRIM(?))", cf2.value) and cf1.id < cf2.id, join: cft1 in ContactFieldType, on: cf1.contact_field_type_id == cft1.id, @@ -135,7 +138,8 @@ defmodule Kith.Workers.DuplicateDetectionWorker do where: fragment("? LIKE 'mailto%'", cft1.protocol), where: fragment("? LIKE 'mailto%'", cft2.protocol), where: cf1.contact_id != cf2.contact_id, - where: cf1.value != "" and not is_nil(cf1.value), + where: not is_nil(cf1.value) and fragment("TRIM(?) <> ''", cf1.value), + where: not is_nil(cf2.value) and fragment("TRIM(?) <> ''", cf2.value), select: {cf1.contact_id, cf2.contact_id} query @@ -148,14 +152,15 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp find_phone_matches(account_id) do - # Normalized phone match (digits only), both fields verified as phone type + # Phone values are normalized to E.164 on import (see + # `Kith.Contacts.PhoneFormatter.normalize/2`), so this becomes a plain + # equality join. 
The previous in-query regex normalization combined with a + # raw-value `!= ""` filter let formatting-only inputs (`+`, `()`, `-`) + # collapse to an empty string and cartesian-explode (see Bug A). query = from cf1 in ContactField, join: cf2 in ContactField, - on: - fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf1.value) == - fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf2.value) and - cf1.id < cf2.id, + on: cf1.value == cf2.value and cf1.id < cf2.id, join: cft1 in ContactFieldType, on: cf1.contact_field_type_id == cft1.id, join: cft2 in ContactFieldType, @@ -165,7 +170,8 @@ defmodule Kith.Workers.DuplicateDetectionWorker do where: fragment("? LIKE 'tel%'", cft1.protocol), where: fragment("? LIKE 'tel%'", cft2.protocol), where: cf1.contact_id != cf2.contact_id, - where: cf1.value != "" and not is_nil(cf1.value), + where: not is_nil(cf1.value) and fragment("TRIM(?) <> ''", cf1.value), + where: not is_nil(cf2.value) and fragment("TRIM(?) <> ''", cf2.value), select: {cf1.contact_id, cf2.contact_id} query diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex index 292ff82..b6afccd 100644 --- a/lib/kith/workers/monica_api_crawl_worker.ex +++ b/lib/kith/workers/monica_api_crawl_worker.ex @@ -84,12 +84,19 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do } end - defp build_opts(import_job) do + @doc false + # Public for testing — the wizard → source-module flag handoff is the + # contract that Bug C silently violated, so we want a regression test that + # binds against this directly. + def build_opts(import_job) do options = import_job.api_options || %{} - %{ - "extra_notes" => options["extra_notes"] != false - } + # Forward every wizard-saved option so the source module is the single + # source of truth for which keys it reads. Normalize only the legacy + # extra_notes default-on semantic. 
+ options + |> Map.put_new("extra_notes", true) + |> Map.update!("extra_notes", &(&1 != false)) end defp maybe_enqueue_photo_sync(import_job) do diff --git a/lib/kith/workers/phone_renormalize_worker.ex b/lib/kith/workers/phone_renormalize_worker.ex new file mode 100644 index 0000000..66297b5 --- /dev/null +++ b/lib/kith/workers/phone_renormalize_worker.ex @@ -0,0 +1,117 @@ +defmodule Kith.Workers.PhoneRenormalizeWorker do + @moduledoc """ + One-shot Oban worker that re-normalizes existing phone-protocol contact_fields + to E.164 using each account's locale-derived region. + + Run once after the libphonenumber-backed `PhoneFormatter.normalize/2` lands, + to migrate values written under the previous heuristic (e.g. "5551234567" + stored without a country code) into canonical E.164 form so the detection + worker can match by plain equality. + + Args: + * `"account_id"` (optional) — scope to a single account; omit to process all. + + Idempotent: rows whose normalized form already equals the stored value are + skipped, so re-running is safe. + """ + + use Oban.Worker, queue: :default, max_attempts: 3 + + import Ecto.Query + + alias Kith.Accounts.Account + alias Kith.Contacts.{ContactField, ContactFieldType, PhoneFormatter} + alias Kith.Repo + + require Logger + + @batch_size 500 + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"account_id" => account_id}}) do + renormalize_account(account_id) + :ok + end + + @impl Oban.Worker + def perform(%Oban.Job{args: _args}) do + account_ids = Repo.all(from(a in Account, select: a.id)) + Enum.each(account_ids, &renormalize_account/1) + :ok + end + + defp renormalize_account(account_id) do + region = + Repo.one(from(a in Account, where: a.id == ^account_id, select: a.locale)) + |> PhoneFormatter.region_for_locale() + + # ContactFieldTypes can be either system-wide (`account_id IS NULL`) or + # account-specific. Match both — the detection worker uses the same + # protocol-only filter so this mirrors that. 
+ phone_cft_ids = + from(t in ContactFieldType, + where: is_nil(t.account_id) or t.account_id == ^account_id, + where: fragment("? LIKE 'tel%'", t.protocol), + select: t.id + ) + |> Repo.all() + + if phone_cft_ids != [] do + renormalize_batch(account_id, phone_cft_ids, region, 0, 0) + end + end + + defp renormalize_batch(account_id, cft_ids, region, offset, updated_count) do + rows = + from(cf in ContactField, + where: cf.account_id == ^account_id, + where: cf.contact_field_type_id in ^cft_ids, + order_by: [asc: cf.id], + offset: ^offset, + limit: @batch_size, + select: {cf.id, cf.value} + ) + |> Repo.all() + + if rows == [] do + Logger.info("[PhoneRenormalizeWorker] account=#{account_id} done, updated=#{updated_count}") + + :ok + else + batch_updated = count_updates(rows, region) + + renormalize_batch( + account_id, + cft_ids, + region, + offset + @batch_size, + updated_count + batch_updated + ) + end + end + + defp count_updates(rows, region) do + Enum.reduce(rows, 0, fn {id, value}, acc -> + acc + update_to_int(maybe_update(id, value, region)) + end) + end + + defp update_to_int(:updated), do: 1 + defp update_to_int(:unchanged), do: 0 + + defp maybe_update(_id, nil, _region), do: :unchanged + defp maybe_update(_id, "", _region), do: :unchanged + + defp maybe_update(id, value, region) do + {:ok, normalized} = PhoneFormatter.normalize(value, region) + + if normalized && normalized != value do + from(cf in ContactField, where: cf.id == ^id) + |> Repo.update_all(set: [value: normalized]) + + :updated + else + :unchanged + end + end +end diff --git a/lib/kith_web/live/import_wizard_live.ex b/lib/kith_web/live/import_wizard_live.ex index 5c4205e..6b16465 100644 --- a/lib/kith_web/live/import_wizard_live.ex +++ b/lib/kith_web/live/import_wizard_live.ex @@ -11,6 +11,7 @@ defmodule KithWeb.ImportWizardLive do use KithWeb, :live_view + alias Kith.Contacts.PhoneFormatter alias Kith.Imports alias Kith.Imports.Sources.MonicaApi alias Kith.Policy @@ -23,6 +24,8 @@ defmodule 
KithWeb.ImportWizardLive do @impl true def mount(_params, _session, socket) do + locale = account_locale(socket) + {:ok, socket |> assign(:page_title, "Import Contacts") @@ -30,18 +33,8 @@ defmodule KithWeb.ImportWizardLive do |> assign(:source, "vcard") |> assign(:api_url, "") |> assign(:api_key, "") - |> assign(:api_options, %{ - "photos" => false, - "auto_merge_duplicates" => false, - "pets" => true, - "calls" => true, - "activities" => true, - "gifts" => true, - "debts" => true, - "tasks" => true, - "reminders" => true, - "conversations" => true - }) + |> assign(:api_options, default_api_options(socket)) + |> assign(:phone_regions, build_phone_regions(locale)) |> assign(:api_testing, false) |> assign(:current_import, nil) |> assign(:progress, nil) @@ -54,6 +47,28 @@ defmodule KithWeb.ImportWizardLive do )} end + defp default_api_options(socket) do + %{ + "photos" => false, + "auto_merge_duplicates" => true, + "phone_default_region" => account_default_region(socket), + "pets" => true, + "calls" => true, + "activities" => true, + "gifts" => true, + "debts" => true, + "tasks" => true, + "reminders" => true, + "conversations" => true + } + end + + defp account_default_region(%{assigns: %{current_scope: %{account: %{locale: locale}}}}) + when is_binary(locale), + do: PhoneFormatter.region_for_locale(locale) || "" + + defp account_default_region(_socket), do: "" + @impl true def handle_params(_params, _uri, socket) do scope = socket.assigns.current_scope @@ -146,18 +161,7 @@ defmodule KithWeb.ImportWizardLive do |> assign(:source, "vcard") |> assign(:api_url, "") |> assign(:api_key, "") - |> assign(:api_options, %{ - "photos" => false, - "auto_merge_duplicates" => false, - "pets" => true, - "calls" => true, - "activities" => true, - "gifts" => true, - "debts" => true, - "tasks" => true, - "reminders" => true, - "conversations" => true - }) + |> assign(:api_options, default_api_options(socket)) |> assign(:api_testing, false) |> assign(:current_import, nil) |> 
assign(:progress, nil) @@ -165,6 +169,11 @@ defmodule KithWeb.ImportWizardLive do |> assign(:error, nil)} end + def handle_event("set_phone_region", %{"region" => region}, socket) do + options = Map.put(socket.assigns.api_options, "phone_default_region", region) + {:noreply, assign(socket, :api_options, options)} + end + # ── PubSub handlers ──────────────────────────────────────────────────────── @impl true @@ -344,11 +353,28 @@ defmodule KithWeb.ImportWizardLive do end defp build_api_options(socket) do + # Pass options through unchanged so non-boolean settings (phone_default_region) + # survive. MonicaApi reads each key directly and treats falsy as off. socket.assigns.api_options - |> Enum.filter(fn {_k, v} -> v end) - |> Enum.into(%{}, fn {k, _v} -> {k, true} end) end + defp build_phone_regions(locale) do + [{"", phone_off_label(locale)} | PhoneFormatter.supported_regions(locale)] + end + + defp account_locale(%{assigns: %{current_scope: %{account: %{locale: locale}}}}) + when is_binary(locale), + do: locale + + defp account_locale(_), do: "en" + + defp phone_off_label("en"), do: "Don't normalize bare numbers" + defp phone_off_label("fr"), do: "Ne pas normaliser les numéros sans indicatif" + defp phone_off_label("de"), do: "Nackte Nummern nicht normalisieren" + defp phone_off_label("es"), do: "No normalizar números sin prefijo" + defp phone_off_label("pt"), do: "Não normalizar números sem prefixo" + defp phone_off_label(_), do: "Don't normalize bare numbers" + # ── Render ────────────────────────────────────────────────────────────────── @impl true @@ -554,10 +580,36 @@ defmodule KithWeb.ImportWizardLive do Auto-merge definite duplicates

- Merge contacts with identical name + email or name + phone + Merge contacts that share 2+ strong signals (email, phone, address) or share an email/phone and an address

+
+ +
diff --git a/mix.exs b/mix.exs index 71f5468..37cfa14 100644 --- a/mix.exs +++ b/mix.exs @@ -36,7 +36,8 @@ defmodule Kith.MixProject do [ plt_file: {:no_warn, "priv/plts/dialyzer.plt"}, plt_add_apps: [:mix, :ex_unit], - flags: [:error_handling, :underspecs, :unknown] + flags: [:error_handling, :underspecs, :unknown], + ignore_warnings: ".dialyzer_ignore.exs" ] end @@ -107,6 +108,7 @@ defmodule Kith.MixProject do {:ex_cldr, "~> 2.40"}, {:ex_cldr_dates_times, "~> 2.20"}, {:ex_cldr_numbers, "~> 2.33"}, + {:ex_cldr_territories, "~> 2.9"}, # Logging & Observability {:logger_json, "~> 6.0"}, @@ -130,6 +132,9 @@ defmodule Kith.MixProject do # HTML Sanitization (rich text from Trix editor) {:html_sanitize_ex, "~> 1.4"}, + # Phone number parsing & E.164 normalization (libphonenumber port) + {:ex_phone_number, "~> 0.4"}, + # Server-side sorting, filtering, and pagination {:flop, "~> 0.26"}, {:flop_phoenix, "~> 0.23"}, diff --git a/mix.lock b/mix.lock index fb12711..ddafb77 100644 --- a/mix.lock +++ b/mix.lock @@ -34,8 +34,10 @@ "ex_cldr_currencies": {:hex, :ex_cldr_currencies, "2.17.1", "89947c7102ff1b46fc46095624239a1c3d72499b19ed650597630771d9e4a662", [:mix], [{:ex_cldr, "~> 2.38", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "e266a0a61f4c7d83608154d49b59e4d7485b2aaa7ba1d0e17b3c55910595de51"}, "ex_cldr_dates_times": {:hex, :ex_cldr_dates_times, "2.25.6", "6db974ab2b430b5733994c2bfbe98a69e25eeb076b876a929791ff521f8fdd96", [:mix], [{:calendar_interval, "~> 0.2", [hex: :calendar_interval, repo: "hexpm", optional: true]}, {:ex_cldr_calendars, "~> 2.4", [hex: :ex_cldr_calendars, repo: "hexpm", optional: false]}, {:ex_cldr_units, "~> 3.20", [hex: :ex_cldr_units, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:tz, "~> 0.26", [hex: :tz, repo: "hexpm", optional: true]}], "hexpm", "926ff5662b849f86088832ee66b61a96aab0fa5a54d5e14240e08ad3030663e2"}, 
"ex_cldr_numbers": {:hex, :ex_cldr_numbers, "2.38.1", "e5124e288a8e672831e10d39530ecb5329bc9af2169709ebfbadc814cae7d4fb", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:digital_token, "~> 0.3 or ~> 1.0", [hex: :digital_token, repo: "hexpm", optional: false]}, {:ex_cldr, "~> 2.45", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:ex_cldr_currencies, "~> 2.17", [hex: :ex_cldr_currencies, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "4f95738f1dc4e821485e52226666f7691c9276bf6eba49cba8d23c8a2db05e84"}, + "ex_cldr_territories": {:hex, :ex_cldr_territories, "2.12.0", "3c69917e67256a29e7d4eff9a12b9340186f6c5cea36ebef83fb67e5452064c9", [:mix], [{:ex_cldr, "~> 2.47", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "d12bdd3dcc1debaed3268deed6a0d8f53409f540e6a3b1410ede6cf3a6a1f768"}, "ex_hash_ring": {:hex, :ex_hash_ring, "6.0.4", "bef9d2d796afbbe25ab5b5a7ed746e06b99c76604f558113c273466d52fa6d6b", [:mix], [], "hexpm", "89adabf31f7d3dfaa36802ce598ce918e9b5b33bae8909ac1a4d052e1e567d18"}, "ex_machina": {:hex, :ex_machina, "2.8.0", "a0e847b5712065055ec3255840e2c78ef9366634d62390839d4880483be38abe", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm", "79fe1a9c64c0c1c1fab6c4fa5d871682cb90de5885320c187d117004627a7729"}, + "ex_phone_number": {:hex, :ex_phone_number, "0.4.11", "89f3f96f4b4c1404ae89b8a2f24397fd353a1d0d4b7dd39b2a633a23a4cf82b5", [:mix], [{:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: false]}], "hexpm", "cefa61b4fd4f946a1813f19fcfce1370907d31261716fb7e7d04da775ad5d9c6"}, "expo": {:hex, :expo, "1.1.1", "4202e1d2ca6e2b3b63e02f69cfe0a404f77702b041d02b58597c00992b601db5", [:mix], [], "hexpm", "5fb308b9cb359ae200b7e23d37c76978673aa1b06e2b3075d814ce12c5811640"}, 
"file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, "finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"}, diff --git a/test/kith/contacts/phone_formatter_test.exs b/test/kith/contacts/phone_formatter_test.exs index 5dfd878..0b35009 100644 --- a/test/kith/contacts/phone_formatter_test.exs +++ b/test/kith/contacts/phone_formatter_test.exs @@ -3,7 +3,7 @@ defmodule Kith.Contacts.PhoneFormatterTest do alias Kith.Contacts.PhoneFormatter - describe "normalize/1" do + describe "normalize/1 (no region — opt-in normalization)" do test "returns nil for nil" do assert {:ok, nil} = PhoneFormatter.normalize(nil) end @@ -12,40 +12,124 @@ defmodule Kith.Contacts.PhoneFormatterTest do assert {:ok, nil} = PhoneFormatter.normalize("") end - test "preserves E.164 input" do - assert {:ok, "+12345678901"} = PhoneFormatter.normalize("+12345678901") + test "preserves E.164 input untouched" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize("+12025550100") end - test "preserves bare 10-digit number without adding country code" do - assert {:ok, "2345678901"} = PhoneFormatter.normalize("2345678901") + test "parses +prefixed number with formatting to E.164" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize("+1 202 555-0100") end - test "strips formatting from 10-digit number" do - 
assert {:ok, "2345678901"} = PhoneFormatter.normalize("(234) 567-8901") + test "parses international +prefixed number" do + assert {:ok, "+442079460958"} = PhoneFormatter.normalize("+44 20 7946 0958") end - test "does not assume country code for 10-digit numbers" do - assert {:ok, "9876543210"} = PhoneFormatter.normalize("987-654-3210") + test "leaves bare number unchanged (no region context)" do + # Without a default region, we can't safely interpret what country this + # 10-digit number belongs to. Returned trimmed-only. + assert {:ok, "2025550100"} = PhoneFormatter.normalize("2025550100") end - test "handles 11-digit number starting with 1" do - assert {:ok, "+12345678901"} = PhoneFormatter.normalize("12345678901") + test "leaves formatted bare number trimmed-but-otherwise-unchanged" do + assert {:ok, "(202) 555-0100"} = PhoneFormatter.normalize("(202) 555-0100") end - test "handles international number with +" do - assert {:ok, "+442079460958"} = PhoneFormatter.normalize("+44 20 7946 0958") + test "trims whitespace around E.164 input" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize(" +1 202 555 0100 ") + end + + test "returns unparseable +prefixed input as-is" do + # +0 is not a valid country code; libphonenumber rejects it. + assert {:ok, "+0"} = PhoneFormatter.normalize("+0") + end + end + + describe "normalize/2 (with default region)" do + test "parses bare US number to E.164 with US region" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize("(202) 555-0100", "US") + end + + test "parses bare UK number to E.164 with GB region" do + assert {:ok, "+442079460958"} = PhoneFormatter.normalize("020 7946 0958", "GB") + end + + test "parses bare French number to E.164 with FR region" do + assert {:ok, "+33612345678"} = PhoneFormatter.normalize("06 12 34 56 78", "FR") + end + + test "+prefixed number ignores the default region argument" do + # The number is unambiguously German; passing "US" must not override. 
+ assert {:ok, "+4915155555555"} = PhoneFormatter.normalize("+49 151 5555 5555", "US") + end + + test "explicit nil region is equivalent to normalize/1" do + assert PhoneFormatter.normalize("(202) 555-0100") == + PhoneFormatter.normalize("(202) 555-0100", nil) + end + + test "returns original on unparseable input with region" do + assert {:ok, "garbage"} = PhoneFormatter.normalize("garbage", "US") + end + + test "returns nil for nil regardless of region" do + assert {:ok, nil} = PhoneFormatter.normalize(nil, "FR") + end + end + + describe "region_for_locale/1" do + test "maps common locales to regions" do + assert "US" = PhoneFormatter.region_for_locale("en") + assert "FR" = PhoneFormatter.region_for_locale("fr") + assert "DE" = PhoneFormatter.region_for_locale("de") + assert "JP" = PhoneFormatter.region_for_locale("ja") + end + + test "strips locale subtag" do + assert "US" = PhoneFormatter.region_for_locale("en-GB") + assert "FR" = PhoneFormatter.region_for_locale("fr_CA") end - test "adds + to 7+ digit numbers without it" do - assert {:ok, "+1234567"} = PhoneFormatter.normalize("1234567") + test "returns nil for unknown locales" do + assert is_nil(PhoneFormatter.region_for_locale("xx")) + assert is_nil(PhoneFormatter.region_for_locale("")) + assert is_nil(PhoneFormatter.region_for_locale(nil)) end + end + + describe "supported_regions/1" do + test "returns parser-supported regions with localized labels and calling codes" do + regions = PhoneFormatter.supported_regions("en") + + # libphonenumber supports ~250 regions; we intersect with CLDR + # country_codes so continents/aggregates are excluded. 
+ assert length(regions) > 200 + + assert Enum.all?(regions, fn {code, label} -> + is_binary(code) and byte_size(code) == 2 and + is_binary(label) and String.contains?(label, "(+") + end) + + # Spot-check known entries + assert Enum.find(regions, fn {code, _} -> code == "US" end) == + {"US", "United States (+1)"} + + assert {_code, label} = Enum.find(regions, fn {code, _} -> code == "GB" end) + assert label =~ "United Kingdom" + assert label =~ "+44" + end + + test "returns localized names for non-English locales" do + en = PhoneFormatter.supported_regions("en") |> Map.new() + fr = PhoneFormatter.supported_regions("fr") |> Map.new() - test "preserves short numbers as-is" do - assert {:ok, "12345"} = PhoneFormatter.normalize("12345") + refute en["US"] == fr["US"] + assert fr["US"] =~ "(+1)" end - test "handles whitespace" do - assert {:ok, "+12345678901"} = PhoneFormatter.normalize(" +1 234 567 8901 ") + test "is sorted by label" do + regions = PhoneFormatter.supported_regions("en") + labels = Enum.map(regions, &elem(&1, 1)) + assert labels == Enum.sort(labels) end end diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index 85c8aab..59ecc4c 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -866,7 +866,47 @@ defmodule Kith.Imports.Sources.MonicaApiTest do |> Enum.map(& &1.value) |> Enum.sort() - assert fields == ["+5551234", "fieldy@test.com"] + # Without a `phone_default_region` in opts, bare numbers round-trip + # trimmed-but-unchanged — opt-in normalization preserves user input + # when the importer can't safely guess a country. 
+ assert fields == ["555-1234", "fieldy@test.com"] + end + + test "normalizes phone fields to E.164 when phone_default_region is set", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 42, + first_name: "Regional", + contact_fields: [ + contact_field_json(content: "(202) 555-0100", type_name: "Phone"), + contact_field_json(content: "+44 20 7946 0958", type_name: "Phone") + ] + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, _} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "phone_default_region" => "US" + }) + + rec = Imports.find_import_record(account_id, "monica_api", "contact", "42") + + fields = + Repo.all(from cf in Contacts.ContactField, where: cf.contact_id == ^rec.local_entity_id) + |> Enum.map(& &1.value) + |> Enum.sort() + + # Bare US number normalized via region hint; +-prefixed UK number ignores + # the US hint and uses its own country code. 
+ assert "+12025550100" in fields + assert "+442079460958" in fields end end diff --git a/test/kith/workers/duplicate_detection_worker_test.exs b/test/kith/workers/duplicate_detection_worker_test.exs index 8f2728e..adb53d9 100644 --- a/test/kith/workers/duplicate_detection_worker_test.exs +++ b/test/kith/workers/duplicate_detection_worker_test.exs @@ -147,6 +147,41 @@ defmodule Kith.Workers.DuplicateDetectionWorkerTest do assert "email_match" in hd(candidates).reasons end + test "email matching trims surrounding whitespace", + %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Alice", + first_name: "Alice", + last_name: "" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob", + first_name: "Bob", + last_name: "" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => " Foo@BAR.com "}) + contact_field_fixture(c2, email_type_id, %{"value" => "foo@bar.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "email_match" in hd(candidates).reasons + end + + # Note: pure whitespace-only email values are rejected by the ContactField + # changeset's `validate_required`, so the cartesian-explosion case from + # Bug A can't actually manifest for emails through the normal write path. + # Coverage is provided by the "trims surrounding whitespace" case above, + # which exercises the same TRIM-in-JOIN code path on values that survive + # validation. 
+ test "email-only match scores around 0.85", %{account: account, email_type_id: email_type_id} do c1 = insert(:contact, @@ -207,7 +242,10 @@ defmodule Kith.Workers.DuplicateDetectionWorkerTest do assert hd(candidates).score >= 0.7 end - test "phone matching normalizes formatting", %{account: account, phone_type_id: phone_type_id} do + test "phone matching is strict equality on canonical E.164 values", + %{account: account, phone_type_id: phone_type_id} do + # Phone normalization runs at write-time (see PhoneFormatter.normalize/2), + # so the detection worker assumes both values are already canonical. c1 = insert(:contact, account: account, @@ -224,8 +262,8 @@ defmodule Kith.Workers.DuplicateDetectionWorkerTest do last_name: "Williams" ) - contact_field_fixture(c1, phone_type_id, %{"value" => "+1-555-1234"}) - contact_field_fixture(c2, phone_type_id, %{"value" => "15551234"}) + contact_field_fixture(c1, phone_type_id, %{"value" => "+12025550100"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "+12025550100"}) assert :ok = run_detection(account.id) @@ -234,6 +272,44 @@ defmodule Kith.Workers.DuplicateDetectionWorkerTest do assert "phone_match" in hd(candidates).reasons end + test "does not cartesian-explode across formatting-only phone values", + %{account: account, phone_type_id: phone_type_id} do + # Regression for Bug A: previously the in-query regex normalized any + # zero-digit value to "" and matched it against every other zero-digit + # value (C(N,2) false candidates). With strict equality on canonical + # values plus a TRIM-non-empty filter, distinct garbage strings produce + # no matches. + # Use deliberately dissimilar names so pg_trgm doesn't generate + # name-based false positives and contaminate the assertion. 
+ contacts_data = [ + {"Aaron Zephyr", "Aaron", "Zephyr", "+"}, + {"Quincy Bramble", "Quincy", "Bramble", "-"}, + {"Yolanda Khoury", "Yolanda", "Khoury", "()"}, + {"Vladimir Tcheng", "Vladimir", "Tcheng", "abc"}, + {"Saoirse Mwangi", "Saoirse", "Mwangi", "N/A"}, + {"Daiyu Olafsson", "Daiyu", "Olafsson", "x"} + ] + + for {display, first, last, garbage} <- contacts_data do + contact = + insert(:contact, + account: account, + display_name: display, + first_name: first, + last_name: last + ) + + contact_field_fixture(contact, phone_type_id, %{"value" => garbage}) + end + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + + assert candidates == [], + "expected no phone matches across distinct garbage values, got #{length(candidates)}" + end + test "phone-only match scores 0.75", %{account: account, phone_type_id: phone_type_id} do c1 = insert(:contact, diff --git a/test/kith/workers/monica_api_crawl_worker_test.exs b/test/kith/workers/monica_api_crawl_worker_test.exs index 082d567..05e0a86 100644 --- a/test/kith/workers/monica_api_crawl_worker_test.exs +++ b/test/kith/workers/monica_api_crawl_worker_test.exs @@ -61,6 +61,61 @@ defmodule Kith.Workers.MonicaApiCrawlWorkerTest do assert import_job.api_options["extra_notes"] == false end + test "build_opts forwards every wizard-saved option to the source module", + %{user: user, account_id: account_id} do + # Regression for Bug C: build_opts used to hand-curate a map containing + # only "extra_notes" — every other wizard option (auto_merge_duplicates, + # photos, pets, phone_default_region, …) was silently dropped before + # reaching MonicaApi.crawl/5. 
+ import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{ + "auto_merge_duplicates" => true, + "phone_default_region" => "US", + "photos" => true, + "pets" => true + } + }) + + opts = MonicaApiCrawlWorker.build_opts(import_job) + + assert opts["auto_merge_duplicates"] == true + assert opts["phone_default_region"] == "US" + assert opts["photos"] == true + assert opts["pets"] == true + # extra_notes defaults to true unless explicitly false + assert opts["extra_notes"] == true + end + + test "build_opts honors extra_notes=false explicitly", + %{user: user, account_id: account_id} do + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"extra_notes" => false} + }) + + assert MonicaApiCrawlWorker.build_opts(import_job)["extra_notes"] == false + end + + test "build_opts handles missing api_options map", %{user: user, account_id: account_id} do + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: nil + }) + + opts = MonicaApiCrawlWorker.build_opts(import_job) + assert opts["extra_notes"] == true + end + test "enqueues MonicaPhotoSyncWorker when photos opt-in", %{ user: user, account_id: account_id diff --git a/test/kith/workers/phone_renormalize_worker_test.exs b/test/kith/workers/phone_renormalize_worker_test.exs new file mode 100644 index 0000000..6441b54 --- /dev/null +++ b/test/kith/workers/phone_renormalize_worker_test.exs @@ -0,0 +1,132 @@ +defmodule Kith.Workers.PhoneRenormalizeWorkerTest do + use Kith.DataCase, async: true + use Oban.Testing, repo: Kith.Repo + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + alias Kith.Contacts.ContactField + alias Kith.Repo + alias Kith.Workers.PhoneRenormalizeWorker + + setup do + 
seed_reference_data!() + + # Default account locale is "en" — see Account schema, which maps to "US" + # via PhoneFormatter.region_for_locale/1. + user = user_fixture() + account_id = user.account_id + + phone_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "tel:", + select: %{id: t.id}, + limit: 1 + ) + + email_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "mailto:", + select: %{id: t.id}, + limit: 1 + ) + + %{ + account_id: account_id, + phone_type_id: phone_type.id, + email_type_id: email_type.id + } + end + + defp insert_phone_raw(account_id, contact_id, phone_type_id, value) do + # Bypass changeset normalization so we can stash heuristic-era values that + # the new PhoneFormatter.normalize/2 would reject going forward. + now = DateTime.utc_now() |> DateTime.truncate(:second) + + {1, [%{id: id}]} = + Repo.insert_all( + "contact_fields", + [ + %{ + account_id: account_id, + contact_id: contact_id, + contact_field_type_id: phone_type_id, + value: value, + inserted_at: now, + updated_at: now + } + ], + returning: [:id] + ) + + id + end + + describe "perform/1" do + test "rewrites bare US phones to E.164 using account locale", + %{account_id: account_id, phone_type_id: phone_type_id} do + contact = contact_fixture(account_id) + id = insert_phone_raw(account_id, contact.id, phone_type_id, "2025550100") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + + assert Repo.get!(ContactField, id).value == "+12025550100" + end + + test "leaves valid E.164 values untouched (idempotence)", + %{account_id: account_id, phone_type_id: phone_type_id} do + contact = contact_fixture(account_id) + id = insert_phone_raw(account_id, contact.id, phone_type_id, "+12025550100") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + assert Repo.get!(ContactField, id).value == "+12025550100" + + # Re-run — should be a no-op. 
+ assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + assert Repo.get!(ContactField, id).value == "+12025550100" + end + + test "leaves unparseable values alone instead of clobbering", + %{account_id: account_id, phone_type_id: phone_type_id} do + contact = contact_fixture(account_id) + id = insert_phone_raw(account_id, contact.id, phone_type_id, "+") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + + # "+" is unparseable; PhoneFormatter.normalize returns it trimmed-as-is. + assert Repo.get!(ContactField, id).value == "+" + end + + test "does not touch email values", + %{account_id: account_id, email_type_id: email_type_id} do + contact = contact_fixture(account_id) + field = contact_field_fixture(contact, email_type_id, %{"value" => "user@example.com"}) + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + assert Repo.get!(ContactField, field.id).value == "user@example.com" + end + end + + describe "perform/1 all-accounts mode" do + test "iterates every account when no account_id arg supplied", + %{phone_type_id: phone_type_id} do + # Each user_fixture creates its own account. Insert one bare number per + # account; both should get rewritten to E.164. 
+ user1 = user_fixture() + user2 = user_fixture() + + c1 = contact_fixture(user1.account_id) + c2 = contact_fixture(user2.account_id) + + id1 = insert_phone_raw(user1.account_id, c1.id, phone_type_id, "2025550100") + id2 = insert_phone_raw(user2.account_id, c2.id, phone_type_id, "2025550101") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{}) + + assert Repo.get!(ContactField, id1).value == "+12025550100" + assert Repo.get!(ContactField, id2).value == "+12025550101" + end + end +end From 39529384a03475f4c607b0de22ecb2b8414c0858 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 02:55:05 +0300 Subject: [PATCH 26/58] docs: design spec for Monica import performance fix --- ...026-05-16-monica-import-perf-fix-design.md | 313 ++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md diff --git a/docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md b/docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md new file mode 100644 index 0000000..e0d3c90 --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md @@ -0,0 +1,313 @@ +# Monica import performance fix — design + +**Status:** approved +**Date:** 2026-05-16 +**Branch:** `fix/duplicate-detection` +**Builds on:** commit `6af91bf` (the bug-fix that unleashed Phase 4) + +## Context + +The bug-fix commit `6af91bf` restored a previously broken contract: `MonicaApiCrawlWorker.build_opts/1` now forwards every wizard option to `MonicaApi.crawl/5`, not just `extra_notes`. That fix was correct — auto-merge, pets, calls, activities, gifts, debts, tasks, reminders, conversations were all designed to be controllable from the wizard, but `build_opts/1` was silently dropping them. 
+ +The consequence: a per-contact phase that had been an unreachable no-op for the wizard UI's entire lifetime suddenly fires **eight HTTP endpoints per imported contact**. For a 1000-contact account that is 8000 API calls against Monica's default 60-req/min rate limit. Imports went from ~2 minutes (Phase 1 only) to multi-hour stalls, made worse by a double retry layer that resets its inner counter every outer retry — the symptom users see as "retry: got response with status 429, will retry in 59000ms, 3 attempts left" repeating forever. + +This design fixes that, plus a small handful of pre-existing and self-inflicted perf issues that compound the problem. + +### Problems being addressed + +1. **Phase 4 explosion** *(primary user-visible regression)* + `import_extra_data_types/5` walks every imported contact and fires up to 8 endpoints per contact unconditionally. There is no statistics-based short-circuit (unlike Phase 3 for notes), so contacts with zero pets/debts/gifts still incur a round-trip per endpoint. With ~1000 contacts × 8 endpoints = 8000 calls, the import cannot complete under Monica's default rate limit in reasonable wall-clock time. + +2. **Double retry layering** *(amplifier)* + `api_get_json_with_retry/4` (custom 65-second sleep loop, max 3 outer retries) wraps `Req.get`, which itself has built-in `:safe_transient` retry (max 3 inner retries, respects `Retry-After`). On a 429 these stack: up to 12 retry rounds for a single logical call, with up to ~12 minutes of cumulative sleep. The "3 attempts left always" log is the outer layer kicking off fresh inner-layer attempts. + +3. **No proactive throttle** *(amplifier)* + We make calls as fast as the BEAM lets us until Monica refuses. Every 429 burst then wastes a 59-second `Retry-After` window before traffic resumes. + +4. **`:persistent_term` global-GC storm** *(self-inflicted in PR 6af91bf)* + `phone_field_type?/1` caches one boolean per cft_id via `:persistent_term.put/2`. 
Each new key triggers a global GC of every BEAM process. On a cold import we warm 5-8 cft_ids back-to-back, stopping the world (LiveView, PubSub, PromEx, every Oban worker) each time. + +5. **Double libphonenumber normalization** *(self-inflicted)* + `MonicaApi.import_single_contact_field` normalizes via `PhoneFormatter.normalize/2`, then `Contacts.create_contact_field/2` calls `maybe_normalize_phone/1` which normalizes the already-canonical value again. Per phone field this is one wasted libphonenumber parse plus one wasted `Repo.get(ContactFieldType, cft_id)` DB round trip. + +6. **Pre-existing per-write `Repo.get` in `maybe_normalize_phone`** *(amplifier, not in scope here)* + `Contacts.maybe_normalize_phone/1` looks up the ContactFieldType per call to discover the protocol. For 5000 field writes that's 5000 DB queries. Not introduced by recent work; we sidestep it for the Monica path only. + +## Goal + +1. Restore the Monica import to a reasonable wall-clock runtime — Phase 1+2+3 should complete in minutes for ~1000 contacts; Phase 4 should run in the background and only fire endpoints that actually have data. +2. Eliminate the double-retry layering so a single 429 doesn't cascade into multi-minute log loops. +3. Add a client-side throttle so 429s become rare under normal Monica defaults. +4. Pay back the perf debt introduced in commit `6af91bf` (persistent_term GC storm, double normalization). + +**Out of scope** (noted for follow-up): +- Fixing `maybe_normalize_phone`'s per-write `Repo.get` for UI/API callers (still a per-field DB query for non-Monica paths, but Monica is the only path that creates fields at bulk scale). +- Account-locale-derived region for UI form phone writes (currently UI writes leave bare numbers untouched). +- Auto-detection of Monica's actual rate limit (defaults are hand-configured). +- Batched per-contact fetches via Monica's `?include=...` if/when supported. 
+ +## Approach + +### Part 1 — Extract Phase 4 into a dedicated worker + +**New worker:** `Kith.Workers.MonicaMiscDataWorker`, queue `:imports`, max attempts 3, timeout 30 minutes. + +A single Oban job per import (not per contact). Takes args: +```elixir +%{ + "import_id" => integer, + "credential_url" => string, + "credential_api_key" => string, # same wipe-after-completion pattern as MonicaPhotoSyncWorker + "plan" => [%{"source_id" => integer, "local_id" => integer, "endpoints" => [string]}, ...] +} +``` + +Worker logic: +1. Load `import_job`; bail early if `status == "cancelled"`. +2. Iterate `plan` entries. +3. For each entry, load the local contact (`Repo.get`); skip if `deleted_at != nil`. +4. For each endpoint in the entry's list, call the corresponding fetch helper (e.g. `import_contact_pets/6`). +5. Accumulate per-endpoint counts (e.g. `%{pets: 17, calls: 4, activities: 0, ...}`). +6. After completion, update `import_job.summary` with a new `"misc"` key holding the counts. Broadcast via the existing PubSub topic `"import:#{account_id}"` so the wizard UI sees the update. + +The per-contact `import_contact_pets/calls/activities/gifts/debts/tasks/reminders/conversations` helpers move verbatim from `MonicaApi` into the new worker module. Their internals are unchanged. + +**`MonicaApi.crawl/5` changes:** +- Phase 4 (`import_extra_data_types/5`) deleted. +- The `crawl/5` return value's `summary` map gains a new key `:misc_data` (the plan list). Caller `MonicaApiCrawlWorker` consumes this to construct the misc worker's args. + +**`MonicaApiCrawlWorker.perform/1` changes:** +- After `Imports.update_import_status(:completed)`, alongside the existing `MonicaPhotoSyncWorker` enqueue, enqueue `MonicaMiscDataWorker` with the plan from `summary[:misc_data]`. +- The plan is removed from the persisted summary before writing (it's transit data, not a metric). 
+ + ### Part 2 — Throttle: Hammer-backed rate limiter + + **New module:** `Kith.Imports.Sources.MonicaApi.RateLimiter`. + + Single public function `wait!(host)`. Wraps `Hammer.check_rate(bucket, scale_ms, limit)` with: + - Bucket key: `"monica_api:#{URI.parse(url).host}"` — per-host so independent Monica instances don't share a quota. + - Scale: 60_000 ms. + - Limit: configurable via `Application.get_env(:kith, :monica_rate_limit, 55)`. + + 55 is five tokens below Monica's documented default of 60/min, leaving a safety margin. + + On `{:deny, _}`, sleep ~1100ms and recurse. (Hammer 6.x's `{:deny, limit}` carries the configured limit rather than a retry-after hint, and its buckets are fixed windows, so a fixed small sleep simply polls until the current 60-second window rolls over.) + + **Call site:** `MonicaApi.api_get/3` calls `RateLimiter.wait!(credential.url)` before every `Req.get`. The new misc worker's helpers go through the same `api_get`, so they're throttled too. + + **Config:** + - `config/config.exs`: `config :kith, :monica_rate_limit, 55`. + - `config/test.exs`: `config :kith, :monica_rate_limit, 1_000_000` — effectively unthrottled so tests don't sleep. Throttle logic itself is exercised via its own test file with a temporarily lowered limit.
+ +### Part 3 — Retry: collapse to Req's built-in only + +**Delete:** +- `MonicaApi.api_get_json_with_retry/4` (lines 1109-1137 of current monica_api.ex) +- `@max_rate_limit_retries 3` +- `@rate_limit_sleep_ms :timer.seconds(65)` + +**Replace** `api_get_json/3` with a direct version: +```elixir +defp api_get_json(credential, url, params) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: status}} -> {:error, {:http, status}} + {:error, reason} -> {:error, reason} + end +end +``` + +**Update** `api_get/3` to configure Req's retry behavior explicitly: +```elixir +defp api_get(credential, url, params \\ []) do + RateLimiter.wait!(credential.url) + + options = [ + headers: [...], + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ Map.get(credential, :req_options, []) + + Req.get(url, options) +end +``` + +`max_retries: 5` (up from the implicit Req default of 3) so a sustained slow window doesn't terminate the call. Req's `:safe_transient` retry handles 429/5xx and respects `Retry-After` natively. + +The two error tuples that previously distinguished `:rate_limited` from other errors are no longer needed — `{:error, {:http, 429}}` is now self-describing and bubbles up to the same caller error-handling that already exists. 
+ + ### Part 4 — Statistics short-circuit + misc plan + + **New helper** `collect_misc_data/5` in `MonicaApi`, called inside the contact loop alongside the existing `collect_extra_notes/3`: + + ```elixir + @misc_endpoints [ + {:calls, "number_of_calls"}, + {:activities, "number_of_activities"}, + {:gifts, "number_of_gifts"}, + {:debts, "number_of_debts"}, + {:tasks, "number_of_tasks"}, + {:reminders, "number_of_reminders"}, + {:conversations, "number_of_conversations"} + ] + + defp collect_misc_data(deferred, api_contact, source_id, local_id, opts) do + stats = api_contact["statistics"] || %{} + + endpoints = + @misc_endpoints + |> Enum.filter(fn {key, stat_field} -> + opts[Atom.to_string(key)] != false and (stats[stat_field] || 1) > 0 + end) + |> Enum.map(&elem(&1, 0)) + + endpoints = if opts["pets"] != false, do: [:pets | endpoints], else: endpoints + + if endpoints == [] do + deferred + else + entry = %{source_id: source_id, local_id: local_id, endpoints: endpoints} + %{deferred | misc_data: [entry | deferred.misc_data]} + end + end + ``` + + Rules: + - Endpoint is included only if (a) the wizard opt for that data type is not false **and** (b) Monica's stat field reports > 0 (or, for pets, the wizard opt is on — pets has no statistic field in Monica's payload). + - Contact contributes zero entries if every endpoint is filtered out — it's not even in the plan. + - Stat absent in payload is treated as ">0" (safer default; we'd rather make a wasted call than miss data). + + `deferred` (already threaded through `crawl/5`) gains a new key `misc_data: []`. After the contact loop completes, `deferred.misc_data` is the plan list passed to the misc worker. + + ### Part 5 — Self-inflicted perf debt cleanup + + #### 5a. Replace `:persistent_term` cache with `ref_data` MapSet + + **Delete** `phone_field_type?/1` and `phone_field_type?(nil)` clauses in `monica_api.ex`.
+ +**Extend** `ref_data` from: +```elixir +%{contact_field_types: %{name => id}} +``` +to: +```elixir +%{ + contact_field_types: %{name => id}, + phone_cft_ids: MapSet.t() +} +``` + +`build_or_update_ref_data/3` computes `phone_cft_ids` once per ref_data refresh (1-2 queries per entire import, vs 5-8 GC-triggering `:persistent_term.put` calls). + +**Update** `normalize_field_value/3` to take `ctx` (already in scope at the caller) instead of just `opts`: +```elixir +defp normalize_field_value(value, cft_id, ctx) when is_binary(value) do + if MapSet.member?(ctx.ref_data.phone_cft_ids, cft_id) do + region = parse_region(ctx.opts["phone_default_region"]) + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end +end +``` + +#### 5b. Bypass `Contacts.maybe_normalize_phone` from Monica path + +**Extend** `Contacts.create_contact_field/2` to `create_contact_field/3` with `opts \\ []`: +```elixir +def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do + attrs = if Keyword.get(opts, :normalize, true), do: maybe_normalize_phone(attrs), else: attrs + + %ContactField{contact_id: contact.id, account_id: contact.account_id} + |> ContactField.changeset(attrs) + |> Repo.insert() +end +``` + +Default `normalize: true` preserves behavior for UI/API callers (one line touched in `monica_api.ex` to pass `normalize: false`). + +This eliminates ~2000 redundant libphonenumber parses **and** ~5000 redundant `Repo.get(ContactFieldType, cft_id)` queries per typical 1000-contact import — all on the Monica path only. UI form path is unchanged. + +## Files to modify + +**Production code:** +- `lib/kith/imports/sources/monica_api/rate_limiter.ex` *(new)* — Hammer-backed throttle. +- `lib/kith/workers/monica_misc_data_worker.ex` *(new)* — Phase 4 worker. 
+- `lib/kith/imports/sources/monica_api.ex` — Phase 4 removed; `collect_misc_data/5` added; `phone_field_type?/1` deleted; `api_get_json_with_retry/4` deleted; `api_get/3` wraps `RateLimiter.wait!`; `normalize_field_value/3` takes `ctx`; `ref_data` extended with `phone_cft_ids`; `build_or_update_ref_data/3` computes that field; per-contact endpoint helpers (`import_contact_pets/calls/activities/gifts/debts/tasks/reminders/conversations`) relocated to the misc worker module. +- `lib/kith/workers/monica_api_crawl_worker.ex` — enqueues `MonicaMiscDataWorker` on successful completion. +- `lib/kith/contacts.ex` — `create_contact_field/2` → `create_contact_field/3` with `normalize: true` default. +- `config/config.exs` — `config :kith, :monica_rate_limit, 55`. +- `config/test.exs` — high override so tests don't sleep on the throttle. + +**Tests:** +- `test/kith/imports/sources/monica_api/rate_limiter_test.exs` *(new)* — under-limit allows, over-limit waits, per-host isolation, env override. +- `test/kith/workers/monica_misc_data_worker_test.exs` *(new)* — worker fires only planned endpoints; cancelled import skipped; summary populated; cred carried through args. +- `test/kith/imports/sources/monica_api_test.exs` *(extend)* — `crawl/5` enqueues misc worker with right plan; statistics-zero excluded; statistics-missing included; opt-outs honored; no per-contact endpoint stubs hit during main crawl. +- `test/kith/workers/monica_api_crawl_worker_test.exs` *(extend)* — boundary test for the enqueue. +- `test/kith/contacts_test.exs` *(extend or add)* — `create_contact_field/3` with `normalize: false` bypasses `maybe_normalize_phone`. + +## Existing functions to reuse + +- `MonicaPhotoSyncWorker` (`lib/kith/workers/monica_photo_sync_worker.ex`) — pattern for "enqueue from main crawl, carry credential through args, check `import.status` at top of `perform/1`, single Oban job that iterates contacts internally." 
+- `Imports.update_import_status/3` (existing) — pattern for writing `summary` updates that trigger the existing PubSub broadcast. +- `Phase 3 collect_extra_notes/3` (`lib/kith/imports/sources/monica_api.ex:583-599`) — pattern for "inspect statistics in the contact loop, accumulate a deferred entry, process after main crawl." +- `Hammer.check_rate/3` (existing dep) — token bucket primitive for the throttle. +- `Req`'s `:safe_transient` retry + `Retry-After` handling (default behavior) — single source of truth for retry logic after we delete the hand-rolled wrapper. + +## Verification + +1. **Unit tests:** + ``` + mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs \ + test/kith/workers/monica_misc_data_worker_test.exs \ + test/kith/imports/sources/monica_api_test.exs \ + test/kith/workers/monica_api_crawl_worker_test.exs \ + test/kith/contacts_test.exs + ``` + All green. + +2. **Static analysis:** `mix quality` — no new credo issues, no new dialyzer warnings beyond the existing `.dialyzer_ignore.exs` entries. + +3. **Manual dev test:** + - Reset dev account via `Kith.Workers.AccountResetWorker`. + - Re-import ~1000 Monica contacts with default wizard options (all 8 misc data types checked). + - **`MonicaApiCrawlWorker` should complete in well under 2 minutes** (Phase 1 paginated calls + auto-merge + cross-refs + extra notes — bounded by ~20-30 throttled requests). + - **`MonicaMiscDataWorker` should complete in single-digit minutes** for typical CRM data (most contacts have nothing in pets/debts/gifts). + - Logs should show **zero** `"retry: got response with status 429"` messages under normal Monica defaults. + - `import_job.summary` after main worker: `imported`, `merged`, `contacts`, `notes` populated. + - `import_job.summary["misc"]` after misc worker: per-endpoint counts. + - Duplicates tab: a small number of pending candidates, not 6000 (this validates the earlier bug-fix is still working). + +4. 
**Oban dashboard:** + - `MonicaApiCrawlWorker` job completes and disappears from `executing`. + - `MonicaMiscDataWorker` job appears separately, runs to completion. + - Both individually cancellable. + + 5. **Rate limiter sanity (optional IEx):** + ```elixir + times = for _ <- 1..70 do + {time_us, _} = :timer.tc(fn -> + Kith.Imports.Sources.MonicaApi.RateLimiter.wait!("https://test.monica") + end) + time_us / 1_000 + end + {Enum.take(times, 55) |> Enum.sum(), Enum.drop(times, 55) |> Enum.sum()} + ``` + First 55 calls should be near-zero; the 56th should block (polling every ~1100ms) until Hammer's current 60-second window rolls over, after which the remaining calls are near-zero again. + + ## Risks + + - **Plan size in Oban args.** For typical CRMs the plan is small (5-15% of contacts contribute entries). At ~100k+ contact scale the args could grow large; if that happens, swap to a `misc_data_plan` jsonb column on `imports` and pass only `import_id`. Localized change, two lines. + - **Phase 4 status visibility.** Users see "import complete" when the main crawl finishes; misc data trickles in afterward. The wizard's PubSub channel already broadcasts summary updates, but the "complete" copy doesn't currently distinguish between "fully done" and "main done, misc running." Consider a UI follow-up: show a second progress line for `misc_data` if `summary["misc"]` is absent. + - **Misc worker cancellation race.** If the user cancels the import between main-crawl completion and the misc worker picking up the job, the misc worker checks `import.status == "cancelled"` at the top of `perform/1` and exits cleanly. If cancellation happens *mid-run*, the in-flight request finishes but no further requests fire. Same model as `MonicaPhotoSyncWorker` today. + - **Throttle starvation across concurrent imports.** If two users on the same Monica instance import simultaneously, they share the per-host bucket. Each gets ~half the throughput. Acceptable — Monica's actual limit is the shared resource anyway.
+- **The pre-existing `maybe_normalize_phone` N+1 remains for UI form callers.** Not in scope; tracked as future work. Practical impact is invisible because UI writes happen one at a time. + +## Non-goals + +- Account-locale-derived region applied to UI form phone writes (separate change, larger surface). +- Hammer auto-detection of Monica's actual rate limit (Monica doesn't expose this in headers). +- Batched per-contact data fetches via Monica `?include=` query parameter (Monica's API doesn't currently support multi-resource includes for these endpoints). +- Splitting the misc worker into per-endpoint sub-workers (premature; single worker is simpler and the per-endpoint counts are already preserved). From fe213fd77089b9055fdfacdfbef369fe2f54beaf Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:04:28 +0300 Subject: [PATCH 27/58] docs: implementation plan for Monica import performance fix --- .../2026-05-16-monica-import-perf-fix.md | 1997 +++++++++++++++++ 1 file changed, 1997 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md diff --git a/docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md b/docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md new file mode 100644 index 0000000..ed3c8fa --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md @@ -0,0 +1,1997 @@ +# Monica Import Performance Fix Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Restore the Monica importer to a reasonable runtime by extracting Phase 4 (per-contact extra data) into a dedicated throttled background worker, collapsing the double retry layer, and paying back the perf debt introduced in commit `6af91bf`. 
+ +**Architecture:** Phase 4 moves out of `MonicaApi.crawl/5` into a new `MonicaMiscDataWorker` Oban job enqueued by the existing `MonicaApiCrawlWorker` on success. The new worker consumes a plan built during main crawl that pre-filters contacts by Monica's `statistics.number_of_*` fields. A single `Hammer`-backed `RateLimiter` paces every outbound Monica call (~55 req/min) so 429s become rare; the hand-rolled retry wrapper is deleted and `Req`'s built-in `:safe_transient` retry is the sole retry source. Two cleanups: phone-cft lookup moves from a `:persistent_term`-cached boolean into a `MapSet` on `ref_data`, and `Contacts.create_contact_field` accepts an explicit `normalize: false` option so the Monica path skips the redundant second normalization. + +**Tech Stack:** Elixir 1.18, Phoenix LiveView, Oban 2.18 (queue `:imports`), Req 0.5, Hammer 6.2, ex_phone_number 0.4. + +**Reference spec:** `docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md` + +--- + +## Task 1: Add the RateLimiter module + unit tests + +**Files:** +- Create: `lib/kith/imports/sources/monica_api/rate_limiter.ex` +- Create: `test/kith/imports/sources/monica_api/rate_limiter_test.exs` + +- [ ] **Step 1: Inspect the Hammer setup in the project so the new module uses the same backend** + +Run: `grep -rn "Hammer\|hammer:" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/config /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib --include='*.exs' --include='*.ex' | head -20` + +Expected: see `:hammer` config and an existing usage (e.g. `KithWeb.Plugs.RateLimiter`) calling `Hammer.check_rate/3`. Note the backend module (likely `Hammer.Backend.ETS`) so test setup mirrors it. 
+ +- [ ] **Step 2: Write the failing tests** + +Create `test/kith/imports/sources/monica_api/rate_limiter_test.exs`: + +```elixir +defmodule Kith.Imports.Sources.MonicaApi.RateLimiterTest do + use ExUnit.Case, async: false + + alias Kith.Imports.Sources.MonicaApi.RateLimiter + + # Tests run with the real Hammer backend; we use a unique host per test + # so buckets do not collide between tests. + + setup do + # Force a low limit for predictable timing. + prev = Application.get_env(:kith, :monica_rate_limit) + Application.put_env(:kith, :monica_rate_limit, 3) + on_exit(fn -> Application.put_env(:kith, :monica_rate_limit, prev) end) + :ok + end + + defp unique_host, do: "test-#{System.unique_integer([:positive])}.example" + + describe "wait!/1" do + test "returns :ok immediately while under the per-minute budget" do + host = unique_host() + + {us, _} = + :timer.tc(fn -> + for _ <- 1..3, do: assert :ok = RateLimiter.wait!("https://#{host}") + end) + + assert us < 50_000, "expected sub-50ms for 3 calls under the budget, got #{us}us" + end + + test "sleeps once the budget is exhausted" do + host = unique_host() + for _ <- 1..3, do: RateLimiter.wait!("https://#{host}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host}") end) + + # One inter-call sleep (≈1100ms) is enough to clear back into the window + # for the test's tiny limit. Allow generous slack. + assert us >= 1_000_000, "expected ≥1s wait when over budget, got #{us}us" + end + + test "per-host buckets do not share quota" do + host_a = unique_host() + host_b = unique_host() + + for _ <- 1..3, do: RateLimiter.wait!("https://#{host_a}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host_b}") end) + assert us < 50_000, "host_b should be in its own bucket" + end + + test "extracts the host portion of a URL for the bucket key" do + url1 = "https://example.test/api/contacts" + url2 = "https://example.test/api/me" + + # Same host → same bucket → exhausting via url1 should impact url2. 
+ for _ <- 1..3, do: RateLimiter.wait!(url1) + + {us, _} = :timer.tc(fn -> RateLimiter.wait!(url2) end) + assert us >= 1_000_000 + end + end +end +``` + +- [ ] **Step 3: Run the tests to verify they fail** + +Run: `mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs` + +Expected: FAIL with `(UndefinedFunctionError) function Kith.Imports.Sources.MonicaApi.RateLimiter.wait!/1 is undefined`. + +- [ ] **Step 4: Write the module** + +Create `lib/kith/imports/sources/monica_api/rate_limiter.ex`: + +```elixir +defmodule Kith.Imports.Sources.MonicaApi.RateLimiter do + @moduledoc """ + Per-host token bucket for outbound Monica API calls. + + Configured five tokens below Monica's documented default of 60 requests + per minute, leaving a five-call safety margin so a small clock-skew or + burst on Monica's side does not push us into the 429 window. + + Configurable via: + + config :kith, :monica_rate_limit, 55 + + with per-test overrides via `Application.put_env/3`. + + Hammer (already a dep) supplies the underlying token bucket; we use a + bucket key per Monica host so independent Monica instances do not share + a quota. Calls block the caller process via `Process.sleep/1` until a + token is available, then return `:ok`. + """ + + @scale_ms 60_000 + @default_limit 55 + @retry_sleep_ms 1_100 + + @doc """ + Block until a request token is available for the given Monica host. + + `url_or_host` may be a full URL (the host is extracted) or a bare host + string. Returns `:ok` once a token has been claimed.
+ """ + @spec wait!(String.t()) :: :ok + def wait!(url_or_host) when is_binary(url_or_host) do + bucket = bucket_key(url_or_host) + limit = Application.get_env(:kith, :monica_rate_limit, @default_limit) + + case Hammer.check_rate(bucket, @scale_ms, limit) do + {:allow, _count} -> + :ok + + {:deny, _limit} -> + Process.sleep(@retry_sleep_ms) + wait!(url_or_host) + end + end + + defp bucket_key(url_or_host) do + host = URI.parse(url_or_host).host || url_or_host + "monica_api:#{host}" + end +end +``` + +- [ ] **Step 5: Run the tests to verify they pass** + +Run: `mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs` + +Expected: PASS, 4 tests. + +- [ ] **Step 6: Verify the rest of the suite still passes** + +Run: `mix test` + +Expected: PASS, no new failures. (If `mix test` triggers Hammer initialization that wasn't set up, surface it now rather than later.) + +- [ ] **Step 7: Commit** + +```bash +cd /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection +git add lib/kith/imports/sources/monica_api/rate_limiter.ex test/kith/imports/sources/monica_api/rate_limiter_test.exs +git commit -m "feat: add Monica API per-host rate limiter (55/min)" +``` + +--- + +## Task 2: Config knobs for the rate limit + +**Files:** +- Modify: `config/config.exs` +- Modify: `config/test.exs` + +- [ ] **Step 1: Add the production default** + +Open `config/config.exs`. After the existing `config :ex_cldr, default_backend: Kith.Cldr` line (added in commit `6af91bf`), add: + +```elixir +# Outbound rate limit for Monica API calls. Five below the documented +# default of 60 req/min leaves a five-call safety margin. +config :kith, :monica_rate_limit, 55 +``` + +- [ ] **Step 2: Add a high-ceiling override for tests** + +Open `config/test.exs`.
After the existing `config :ex_phone_number, metadata_file: ...` line (added in commit `6af91bf`), add: + +```elixir +# Effectively unthrottled in tests — throttle logic is exercised in +# isolation in rate_limiter_test.exs, not via the full crawl integration. +config :kith, :monica_rate_limit, 1_000_000 +``` + +- [ ] **Step 3: Verify both configs compile and tests still pass** + +Run: `mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs && mix test` + +Expected: PASS. The rate_limiter test brackets its own override, so the high test default doesn't break it. The rest of the suite shouldn't notice. + +- [ ] **Step 4: Commit** + +```bash +git add config/config.exs config/test.exs +git commit -m "chore: configure Monica API rate limit (55/min prod, unlimited test)" +``` + +--- + +## Task 3: Wire RateLimiter into `api_get` and collapse the double retry + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`@max_rate_limit_retries`, `@rate_limit_sleep_ms`, `api_get`, `api_get_json`, `api_get_json_with_retry`) + +- [ ] **Step 1: Locate the existing functions** + +Run: `grep -n "@max_rate_limit_retries\|@rate_limit_sleep_ms\|defp api_get\|defp api_get_json" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: matches around lines 37-38 (module attrs), 1101 (`api_get`), 1109 (`api_get_json`), 1113-1118 (`api_get_json_with_retry`). + +- [ ] **Step 2: Add the alias** + +In `lib/kith/imports/sources/monica_api.ex`, find the `alias` block near the top (currently includes `Kith.Contacts.PhoneFormatter`). 
Add right after the existing aliases: + +```elixir + alias Kith.Imports.Sources.MonicaApi.RateLimiter +``` + +- [ ] **Step 3: Delete the two module attributes** + +In the same file, find and delete the lines: + +```elixir + @max_rate_limit_retries 3 + @rate_limit_sleep_ms :timer.seconds(65) +``` + +- [ ] **Step 4: Replace `api_get/3` with the throttled version** + +Find the existing `api_get/3`: + +```elixir + defp api_get(credential, url, params \\ []) do + headers = [{"Authorization", "Bearer #{credential.api_key}"}, {"Accept", "application/json"}] + req_options = Map.get(credential, :req_options, []) + options = [headers: headers, params: params] ++ req_options + + Req.get(url, options) + end +``` + +Replace with: + +```elixir + defp api_get(credential, url, params \\ []) do + RateLimiter.wait!(credential.url) + + headers = [{"Authorization", "Bearer #{credential.api_key}"}, {"Accept", "application/json"}] + req_options = Map.get(credential, :req_options, []) + + options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ req_options + + Req.get(url, options) + end +``` + +`max_retries: 5` overrides Req's default of 3 so a sustained 429 window doesn't terminate the call. `retry_log_level: :warn` keeps the existing log visibility. 
+ +- [ ] **Step 5: Replace `api_get_json/3` and delete `api_get_json_with_retry/4`** + +Find: + +```elixir + defp api_get_json(credential, url, params) do + api_get_json_with_retry(credential, url, params, 0) + end + + defp api_get_json_with_retry(_credential, _url, _params, retries) + when retries >= @max_rate_limit_retries do + {:error, :rate_limited} + end + + defp api_get_json_with_retry(credential, url, params, retries) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> + {:ok, body} + + {:ok, %{status: 429}} -> + Logger.info( + "[MonicaApi] Rate limited, sleeping #{@rate_limit_sleep_ms}ms (retry #{retries + 1})" + ) + + Process.sleep(@rate_limit_sleep_ms) + api_get_json_with_retry(credential, url, params, retries + 1) + + {:ok, %{status: status}} -> + {:error, "Unexpected status: #{status}"} + + {:error, reason} -> + {:error, reason} + end + end +``` + +Replace the entire block with: + +```elixir + defp api_get_json(credential, url, params) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} + end + end +``` + +The `{:error, :rate_limited}` shape is preserved — it's matched by callers (e.g. line 183 of `crawl_contacts_loop`, line 949 of `fetch_extra_notes_for_contact`). After 5 internal Req retries we surface rate-limited rather than silently looping. + +- [ ] **Step 6: Run the existing Monica tests to verify behavior is preserved** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs test/kith/workers/monica_api_crawl_worker_test.exs` + +Expected: PASS. The contract callers depend on (`{:ok, body}` / `{:error, :rate_limited}` / `{:error, other}`) is unchanged. 
+ +- [ ] **Step 7: Spot-check no dangling references to deleted attrs** + +Run: `grep -n "max_rate_limit_retries\|rate_limit_sleep_ms\|api_get_json_with_retry" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: no matches. If anything remains, delete it. + +- [ ] **Step 8: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex +git commit -m "refactor: collapse Monica double-retry to Req's built-in + RateLimiter" +``` + +--- + +## Task 4: Add `normalize: false` opt to `Contacts.create_contact_field` + +**Files:** +- Modify: `lib/kith/contacts.ex` (line 390) +- Modify: `test/kith/contacts_sub_entities_test.exs` + +- [ ] **Step 1: Inspect the existing test file to follow its setup pattern** + +Run: `grep -n "describe\|create_contact_field" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/test/kith/contacts_sub_entities_test.exs | head -20` + +Expected: see the existing describe blocks for `create_contact_field/2`. Note the setup helpers used (likely `setup_account()`, `seed_reference_data!()`). + +- [ ] **Step 2: Add a failing test** + +In `test/kith/contacts_sub_entities_test.exs`, find the `describe "create_contact_field/2"` block (or the location of existing contact_field tests) and add inside it: + +```elixir + test "create_contact_field/3 with normalize: false skips phone normalization", + %{account: account, phone_field_type: phone_type} do + contact = insert(:contact, account: account) + + # Value that PhoneFormatter.normalize/1 would change (no region, but + # +-prefixed numbers get parsed and re-emitted as canonical E.164). + # We assert the value is stored unchanged when normalization is skipped. 
+ attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = + Kith.Contacts.create_contact_field(contact, attrs, normalize: false) + + assert field.value == "+1 (202) 555-0100" + end + + test "create_contact_field/3 with normalize: true (default) normalizes phone", + %{account: account, phone_field_type: phone_type} do + contact = insert(:contact, account: account) + + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = Kith.Contacts.create_contact_field(contact, attrs) + assert field.value == "+12025550100" + end +``` + +If the existing tests don't already provide `phone_field_type` in the setup context, add a setup helper at the top of the describe block: + +```elixir + setup %{account: account} do + phone_type = + Kith.Repo.one!( + from t in "contact_field_types", + where: t.protocol == "tel:", + select: %{id: t.id}, + limit: 1 + ) + + {:ok, phone_field_type: phone_type} + end +``` + +Adapt this to whatever shape the file already uses — if the file's setup already returns the account, ensure the new helper merges with it rather than replacing it. + +- [ ] **Step 3: Run the new tests, expect the first to fail** + +Run: `mix test test/kith/contacts_sub_entities_test.exs` + +Expected: One test fails (the 3-arity call) with `(UndefinedFunctionError) function Kith.Contacts.create_contact_field/3 is undefined`. The 2-arity test should already pass.
+ +- [ ] **Step 4: Implement the 3-arity version** + +Open `lib/kith/contacts.ex` and find `create_contact_field/2` (around line 390): + +```elixir + def create_contact_field(%Contact{} = contact, attrs) do + attrs = maybe_normalize_phone(attrs) + + %ContactField{contact_id: contact.id, account_id: contact.account_id} + |> ContactField.changeset(attrs) + |> Repo.insert() + end +``` + +Replace with: + +```elixir + def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do + attrs = + if Keyword.get(opts, :normalize, true) do + maybe_normalize_phone(attrs) + else + attrs + end + + %ContactField{contact_id: contact.id, account_id: contact.account_id} + |> ContactField.changeset(attrs) + |> Repo.insert() + end +``` + +The default-arg `opts \\ []` keeps every existing 2-arity caller working without changes. Only callers that explicitly want to bypass normalization need to pass `normalize: false`. + +- [ ] **Step 5: Run the tests to verify both pass** + +Run: `mix test test/kith/contacts_sub_entities_test.exs` + +Expected: PASS, including both new tests. + +- [ ] **Step 6: Run the full Contacts test files to verify no regressions** + +Run: `mix test test/kith/contacts_sub_entities_test.exs test/kith/contacts/contact_test.exs` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add lib/kith/contacts.ex test/kith/contacts_sub_entities_test.exs +git commit -m "feat: Contacts.create_contact_field/3 supports normalize: false opt" +``` + +--- + +## Task 5: Monica importer passes `normalize: false` to `create_contact_field` + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`create_contact_field/5` helper at ~line 452) +- Modify: `test/kith/imports/sources/monica_api_test.exs` + +- [ ] **Step 1: Locate the inner helper** + +Run: `grep -n "defp create_contact_field" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: one match near line 452.
+ +- [ ] **Step 2: Update the inner helper** + +Find: + +```elixir + defp create_contact_field(contact, field, cft_id, value, import_job) do + attrs = %{"value" => value, "contact_field_type_id" => cft_id} + + case Contacts.create_contact_field(contact, attrs) do +``` + +Replace the `Contacts.create_contact_field(contact, attrs)` call with: + +```elixir + case Contacts.create_contact_field(contact, attrs, normalize: false) do +``` + +The Monica path already normalizes phone values upfront in `normalize_field_value/3` using the user-chosen region. The downstream `Contacts.maybe_normalize_phone/1` would re-parse the same E.164 value and do a redundant `Repo.get(ContactFieldType, ...)` per write. Skipping it saves ~2000 libphonenumber parses and ~5000 DB round trips per 1000-contact import. + +- [ ] **Step 3: Write a test asserting Monica import doesn't double-normalize** + +Tricky to assert directly without instrumenting. Instead, add a behavioral test in `test/kith/imports/sources/monica_api_test.exs` that imports a phone field and verifies the stored value matches what `PhoneFormatter.normalize/2` would produce (i.e. the import path's own normalization is the single source of truth): + +In `test/kith/imports/sources/monica_api_test.exs`, find the existing test `"normalizes phone fields to E.164 when phone_default_region is set"` (added in commit `6af91bf`). Right after it, add: + +```elixir + test "phone normalization happens exactly once during import", + %{user: user, account_id: account_id} do + # Regression: Contacts.create_contact_field used to re-run + # maybe_normalize_phone on the already-E.164 value, costing one extra + # libphonenumber parse and one extra Repo.get per phone field. The + # behavioral assertion here is "value stored matches MonicaApi's own + # normalization output exactly, with no later mutation." 
+ contacts = [ + contact_json( + id: 99, + first_name: "OnceOnly", + contact_fields: [ + contact_field_json(content: "(202) 555-0100", type_name: "Phone") + ] + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, _} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "phone_default_region" => "US" + }) + + rec = Imports.find_import_record(account_id, "monica_api", "contact", "99") + + values = + Repo.all(from cf in Contacts.ContactField, where: cf.contact_id == ^rec.local_entity_id) + |> Enum.map(& &1.value) + + assert "+12025550100" in values + end +``` + +This test passes both before and after Task 5; its purpose is to lock in the behavior so a future regression that re-introduces double-normalization (e.g. accidentally calling `normalize/1` with `nil` region on an already-canonical value) doesn't change the stored value. + +- [ ] **Step 4: Run the test and existing Monica tests** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS, all tests. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git commit -m "perf: skip redundant normalization in Monica contact_field writes" +``` + +--- + +## Task 6: Replace `:persistent_term` phone-cft cache with `ref_data` MapSet + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`crawl/5`, `build_or_update_ref_data/3`, `normalize_field_value/3`, delete both clauses of `phone_field_type?/1`, including the `phone_field_type?(nil)` head) + +- [ ] **Step 1: Locate the cache and the ref_data builders** + +Run: + +```bash +grep -n "phone_field_type?\|build_or_update_ref_data\|defp build_ref_data\|defp find_or_create_contact_field_types\|ref_data: ref_data\|ref_data ->" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex +``` + +Expected: matches at the cache (~line 432-450), `build_or_update_ref_data` (~line 864-870), `find_or_create_contact_field_types` (~line 956-966), and various ref_data references in the contact loop. + +- [ ] **Step 2: Read the existing `build_or_update_ref_data` and `find_or_create_contact_field_types`** + +Open `lib/kith/imports/sources/monica_api.ex`. Read both functions (~lines 860-970). Confirm the shape: `ref_data` is a map with keys including `contact_field_types: %{name => id}`. `build_or_update_ref_data` is called per page to merge in newly-discovered cft types. + +- [ ] **Step 3: Add a helper to compute phone-cft IDs from a set of cft IDs** + +In `lib/kith/imports/sources/monica_api.ex`, add a new private helper near the other ref_data helpers (place it just before `find_or_create_contact_field_types/2` so related code clusters together): + +```elixir + # Returns the subset of `cft_ids` whose protocol begins with "tel" (phone). + # Called when ref_data is built or refreshed; the resulting MapSet replaces + # the per-cft `:persistent_term` cache that triggered global GCs on cold + # imports.
+ defp phone_cft_ids(account_id, cft_ids) when is_list(cft_ids) do + Repo.all( + from t in Contacts.ContactFieldType, + where: t.id in ^cft_ids, + where: is_nil(t.account_id) or t.account_id == ^account_id, + where: fragment("? LIKE 'tel%'", t.protocol), + select: t.id + ) + |> MapSet.new() + end +``` + +Account scope mirrors the existing pattern in `find_or_create_contact_field_types/2`. The `is_nil(t.account_id)` clause handles system-wide cft types seeded in test/dev. + +- [ ] **Step 4: Extend `ref_data` to carry `phone_cft_ids`** + +Find `build_or_update_ref_data/3` (the initial build path, ~line 864): + +```elixir + defp build_or_update_ref_data(account_id, contacts, nil) do + cfts = collect_api_contact_field_types(contacts) + + %{ + contact_field_types: find_or_create_contact_field_types(account_id, cfts) + } + end +``` + +Replace with: + +```elixir + defp build_or_update_ref_data(account_id, contacts, nil) do + cfts = collect_api_contact_field_types(contacts) + cft_map = find_or_create_contact_field_types(account_id, cfts) + + %{ + contact_field_types: cft_map, + phone_cft_ids: phone_cft_ids(account_id, Map.values(cft_map)) + } + end +``` + +Find the update path (the function head matching when `ref_data` is non-nil, ~line 886): + +```elixir + defp build_or_update_ref_data(account_id, contacts, ref_data) do + new_cfts = + contacts + |> collect_api_contact_field_types() + |> Enum.reject(&Map.has_key?(ref_data.contact_field_types, &1)) + + %{ + ref_data | + contact_field_types: + Map.merge( + ref_data.contact_field_types, + find_or_create_contact_field_types(account_id, new_cfts) + ) + } + end +``` + +Replace with: + +```elixir + defp build_or_update_ref_data(account_id, contacts, ref_data) do + new_cfts = + contacts + |> collect_api_contact_field_types() + |> Enum.reject(&Map.has_key?(ref_data.contact_field_types, &1)) + + if new_cfts == [] do + ref_data + else + added = find_or_create_contact_field_types(account_id, new_cfts) + merged_types = 
Map.merge(ref_data.contact_field_types, added) + + %{ + ref_data + | contact_field_types: merged_types, + phone_cft_ids: + MapSet.union( + ref_data.phone_cft_ids, + phone_cft_ids(account_id, Map.values(added)) + ) + } + end + end +``` + +The short-circuit when `new_cfts == []` avoids running the phone-cft query on every page when no new cft types appear (the common case). + +- [ ] **Step 5: Update `normalize_field_value/3` to take `ctx`** + +Find `normalize_field_value` (~line 419): + +```elixir + defp normalize_field_value(nil, _cft_id, _opts), do: nil + + defp normalize_field_value(value, cft_id, opts) when is_binary(value) do + if phone_field_type?(cft_id) do + region = opts["phone_default_region"] + region = if region in [nil, ""], do: nil, else: region + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end + end +``` + +Replace with: + +```elixir + defp normalize_field_value(nil, _cft_id, _ctx), do: nil + + defp normalize_field_value(value, cft_id, ctx) when is_binary(value) do + if MapSet.member?(ctx.ref_data.phone_cft_ids, cft_id) do + region = parse_phone_region(ctx.opts["phone_default_region"]) + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end + end + + defp parse_phone_region(region) when region in [nil, ""], do: nil + defp parse_phone_region(region) when is_binary(region), do: region +``` + +- [ ] **Step 6: Update the call site in `import_single_contact_field/4`** + +Find (~line 406): + +```elixir + defp import_single_contact_field(contact, field, ref_data, ctx) do + cft_name = get_in(field, ["contact_field_type", "name"]) + cft_id = if cft_name, do: Map.get(ref_data.contact_field_types, cft_name) + raw_value = field["content"] + value = normalize_field_value(raw_value, cft_id, ctx.opts) +``` + +Change the last line to pass `ctx`: + +```elixir + value = normalize_field_value(raw_value, cft_id, ctx) +``` + +- [ ] **Step 7: Delete 
`phone_field_type?/1`** + +Delete both clauses (~lines 432-450): + +```elixir + defp phone_field_type?(nil), do: false + + defp phone_field_type?(cft_id) do + case :persistent_term.get({__MODULE__, :phone_cft, cft_id}, :miss) do + :miss -> + result = + Repo.exists?( + from(t in Contacts.ContactFieldType, + where: t.id == ^cft_id and fragment("? LIKE 'tel%'", t.protocol) + ) + ) + + :persistent_term.put({__MODULE__, :phone_cft, cft_id}, result) + result + + result -> + result + end + end +``` + +- [ ] **Step 8: Run the full Monica test suite** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs test/kith/workers/monica_api_crawl_worker_test.exs` + +Expected: PASS. The behavior is unchanged externally — phones still normalize correctly when a region is supplied — only the internal mechanism shifts from `:persistent_term`+lazy-DB-query to `MapSet`-on-`ref_data`. + +- [ ] **Step 9: Verify no `:persistent_term` reads remain in the file** + +Run: `grep -n ":persistent_term" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: no matches. 
+ +- [ ] **Step 10: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex +git commit -m "perf: replace :persistent_term phone-cft cache with ref_data MapSet" +``` + +--- + +## Task 7: Add `collect_misc_data/5` and extend the deferred state + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`crawl_all_contacts/1` initial state, contact loop wiring, new `@misc_endpoints` attribute, `collect_misc_data/5`) +- Modify: `test/kith/imports/sources/monica_api_test.exs` + +- [ ] **Step 1: Find the deferred state initialization** + +Run: `grep -n "deferred:\|extra_notes: \[\]\|first_met_through: \[\]" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex | head -10` + +Expected: a match in `crawl_all_contacts/1` (~line 156-163) where `deferred` is initialized as `%{first_met_through: [], relationships: [], extra_notes: []}`. + +- [ ] **Step 2: Add `misc_data: []` to the deferred initial state** + +Open `lib/kith/imports/sources/monica_api.ex`. Find the initialization (~line 156): + +```elixir + defp crawl_all_contacts(ctx) do + initial_state = %{ + page: 1, + total: nil, + acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, + deferred: %{first_met_through: [], relationships: [], extra_notes: []}, + ref_data: nil, + global_idx: 0 + } + + crawl_contacts_loop(ctx, initial_state) + end +``` + +Change to: + +```elixir + defp crawl_all_contacts(ctx) do + initial_state = %{ + page: 1, + total: nil, + acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, + deferred: %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }, + ref_data: nil, + global_idx: 0 + } + + crawl_contacts_loop(ctx, initial_state) + end +``` + +- [ ] **Step 3: Add the `@misc_endpoints` module attribute and helper** + +Find the location just below the existing `defp collect_extra_notes` (~line 583-599). 
After it, add: + +```elixir + @misc_endpoints [ + {:calls, "number_of_calls"}, + {:activities, "number_of_activities"}, + {:gifts, "number_of_gifts"}, + {:debts, "number_of_debts"}, + {:tasks, "number_of_tasks"}, + {:reminders, "number_of_reminders"}, + {:conversations, "number_of_conversations"} + ] + + # Build a plan entry for a contact's per-contact extra-data endpoints. + # An endpoint is included only if (a) the wizard opt for that data type is + # not explicitly false AND (b) Monica's `statistics.number_of_X` reports + # > 0 (or the stat field is missing — safer to fetch than to silently + # skip when Monica's payload shape is unfamiliar). + # + # `:pets` has no statistics field in Monica's contact payload, so it is + # included whenever the wizard opt is on. The redundant fetch for pet-free + # contacts is the documented cost. + defp collect_misc_data(deferred, api_contact, source_id, local_id, opts) do + stats = api_contact["statistics"] || %{} + + endpoints = + @misc_endpoints + |> Enum.filter(fn {key, stat_field} -> + opts[Atom.to_string(key)] != false and (stats[stat_field] || 1) > 0 + end) + |> Enum.map(&elem(&1, 0)) + + endpoints = if opts["pets"] != false, do: [:pets | endpoints], else: endpoints + + if endpoints == [] do + deferred + else + entry = %{ + source_id: to_string(source_id), + local_id: local_id, + endpoints: Enum.map(endpoints, &Atom.to_string/1) + } + + %{deferred | misc_data: [entry | deferred.misc_data]} + end + end +``` + +Note: `endpoints` are stringified before storing in the plan because the plan will eventually be serialized into Oban job args (JSON-encoded), where atoms don't round-trip cleanly. + +Note on the `(stats[stat_field] || 1) > 0` line: `|| 1` is the safe-default behavior — when the stat field is missing or nil from Monica's payload, we treat it as "≥ 1" so the endpoint fires. We do not want to silently skip data. 
+ +- [ ] **Step 4: Wire `collect_misc_data` into the contact processing loop** + +Find `collect_deferred_data/3` (the function that gathers deferred entries during the contact loop, ~line 569-580). It currently calls `collect_extra_notes`. Locate its callers (`import_api_contact_children/7` at ~line 377 or similar). + +Find the call site that invokes `collect_deferred_data` — the function signature is something like: + +```elixir + defp collect_deferred_data(api_contact, source_id, deferred) do + deferred + |> add_first_met_through_entry(api_contact, source_id) + |> add_relationship_entries(api_contact, source_id) + |> collect_extra_notes(api_contact, source_id) + end +``` + +The actual function name/shape may differ slightly — adapt. Add `collect_misc_data` as a step, threading through the `contact` (for its local id) and `opts`. Since `collect_deferred_data` currently only takes `(api_contact, source_id, deferred)`, the cleanest path is to **extend its signature** to take `(api_contact, source_id, local_id, deferred, opts)` and update the single caller in `import_api_contact_children/7`. + +In `import_api_contact_children/7` (~line 377), find the line: + +```elixir + deferred = collect_deferred_data(api_contact, source_id, deferred) +``` + +Change to: + +```elixir + deferred = collect_deferred_data(api_contact, source_id, contact.id, deferred, ctx.opts) +``` + +(`ctx.opts` was added to `ctx` in commit `6af91bf` — it's already in scope here.) 
+ +Then update `collect_deferred_data` itself to accept the new args and call `collect_misc_data`: + +```elixir + defp collect_deferred_data(api_contact, source_id, local_id, deferred, opts) do + deferred + |> add_first_met_through_entry(api_contact, source_id) + |> add_relationship_entries(api_contact, source_id) + |> collect_extra_notes(api_contact, source_id) + |> collect_misc_data(api_contact, source_id, local_id, opts) + end +``` + +Adapt to the exact existing function body — the principle is: thread `local_id` and `opts` in, append the `|> collect_misc_data(...)` step. + +- [ ] **Step 5: Add `misc_data` to the `crawl/5` return summary** + +Find the `{:ok, %{...}}` map at the end of `crawl/5` (~line 129-138): + +```elixir + {:ok, + %{ + imported: acc.contacts, + contacts: acc.contacts, + notes: acc.notes, + skipped: acc.skipped, + merged: merge_result.merged, + error_count: error_count, + errors: Enum.take(all_errors, 50) + }} +``` + +Change to: + +```elixir + {:ok, + %{ + imported: acc.contacts, + contacts: acc.contacts, + notes: acc.notes, + skipped: acc.skipped, + merged: merge_result.merged, + error_count: error_count, + errors: Enum.take(all_errors, 50), + misc_data_plan: Enum.reverse(deferred.misc_data) + }} +``` + +The plan is reversed so contacts are listed in import order rather than the reverse-insertion order that `[entry | acc]` produces. `MonicaApiCrawlWorker` (next task) will read this key, use it for the misc-worker enqueue, then strip it before persisting the summary to the DB. + +Find where `deferred` is in scope at this return — it's the `_deferred` element from `crawl_all_contacts(ctx)` (~line 88). Currently the code only binds `{acc, deferred}` from that call but doesn't use `deferred` at the return. Locate the bind: + +```elixir + {acc, deferred} = crawl_all_contacts(ctx) +``` + +Confirm `deferred` is in scope for the return tuple. If it isn't (you may see `{acc, _deferred}` ignoring it, or the variable may be shadowed), un-ignore it. 
+ +- [ ] **Step 6: Write a unit test for `collect_misc_data` shape** + +In `test/kith/imports/sources/monica_api_test.exs`, find an existing describe block for `crawl/5` (or add a new one near the end). Add tests: + +```elixir + describe "crawl/5 — misc-data plan" do + test "includes a contact when statistics.number_of_calls > 0", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 1, + first_name: "Has", + last_name: "Calls", + statistics: %{"number_of_calls" => 3} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false + }) + + assert [%{source_id: "1", endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + + test "excludes a contact when all opts are off", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 2, + first_name: "AllOff", + statistics: %{"number_of_calls" => 5, "number_of_gifts" => 5} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => false, + "gifts" => false, + "pets" => false, + "activities" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert summary.misc_data_plan == [] + end + + test "includes :pets unconditionally when opt is on (no stat field)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 3, + first_name: "PetsOnly", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = 
api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "pets" => true, + "calls" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: ["pets"]}] = summary.misc_data_plan + end + + test "missing statistic field is treated as ≥1 (safe default)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 4, + first_name: "NoStats", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + end +``` + +Verify the test helpers `contact_json/1` and `contacts_page_json/4` accept a `statistics:` keyword. Check the existing test file for examples — if the helper doesn't currently take `statistics`, extend it to merge a `statistics:` key into the contact JSON. If you need to update the helper, do it in the same commit. + +- [ ] **Step 7: Run the new tests** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS, all tests (existing + 4 new). 
+ +- [ ] **Step 8: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git commit -m "feat: collect misc-data plan during Monica crawl" +``` + +--- + +## Task 8: Create `MonicaMiscDataWorker` with relocated per-contact helpers + +**Files:** +- Create: `lib/kith/workers/monica_misc_data_worker.ex` +- Create: `test/kith/workers/monica_misc_data_worker_test.exs` + +This task creates the worker as a self-contained module. The per-contact endpoint helpers (`import_contact_pets`, `_calls`, `_activities`, `_gifts`, `_debts`, `_tasks`, `_reminders`, `_conversations`) are **copied** from `MonicaApi` into the worker. The duplication is temporary — Task 9 removes them from `MonicaApi` once the worker is wired up. This staging preserves a "main suite still green" checkpoint between Tasks 8 and 9. + +- [ ] **Step 1: Inspect `MonicaPhotoSyncWorker` for the canonical worker pattern** + +Run: `cat /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/workers/monica_photo_sync_worker.ex | head -90` + +Note: queue, `use Oban.Worker` options, perform/1 args shape, status check, credential rebuild from args, summary update at end, broadcast pattern. Mirror these. + +- [ ] **Step 2: List the per-contact helper boundaries in `MonicaApi`** + +Run: `grep -n "^ defp import_contact_\|^ defp import_single_pet\|^ defp import_single_call\|^ defp import_single_activit\|^ defp import_single_gift\|^ defp import_single_debt\|^ defp import_single_task\|^ defp import_single_reminder\|^ defp import_single_conversat" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: a list of all the per-endpoint functions plus their per-item siblings. Note line ranges for copying. 
+ +- [ ] **Step 3: Write the failing test file first** + +Create `test/kith/workers/monica_misc_data_worker_test.exs`: + +```elixir +defmodule Kith.Workers.MonicaMiscDataWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + + alias Kith.Imports + alias Kith.Workers.MonicaMiscDataWorker + + @stub_name MonicaMiscDataReqStub + + setup do + user = user_fixture() + seed_reference_data!() + + Req.Test.set_req_test_from_context(self()) + + %{user: user, account_id: user.account_id} + end + + defp build_args(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key", + "plan" => plan, + "req_options" => [plug: {Req.Test, @stub_name}] + } + end + + defp api_import(account_id, user_id, api_options \\ %{}) do + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: api_options, + status: "completed" + }) + end + + describe "perform/1" do + test "fires only the endpoints listed in the plan", + %{user: user, account_id: account_id} do + contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + # Record all endpoint paths the worker calls. 
+ pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [ + %{ + "source_id" => "42", + "local_id" => contact.id, + "endpoints" => ["calls", "gifts"] + } + ] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + paths = collect_requests([]) + assert "/api/contacts/42/calls" in paths + assert "/api/contacts/42/gifts" in paths + refute "/api/contacts/42/pets" in paths + refute "/api/contacts/42/activities" in paths + end + + test "exits early when the import is cancelled", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + {:ok, _} = Imports.update_import_status(import_job, "cancelled", %{}) + + contact = contact_fixture(account_id) + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "skips contacts whose local row has been soft-deleted", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + contact = contact_fixture(account_id) + + Kith.Repo.update_all( + from(c in Kith.Contacts.Contact, where: c.id == ^contact.id), + set: [deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)] + ) + + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "writes per-endpoint counts to import_job.summary['misc']", + %{user: user, account_id: account_id} do + 
contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case conn.request_path do + "/api/contacts/1/calls" -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "called_at" => "2025-01-01", "contact_called" => true}, + %{"id" => 2, "called_at" => "2025-01-02", "contact_called" => false} + ] + }) + + _ -> + Req.Test.json(conn, %{"data" => []}) + end + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + updated = Imports.get_import!(import_job.id) + assert is_map(updated.summary["misc"]) + assert updated.summary["misc"]["calls"] >= 0 + end + end + + defp collect_requests(acc) do + receive do + {:request, path} -> collect_requests([path | acc]) + after + 0 -> Enum.reverse(acc) + end + end +end +``` + +The stub-via-Req.Test pattern matches what `monica_api_test.exs` already uses; copy whichever helper that file relies on if there's a shared fixture (e.g. `contact_field_json/1`). + +The `req_options` arg shape in `build_args/2` mirrors how the existing photo sync worker test injects `Req.Test` stubs into the worker; if the codebase uses a different injection point (e.g. via `Application.put_env`), adapt to that. + +- [ ] **Step 4: Run the test file, expect compilation failure** + +Run: `mix test test/kith/workers/monica_misc_data_worker_test.exs` + +Expected: FAIL with `(UndefinedFunctionError) function Kith.Workers.MonicaMiscDataWorker.__info__/1 is undefined`. + +- [ ] **Step 5: Implement the worker skeleton + relocated helpers** + +Create `lib/kith/workers/monica_misc_data_worker.ex`: + +```elixir +defmodule Kith.Workers.MonicaMiscDataWorker do + @moduledoc """ + Oban worker that imports the per-contact "miscellaneous" data types + (pets, calls, activities, gifts, debts, tasks, reminders, conversations) + for an already-completed Monica API crawl. 
+ + Enqueued by `Kith.Workers.MonicaApiCrawlWorker` on successful completion, + carrying: + + * `"import_id"` — the Import row this job belongs to. + * `"credential_url"`, `"credential_api_key"` — the credential needed to + keep calling Monica after the main crawl wipes `api_key_encrypted`. + Same pattern as `MonicaPhotoSyncWorker`. + * `"plan"` — list of `%{"source_id", "local_id", "endpoints"}` maps + pre-filtered during the main crawl using Monica's `statistics.*` + fields, so we only fire the endpoints with data. + + Throttled through `Kith.Imports.Sources.MonicaApi.RateLimiter` (same + per-host bucket as the main crawler). + + Exits early if the import has been cancelled or has failed. Contacts that + were soft-deleted between main-crawl completion and this job's dispatch + are silently skipped. + """ + + use Oban.Worker, queue: :imports, max_attempts: 3 + + require Logger + + import Ecto.Query, warn: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Imports.Sources.MonicaApi.RateLimiter + + @impl Oban.Worker + def timeout(_job), do: :timer.minutes(30) + + @impl Oban.Worker + def perform(%Oban.Job{args: args}) do + import_job = Imports.get_import!(args["import_id"]) + + if import_job.status in ["cancelled", "failed"] do + :ok + else + credential = build_credential(args) + plan = args["plan"] || [] + + counts = process_plan(plan, credential, import_job) + + summary = Map.put(import_job.summary || %{}, "misc", counts) + + Imports.update_import_status(import_job, import_job.status, %{summary: summary}) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_misc_complete, counts}) + + :ok + end + end + + defp build_credential(args) do + %{ + url: args["credential_url"], + api_key: args["credential_api_key"], + req_options: args["req_options"] || [] + } + end + + defp process_plan(plan, credential, import_job) do + initial = %{ + "pets" => 0, + "calls" => 0, + "activities" => 0, + "gifts" => 0, + "debts" => 0, +
"tasks" => 0, + "reminders" => 0, + "conversations" => 0 + } + + Enum.reduce(plan, initial, fn entry, counts -> + process_entry(entry, credential, import_job, counts) + end) + end + + defp process_entry(entry, credential, import_job, counts) do + contact = Contacts.get_contact_for_misc(entry["local_id"]) + + if contact == nil or not is_nil(contact.deleted_at) do + counts + else + Enum.reduce(entry["endpoints"] || [], counts, fn endpoint, counts -> + n = fire_endpoint(endpoint, credential, contact, entry["source_id"], import_job) + Map.update(counts, endpoint, n, &(&1 + n)) + end) + end + end + + defp fire_endpoint("pets", c, contact, src, ij), do: import_contact_pets(c, contact, src, ij) + + defp fire_endpoint("calls", c, contact, src, ij), + do: import_contact_calls(c, contact, src, ij) + + defp fire_endpoint("activities", c, contact, src, ij), + do: import_contact_activities(c, contact, src, ij) + + defp fire_endpoint("gifts", c, contact, src, ij), + do: import_contact_gifts(c, contact, src, ij) + + defp fire_endpoint("debts", c, contact, src, ij), + do: import_contact_debts(c, contact, src, ij) + + defp fire_endpoint("tasks", c, contact, src, ij), + do: import_contact_tasks(c, contact, src, ij) + + defp fire_endpoint("reminders", c, contact, src, ij), + do: import_contact_reminders(c, contact, src, ij) + + defp fire_endpoint("conversations", c, contact, src, ij), + do: import_contact_conversations(c, contact, src, ij) + + defp fire_endpoint(other, _, _, _, _) do + Logger.warning("[MonicaMiscData] unknown endpoint #{inspect(other)}; skipping") + 0 + end + + # ── Relocated per-contact helpers ──────────────────────────────────── + # + # Each helper makes one GET against Monica and inserts the returned items. + # Bodies are copied verbatim from MonicaApi; Task 9 removes the originals. + # Helpers return an integer count of successfully imported items so the + # worker can aggregate it into `summary["misc"]`. 
+ + # PASTE THE BODIES OF THE FOLLOWING FUNCTIONS FROM monica_api.ex HERE, + # ADAPTED TO THE NEW (credential, contact, source_id, import_job) SHAPE + # AND RETURNING AN INTEGER COUNT: + # + # import_contact_pets/6 -> import_contact_pets/4 + # import_contact_calls/7 -> import_contact_calls/4 + # import_contact_activities/7 -> import_contact_activities/4 + # import_contact_gifts/6 -> import_contact_gifts/4 + # import_contact_debts/6 -> import_contact_debts/4 + # import_contact_tasks/6 -> import_contact_tasks/4 + # import_contact_reminders/6 -> import_contact_reminders/4 + # import_contact_conversations/7 -> import_contact_conversations/4 + # + # Together with their per-item siblings (import_single_pet, etc.). + # + # base_url is now derived from `credential.url` inside each helper. + # account_id is now derived from `contact.account_id`. + # user_id is no longer needed (calls/activities/conversations are not + # user-scoped; if any helper currently uses user_id only for audit-log + # author, fall back to `import_job.user_id`). + # + # IMPORTANT: every helper that today calls api_get_json must continue to + # call it via `Kith.Imports.Sources.MonicaApi.api_get_json/3` (or the + # equivalent unified helper). 
To avoid coupling, copy `api_get_json` + # into this module as a small private wrapper that goes through Req + + # RateLimiter the same way: + + defp api_get_json(credential, url, params) do + RateLimiter.wait!(credential.url) + + headers = [ + {"Authorization", "Bearer #{credential.api_key}"}, + {"Accept", "application/json"} + ] + + options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ Map.get(credential, :req_options, []) + + case Req.get(url, options) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} + end + end + + defp maybe_record_entity(_import_job, _, nil, _, _), do: :ok + + defp maybe_record_entity(import_job, source_type, source_id, local_type, local_id) do + Imports.record_imported_entity( + import_job, + source_type, + to_string(source_id), + local_type, + local_id + ) + end +end +``` + +Now copy the actual bodies of `import_contact_pets/6`, `import_single_pet/4`, `import_contact_calls/7`, `import_single_call/5`, `import_contact_activities/7`, `import_single_activity/5`, `import_contact_gifts/6`, `import_single_gift/4`, `import_contact_debts/6`, `import_single_debt/4`, `import_contact_tasks/6`, `import_single_task/4`, `import_contact_reminders/6`, `import_single_reminder/4`, `import_contact_conversations/7`, and `import_single_conversation/5` (or whatever the exact per-item function names are) from `lib/kith/imports/sources/monica_api.ex` into this new module. 
+ +For each top-level helper, adapt the signature: + +**Before** (in MonicaApi): +```elixir +defp import_contact_pets(credential, base_url, account_id, contact, source_id, import_job) do + url = "#{base_url}/api/contacts/#{source_id}/pets" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => pets}} when is_list(pets) -> + Enum.flat_map(pets, fn pet -> + import_single_pet(account_id, contact, pet, import_job) + end) + + {:ok, _} -> + [] + + {:error, reason} -> + ["Failed to fetch pets for contact #{source_id}: #{inspect(reason)}"] + end +end +``` + +**After** (in MonicaMiscDataWorker): +```elixir +defp import_contact_pets(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/pets" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => pets}} when is_list(pets) -> + Enum.count(pets, fn pet -> + case import_single_pet(contact.account_id, contact, pet, import_job) do + [] -> true # success — no error string + _ -> false + end + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch pets for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end +end +``` + +Apply the same adaptation to all eight top-level helpers. Keep their per-item siblings (`import_single_pet`, `import_single_call`, etc.) unchanged in body — just paste them as-is. The signature change is *only* at the top-level (the function the worker's `fire_endpoint` dispatches to). + +Each top-level helper now returns an integer count instead of a list of errors. Errors become warning logs (Phase 4 errors are not user-actionable; logging is enough). + +- [ ] **Step 6: Add the `Contacts.get_contact_for_misc/1` lookup helper** + +The worker calls `Contacts.get_contact_for_misc/1`. This is a tiny helper avoiding `Repo.get` directly. 
Add to `lib/kith/contacts.ex`, near `get_contact_field!/2`: + +```elixir + @doc """ + Fetch a contact by ID without scope enforcement, for use by the + Monica misc-data worker. The worker has already verified the contact + belongs to an import the user authorized; we just need the row. + + Returns `nil` if not found. + """ + def get_contact_for_misc(id) when is_integer(id) or is_binary(id) do + Repo.get(Contact, id) + end +``` + +(Alternative: use `Repo.get(Kith.Contacts.Contact, local_id)` directly in the worker — but adding the named helper makes the intent self-documenting and keeps the worker free of direct Repo imports.) + +- [ ] **Step 7: Run the worker test** + +Run: `mix test test/kith/workers/monica_misc_data_worker_test.exs` + +Expected: PASS, all 4 tests. (Some assertions are deliberately loose — e.g. `>= 0` — because the per-item insertion paths may fail validation on fixture data that lacks required fields; the assertion is "the worker called the endpoint and updated the summary," not "every fixture inserted successfully." Tighten if you choose to set up richer fixtures.) + +- [ ] **Step 8: Run the full suite to verify duplicated helpers still pass their existing tests** + +Run: `mix test` + +Expected: PASS. Both `MonicaApi.import_contact_pets/6` (still there) and `MonicaMiscDataWorker.import_contact_pets/4` (newly added) coexist temporarily. Existing tests of the inline Phase 4 path continue to pass. 
+ +- [ ] **Step 9: Commit** + +```bash +git add lib/kith/workers/monica_misc_data_worker.ex test/kith/workers/monica_misc_data_worker_test.exs lib/kith/contacts.ex +git commit -m "feat: add MonicaMiscDataWorker (per-contact extra data, plan-driven)" +``` + +--- + +## Task 9: Cut over — remove inline Phase 4 from `MonicaApi` and enqueue the misc worker + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (delete `import_extra_data_types/5`, `import_per_contact_data/7`, eight `import_contact_*` helpers + their `import_single_*` siblings; remove Phase 4 invocation from `crawl/5`; remove `extra_data_errors` accumulation) +- Modify: `lib/kith/workers/monica_api_crawl_worker.ex` (enqueue `MonicaMiscDataWorker`, strip plan from persisted summary) +- Modify: `test/kith/workers/monica_api_crawl_worker_test.exs` (boundary test for misc-worker enqueue) + +This is the largest task; double-check after each deletion that nothing else in `MonicaApi` references the removed functions. + +- [ ] **Step 1: Locate Phase 4 invocation in `crawl/5`** + +Run: `grep -n "import_extra_data_types\|extra_data_errors" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: matches in `crawl/5` (~lines 110-127) and the function definition (~line 1275). 
+ +- [ ] **Step 2: Remove the Phase 4 invocation from `crawl/5`** + +Find the block (~line 109-127): + +```elixir + # Phase 4: Additional data types (per-contact endpoints) + extra_data_errors = + import_extra_data_types(credential, account_id, user_id, import_job, opts) + + # Phase 5: Enqueue document import jobs (async, runs after main import) + if opts["documents"] do + enqueue_document_imports(credential, account_id, user_id, import_job) + end + + all_errors = + acc.errors ++ + ref_errors ++ + notes_errors ++ + merge_result.errors ++ + extra_data_errors + + error_count = + acc.error_count + length(ref_errors) + length(notes_errors) + + length(merge_result.errors) + length(extra_data_errors) +``` + +Replace with: + +```elixir + # Phase 5: Enqueue document import jobs (async, runs after main import) + if opts["documents"] do + enqueue_document_imports(credential, account_id, user_id, import_job) + end + + all_errors = + acc.errors ++ + ref_errors ++ + notes_errors ++ + merge_result.errors + + error_count = + acc.error_count + length(ref_errors) + length(notes_errors) + + length(merge_result.errors) +``` + +- [ ] **Step 3: Delete the eight top-level per-contact helpers and their `import_single_*` siblings** + +Delete the entire blocks (function + Phase header comment) for: + +- `import_extra_data_types/5` and its docstring/comment header +- `import_per_contact_data/7` +- `import_contact_pets/6` + `import_single_pet/4` +- `import_contact_calls/7` + `import_single_call/5` +- `import_contact_activities/7` + `import_single_activity/5` +- `import_contact_gifts/6` + `import_single_gift/4` +- `import_contact_debts/6` + `import_single_debt/4` +- `import_contact_tasks/6` + `import_single_task/4` +- `import_contact_reminders/6` + `import_single_reminder/4` +- `import_contact_conversations/7` + `import_single_conversation/5` + +Use grep to find their exact line ranges: + +```bash +grep -n "^ defp import_contact_\|^ defp import_single_\|^ defp import_extra_data_types\|^ defp 
import_per_contact_data\|^ # ── Phase " /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex +``` + +Delete each function body from `defp ... do` through the matching `end`. Also delete the `# ── Phase 5: Pets ─...`, `# ── Phase 6: Calls ─...` etc. comment headers, plus the parent `# ── Phases 5-12: Additional per-contact data types ─...` header. + +Do NOT delete `enqueue_document_imports/4` or `Phase 5: Enqueue document import jobs` — those still belong to `MonicaApi` (documents are handled by a separate worker, not the misc worker). + +- [ ] **Step 4: Verify no dangling references inside `MonicaApi`** + +Run: `grep -n "import_contact_\|import_single_pet\|import_single_call\|import_single_activit\|import_single_gift\|import_single_debt\|import_single_task\|import_single_reminder\|import_single_conversat\|import_extra_data_types\|import_per_contact_data\|extra_data_errors" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: no matches. If any remain, delete or update them. + +- [ ] **Step 5: Compile and run Monica + crawl-worker tests** + +Run: `mix compile --warnings-as-errors && mix test test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS. Tests that previously exercised Phase 4 inline (if any) need updating — they should now assert that the misc-data plan is built but Phase 4 endpoints are NOT hit during `crawl/5`. Locate any failing test and replace its assertion (e.g. "asserts 1 pet was inserted") with the new contract (e.g. "asserts the misc_data_plan includes the pets endpoint for this contact"). + +- [ ] **Step 6: Wire `MonicaApiCrawlWorker` to enqueue the misc worker** + +Open `lib/kith/workers/monica_api_crawl_worker.ex`. 
Find the `perform/1` success branch (around line 41-58): + +```elixir + now = DateTime.utc_now() |> DateTime.truncate(:second) + summary_map = ensure_map(summary) + + Imports.update_import_status(import_job, "completed", %{ + summary: summary_map, + completed_at: now + }) + + Imports.wipe_api_key(import_job) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + + # Enqueue photo sync (separate job) if the user opted in + maybe_enqueue_photo_sync(import_job) + + Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}") + :ok +``` + +Insert the misc-worker enqueue and strip the plan from the persisted summary: + +```elixir + now = DateTime.utc_now() |> DateTime.truncate(:second) + summary_map = ensure_map(summary) + {misc_plan, persisted_summary} = Map.pop(summary_map, :misc_data_plan, []) + persisted_summary = Map.delete(persisted_summary, "misc_data_plan") + + Imports.update_import_status(import_job, "completed", %{ + summary: persisted_summary, + completed_at: now + }) + + maybe_enqueue_misc_data_worker(import_job, misc_plan) + Imports.wipe_api_key(import_job) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, persisted_summary}) + + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + + # Enqueue photo sync (separate job) if the user opted in + maybe_enqueue_photo_sync(import_job) + + Logger.info("MonicaApi import #{import_id} completed: #{inspect(persisted_summary)}") + :ok +``` + +Note: the `maybe_enqueue_misc_data_worker` call happens BEFORE `wipe_api_key` because the worker needs the still-encrypted key passed as an arg, mirroring the photo-sync pattern. 
+ +Add the helper below `maybe_enqueue_photo_sync/1`: + +```elixir + defp maybe_enqueue_misc_data_worker(_import_job, []), do: :ok + + defp maybe_enqueue_misc_data_worker(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => import_job.api_url, + "credential_api_key" => import_job.api_key_encrypted, + "plan" => plan + } + |> MonicaMiscDataWorker.new() + |> Oban.insert() + end +``` + +Add the alias near the top of the file alongside `MonicaPhotoSyncWorker` (the helper above uses the short name, so the alias is not reported as unused under `--warnings-as-errors`): + +```elixir + alias Kith.Workers.MonicaMiscDataWorker +``` + +- [ ] **Step 7: Add a boundary regression test** + +In `test/kith/workers/monica_api_crawl_worker_test.exs`, add a new test inside `describe "perform/1"`: + +```elixir + test "enqueues MonicaMiscDataWorker with the plan from crawl summary", + %{user: user, account_id: account_id} do + # This boundary test guards the wizard → crawl → misc-worker contract: + # the misc_data_plan key produced by MonicaApi.crawl/5 must reach + # MonicaMiscDataWorker.new/1 unmodified, just as auto_merge_duplicates + # had to reach MonicaApi.crawl/5 (Bug C in the previous PR). + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"calls" => true} + }) + + # Stub Monica to return one contact with statistics indicating two + # calls exist — collect_misc_data/5 should emit a plan entry for it. 
+ Req.Test.stub(MonicaApiStub, fn conn -> + cond do + String.contains?(conn.request_path, "/api/contacts") -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 7, + "first_name" => "Plan", + "last_name" => "Test", + "statistics" => %{"number_of_calls" => 2} + } + ], + "meta" => %{"total" => 1, "last_page" => 1} + }) + + true -> + Req.Test.json(conn, %{"data" => []}) + end + end) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + # Misc worker should now be enqueued with a non-empty plan including + # "calls" for the imported contact. + assert_enqueued( + worker: Kith.Workers.MonicaMiscDataWorker, + args: %{"import_id" => import_job.id} + ) + end +``` + +(The exact stub_name and helper to inject Req.Test will mirror the existing tests in this file — adapt as needed.) + +- [ ] **Step 8: Run the cross-cutting test suite** + +Run: `mix test test/kith/workers/monica_api_crawl_worker_test.exs test/kith/workers/monica_misc_data_worker_test.exs test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS, all tests. + +- [ ] **Step 9: Run the full suite + quality gate** + +Run: `mix quality && mix test` + +Expected: PASS. No new credo, dialyzer, or sobelow findings. + +- [ ] **Step 10: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex lib/kith/workers/monica_api_crawl_worker.ex test/kith/workers/monica_api_crawl_worker_test.exs +git commit -m "refactor: extract Phase 4 to MonicaMiscDataWorker; enqueue from crawl worker" +``` + +--- + +## Task 10: End-to-end verification + +**Files:** *(no code changes — verification only)* + +- [ ] **Step 1: Confirm the full test suite passes** + +Run: `mix test` + +Expected: PASS, 1100+ tests, 0 failures. Count should match commit `6af91bf` plus new tests from this PR. + +- [ ] **Step 2: Confirm static analysis is clean** + +Run: `mix quality` + +Expected: `done (passed successfully)`. No new credo, sobelow, or dialyzer findings beyond the existing `.dialyzer_ignore.exs` skips. 
+ +- [ ] **Step 3: Smoke test on dev — wipe and re-import** + +Manual: +- Start dev server: `iex -S mix phx.server` +- In IEx, cancel any in-flight imports (run `import Ecto.Query` first so `from/2` is available): `Oban.cancel_all_jobs(from j in Oban.Job, where: j.worker in ["Kith.Workers.MonicaApiCrawlWorker", "Kith.Workers.MonicaMiscDataWorker", "Kith.Workers.MonicaPhotoSyncWorker"] and j.state in ["executing", "available", "scheduled", "retryable"])` +- Reset the dev account (bind `account_id` and `user_id` to the dev account's IDs first): `Kith.Workers.AccountResetWorker.new(%{"account_id" => account_id, "user_id" => user_id}) |> Oban.insert()` +- Wait for reset to complete; verify contact list is empty. +- Open `/settings/import` in browser; choose Monica API; enter URL and API key; ensure all defaults (including `auto_merge_duplicates`, `pets`, `calls`, etc.) are checked. +- Start the import; observe. + +Expected: +- `MonicaApiCrawlWorker` completes in **under 2 minutes** for ~1000 contacts (Phase 1+2+3 only, throttled at 55/min for ~20-30 pagination + auxiliary calls). +- Wizard transitions to "import complete" at that point; the duplicates tab is reachable and shows a small handful of legitimate pending candidates, NOT 6000. +- `MonicaMiscDataWorker` appears in the Oban dashboard as a separate executing job. +- Its runtime depends on actual misc data volume; for a typical CRM with sparse pet/debt/gift data, **single-digit minutes**. +- Logs show no `"3 attempts left forever"` retry spam. If any 429 fires (e.g. tighter self-hosted Monica limit), Req's built-in retry handles it once and proceeds. + +- [ ] **Step 4: Verify summary shape** + +In IEx (with `import_id` bound to the ID of the import you just ran): + +```elixir +import_job = Kith.Imports.get_import!(import_id) +import_job.summary +``` + +Expected after `MonicaApiCrawlWorker` completes: +```elixir +%{ + "imported" => 1000, + "contacts" => 1000, + "notes" => N, + "skipped" => 0, + "merged" => M, + "error_count" => 0, + "errors" => [] +} +``` + +The `"misc_data_plan"` key should be **absent** (stripped by `MonicaApiCrawlWorker` before persisting). 
+ +After `MonicaMiscDataWorker` completes, refetch (with `import_id` bound to the import's ID): + +```elixir +Kith.Imports.get_import!(import_id).summary["misc"] +``` + +Expected: +```elixir +%{ + "pets" => P, + "calls" => C, + "activities" => A, + "gifts" => G, + "debts" => D, + "tasks" => T, + "reminders" => R, + "conversations" => Co +} +``` + +with counts reflecting actual data imported. + +- [ ] **Step 5: Final cleanup commit (if any verification adjustments needed)** + +If smoke testing surfaces any small fixes (typos in log lines, edge cases in the plan filter), commit them as a separate small fix. Otherwise no commit needed for this task. + +- [ ] **Step 6: Push the branch** + +```bash +git push origin fix/duplicate-detection +``` + +Expected: GitHub shows the new commits on top of `6af91bf`. Open a PR if not already open, or update the existing one. + +--- + +## Self-review checklist + +Run through this once before handing off: + +1. **Spec coverage:** + - Part 1 (extract Phase 4): Tasks 7-9 ✓ + - Part 2 (rate limiter): Tasks 1-2 ✓ + - Part 3 (collapse retry): Task 3 ✓ + - Part 4 (statistics short-circuit): Task 7 ✓ + - Part 5a (persistent_term cleanup): Task 6 ✓ + - Part 5b (normalize: false opt): Tasks 4-5 ✓ + - Tests for all of the above: Tasks 1, 4, 5, 7, 8, 9 ✓ + - Verification: Task 10 ✓ + +2. **Placeholders:** All steps contain concrete code, exact commands, exact paths. Each instruction in the cutover task (Task 9) explicitly tells the engineer to `grep` first to find line ranges before deleting — no "delete the appropriate code" hand-waving. + +3. **Type consistency:** + - `MonicaApiCrawlWorker` enqueues with arg keys `"import_id"`, `"credential_url"`, `"credential_api_key"`, `"plan"` (Task 9 Step 6); `MonicaMiscDataWorker.perform/1` reads exactly those keys (Task 8 Step 5). 
✓ + - `crawl/5` returns `misc_data_plan: ...` (atom key, Task 7 Step 5); `MonicaApiCrawlWorker` pops `:misc_data_plan` from `summary_map` (defaulting to `[]` when the key is absent) and then deletes the string key `"misc_data_plan"`, so neither shape is persisted. Only the atom-keyed plan is enqueued — confirm `ensure_map/1` does not stringify keys, otherwise the plan is silently dropped. ✓ + - `collect_misc_data` stringifies endpoints before storing in the plan (Task 7 Step 3); `MonicaMiscDataWorker.fire_endpoint/5` pattern-matches on strings (`"pets"`, `"calls"`, …) (Task 8 Step 5). ✓ + - `Contacts.create_contact_field/3` accepts `opts` as a keyword list (Task 4); Monica caller passes `normalize: false` (Task 5). ✓ + - `normalize_field_value/3` takes `ctx` (Task 6 Step 5); caller in `import_single_contact_field` passes `ctx` (Task 6 Step 6). ✓ From 81ba71416ef30a102d66bccb6508a0130830718c Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:17:45 +0300 Subject: [PATCH 28/58] feat: add Monica API per-host rate limiter (55/min) --- .../sources/monica_api/rate_limiter.ex | 48 +++++++++++++ .../sources/monica_api/rate_limiter_test.exs | 72 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 lib/kith/imports/sources/monica_api/rate_limiter.ex create mode 100644 test/kith/imports/sources/monica_api/rate_limiter_test.exs diff --git a/lib/kith/imports/sources/monica_api/rate_limiter.ex b/lib/kith/imports/sources/monica_api/rate_limiter.ex new file mode 100644 index 0000000..f84bd70 --- /dev/null +++ b/lib/kith/imports/sources/monica_api/rate_limiter.ex @@ -0,0 +1,48 @@ +defmodule Kith.Imports.Sources.MonicaApi.RateLimiter do + @moduledoc """ + Per-host token bucket for outbound Monica API calls. + + Configured at five tokens below Monica's documented default of 60 requests + per minute, leaving a five-call safety margin so a small clock-skew or + burst on Monica's side does not push us into the 429 window. + + Configurable via: + + config :kith, :monica_rate_limit, 55 + + per-test overrides via `Application.put_env/3`. 
+ + Hammer (already a dep) supplies the underlying token bucket; we use a + bucket key per Monica host so independent Monica instances do not share + a quota. Calls block the caller process via `Process.sleep/1` until a + token is available, then return `:ok`. + """ + + @default_scale_ms 60_000 + @default_limit 55 + @default_retry_sleep_ms 1_100 + + @spec wait!(String.t()) :: :ok + def wait!(url_or_host) when is_binary(url_or_host) do + bucket = bucket_key(url_or_host) + limit = Application.get_env(:kith, :monica_rate_limit, @default_limit) + scale_ms = Application.get_env(:kith, :monica_rate_limit_scale_ms, @default_scale_ms) + + retry_sleep_ms = + Application.get_env(:kith, :monica_rate_limit_retry_sleep_ms, @default_retry_sleep_ms) + + case Hammer.check_rate(bucket, scale_ms, limit) do + {:allow, _count} -> + :ok + + {:deny, _retry_after_ms} -> + Process.sleep(retry_sleep_ms) + wait!(url_or_host) + end + end + + defp bucket_key(url_or_host) do + host = URI.parse(url_or_host).host || url_or_host + "monica_api:#{host}" + end +end diff --git a/test/kith/imports/sources/monica_api/rate_limiter_test.exs b/test/kith/imports/sources/monica_api/rate_limiter_test.exs new file mode 100644 index 0000000..48eca8f --- /dev/null +++ b/test/kith/imports/sources/monica_api/rate_limiter_test.exs @@ -0,0 +1,72 @@ +defmodule Kith.Imports.Sources.MonicaApi.RateLimiterTest do + use ExUnit.Case, async: false + + alias Kith.Imports.Sources.MonicaApi.RateLimiter + + # Tests run with the real Hammer backend; we use a unique host per test + # so buckets do not collide between tests. We override the scale window + # and retry sleep to keep the suite fast — the production values live + # in config/config.exs. 
+ + setup do + prev_limit = Application.get_env(:kith, :monica_rate_limit) + prev_scale = Application.get_env(:kith, :monica_rate_limit_scale_ms) + prev_retry = Application.get_env(:kith, :monica_rate_limit_retry_sleep_ms) + + Application.put_env(:kith, :monica_rate_limit, 1) + Application.put_env(:kith, :monica_rate_limit_scale_ms, 300) + Application.put_env(:kith, :monica_rate_limit_retry_sleep_ms, 50) + + on_exit(fn -> + Application.put_env(:kith, :monica_rate_limit, prev_limit) + Application.put_env(:kith, :monica_rate_limit_scale_ms, prev_scale) + Application.put_env(:kith, :monica_rate_limit_retry_sleep_ms, prev_retry) + end) + + :ok + end + + defp unique_host, do: "test-#{System.unique_integer([:positive])}.example" + + describe "wait!/1" do + test "returns :ok immediately while under the per-window budget" do + host = unique_host() + + {us, _} = + :timer.tc(fn -> assert :ok = RateLimiter.wait!("https://#{host}") end) + + assert us < 30_000, "expected sub-30ms for one call under the budget, got #{us}us" + end + + test "sleeps once the budget is exhausted" do + host = unique_host() + :ok = RateLimiter.wait!("https://#{host}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host}") end) + + assert us >= 30_000, "expected ≥30ms wait when over budget, got #{us}us" + assert us < 1_000_000, "did not expect ≥1s wait; window should have rolled by now" + end + + test "per-host buckets do not share quota" do + host_a = unique_host() + host_b = unique_host() + + :ok = RateLimiter.wait!("https://#{host_a}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host_b}") end) + assert us < 30_000, "host_b should be in its own bucket" + end + + test "extracts the host portion of a URL for the bucket key" do + host = unique_host() + url1 = "https://#{host}/api/contacts" + url2 = "https://#{host}/api/me" + + :ok = RateLimiter.wait!(url1) + + {us, _} = :timer.tc(fn -> RateLimiter.wait!(url2) end) + assert us >= 30_000, "same host → same bucket → second call 
should wait" + end + end +end From 4daf1ea532b4fd9054b44bc4e89132813cb230c0 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:18:19 +0300 Subject: [PATCH 29/58] chore: configure Monica API rate limit (55/min prod, unlimited test) --- config/config.exs | 4 ++++ config/test.exs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/config/config.exs b/config/config.exs index e93ee72..e84d75c 100644 --- a/config/config.exs +++ b/config/config.exs @@ -14,6 +14,10 @@ config :mime, :types, %{"text/vcard" => ["vcf"], "application/json" => ["json"]} # locale-aware territory data without an explicit per-call backend argument. config :ex_cldr, default_backend: Kith.Cldr +# Outbound rate limit for Monica API calls. Five below the documented +# default of 60 req/min leaves a five-call safety margin. +config :kith, :monica_rate_limit, 55 + config :kith, :scopes, user: [ default: true, diff --git a/config/test.exs b/config/test.exs index 215c77e..889cf4d 100644 --- a/config/test.exs +++ b/config/test.exs @@ -30,6 +30,10 @@ config :kith, Oban, testing: :manual # rules (NANP "555" prefixes, etc.) don't diverge from real behavior. config :ex_phone_number, metadata_file: Path.join("resources", "PhoneNumberMetadata.xml") +# Effectively unthrottled in tests — throttle logic is exercised in +# isolation in rate_limiter_test.exs, not via the full crawl integration. 
+config :kith, :monica_rate_limit, 1_000_000 + # Disable PromEx in tests (its Ecto poller conflicts with sandbox ownership) config :kith, Kith.PromEx, disabled: true From 23169f9c4b205650eddcbb36a93c5b9568ecd843 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:18:29 +0300 Subject: [PATCH 30/58] refactor: collapse Monica double-retry to Req's built-in + RateLimiter --- lib/kith/imports/sources/monica_api.ex | 43 +++++++++----------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index d3fe5b5..c483ca8 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -29,14 +29,13 @@ defmodule Kith.Imports.Sources.MonicaApi do alias Kith.Contacts alias Kith.Contacts.PhoneFormatter alias Kith.Imports + alias Kith.Imports.Sources.MonicaApi.RateLimiter alias Kith.Repo alias Kith.Workers.MonicaDocumentImportWorker require Logger @page_limit 100 - @max_rate_limit_retries 3 - @rate_limit_sleep_ms :timer.seconds(65) # ── Behaviour callbacks ─────────────────────────────────────────────── @@ -1099,40 +1098,28 @@ defmodule Kith.Imports.Sources.MonicaApi do # ── HTTP helpers ───────────────────────────────────────────────────── defp api_get(credential, url, params \\ []) do + RateLimiter.wait!(credential.url) + headers = [{"Authorization", "Bearer #{credential.api_key}"}, {"Accept", "application/json"}] req_options = Map.get(credential, :req_options, []) - options = [headers: headers, params: params] ++ req_options + + options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ req_options Req.get(url, options) end defp api_get_json(credential, url, params) do - api_get_json_with_retry(credential, url, params, 0) - end - - defp api_get_json_with_retry(_credential, _url, _params, retries) - when retries >= @max_rate_limit_retries do 
- {:error, :rate_limited} - end - - defp api_get_json_with_retry(credential, url, params, retries) do case api_get(credential, url, params) do - {:ok, %{status: 200, body: body}} when is_map(body) -> - {:ok, body} - - {:ok, %{status: 429}} -> - Logger.info( - "[MonicaApi] Rate limited, sleeping #{@rate_limit_sleep_ms}ms (retry #{retries + 1})" - ) - - Process.sleep(@rate_limit_sleep_ms) - api_get_json_with_retry(credential, url, params, retries + 1) - - {:ok, %{status: status}} -> - {:error, "Unexpected status: #{status}"} - - {:error, reason} -> - {:error, reason} + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} end end From c10df7514fe4e14ce884508399d8404026764d6a Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:20:59 +0300 Subject: [PATCH 31/58] feat: Contacts.create_contact_field/3 supports normalize: false opt --- lib/kith/contacts.ex | 9 ++++++-- test/kith/contacts_sub_entities_test.exs | 28 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/lib/kith/contacts.ex b/lib/kith/contacts.ex index d92f9e4..4b9c2c7 100644 --- a/lib/kith/contacts.ex +++ b/lib/kith/contacts.ex @@ -387,8 +387,13 @@ defmodule Kith.Contacts do ContactField |> scope_to_account(account_id) |> Repo.get!(id) end - def create_contact_field(%Contact{} = contact, attrs) do - attrs = maybe_normalize_phone(attrs) + def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do + attrs = + if Keyword.get(opts, :normalize, true) do + maybe_normalize_phone(attrs) + else + attrs + end %ContactField{contact_id: contact.id, account_id: contact.account_id} |> ContactField.changeset(attrs) diff --git a/test/kith/contacts_sub_entities_test.exs b/test/kith/contacts_sub_entities_test.exs index be7823a..27ae4d7 100644 
--- a/test/kith/contacts_sub_entities_test.exs +++ b/test/kith/contacts_sub_entities_test.exs @@ -130,6 +130,34 @@ defmodule Kith.ContactsSubEntitiesTest do {:ok, _} = Contacts.delete_contact_field(field) assert Contacts.list_contact_fields(contact.id) == [] end + + test "create_contact_field/3 with normalize: false skips phone normalization", + %{contact: contact, account_id: account_id} do + phone_type = + Enum.find(Contacts.list_contact_field_types(account_id), fn t -> + t.protocol in ["tel", "tel:"] + end) + + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = + Contacts.create_contact_field(contact, attrs, normalize: false) + + assert field.value == "+1 (202) 555-0100" + end + + test "create_contact_field/3 with normalize: true (default) normalizes phone", + %{contact: contact, account_id: account_id} do + phone_type = + Enum.find(Contacts.list_contact_field_types(account_id), fn t -> + t.protocol in ["tel", "tel:"] + end) + + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = Contacts.create_contact_field(contact, attrs) + assert field.value == "+12025550100" + end end ## Relationships From 4220e9363f1cbb4c37be316442e6b290a1d9e823 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:21:11 +0300 Subject: [PATCH 32/58] perf: skip redundant normalization in Monica contact_field writes --- lib/kith/imports/sources/monica_api.ex | 2 +- test/kith/imports/sources/monica_api_test.exs | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index c483ca8..d251545 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -451,7 +451,7 @@ defmodule Kith.Imports.Sources.MonicaApi do defp create_contact_field(contact, field, cft_id, value, 
import_job) do attrs = %{"value" => value, "contact_field_type_id" => cft_id} - case Contacts.create_contact_field(contact, attrs) do + case Contacts.create_contact_field(contact, attrs, normalize: false) do {:ok, cf} -> maybe_record_entity(import_job, "contact_field", field["uuid"], "contact_field", cf.id) diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index 59ecc4c..0bbf227 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -908,6 +908,38 @@ defmodule Kith.Imports.Sources.MonicaApiTest do assert "+12025550100" in fields assert "+442079460958" in fields end + + test "phone normalization happens exactly once during import", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 99, + first_name: "OnceOnly", + contact_fields: [ + contact_field_json(content: "(202) 555-0100", type_name: "Phone") + ] + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, _} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "phone_default_region" => "US" + }) + + rec = Imports.find_import_record(account_id, "monica_api", "contact", "99") + + values = + Repo.all(from cf in Contacts.ContactField, where: cf.contact_id == ^rec.local_entity_id) + |> Enum.map(& &1.value) + + assert "+12025550100" in values + end end # ── Behaviour callbacks ────────────────────────────────────────────── From 90f3ee83331663b5cfe96edb913a79e952c5cd60 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:24:49 +0300 Subject: [PATCH 33/58] perf: replace :persistent_term phone-cft cache with ref_data map --- lib/kith/imports/sources/monica_api.ex | 71 +++++++++++++++----------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git 
a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index d251545..b45f390 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -406,7 +406,7 @@ defmodule Kith.Imports.Sources.MonicaApi do cft_name = get_in(field, ["contact_field_type", "name"]) cft_id = if cft_name, do: Map.get(ref_data.contact_field_types, cft_name) raw_value = field["content"] - value = normalize_field_value(raw_value, cft_id, ctx.opts) + value = normalize_field_value(raw_value, cft_id, ref_data, ctx) if cft_id && value && !contact_field_duplicate?(contact.id, cft_id, value) do create_contact_field(contact, field, cft_id, value, ctx.import_job) @@ -415,12 +415,11 @@ defmodule Kith.Imports.Sources.MonicaApi do # Normalize phone fields to E.164 at import time so detection and intra-contact # dedup do simple equality. Other field types (email, social, etc) pass through. - defp normalize_field_value(nil, _cft_id, _opts), do: nil + defp normalize_field_value(nil, _cft_id, _ref_data, _ctx), do: nil - defp normalize_field_value(value, cft_id, opts) when is_binary(value) do - if phone_field_type?(cft_id) do - region = opts["phone_default_region"] - region = if region in [nil, ""], do: nil, else: region + defp normalize_field_value(value, cft_id, ref_data, ctx) when is_binary(value) do + if cft_id && Map.has_key?(ref_data.phone_cft_ids, cft_id) do + region = parse_phone_region(ctx.opts["phone_default_region"]) {:ok, normalized} = PhoneFormatter.normalize(value, region) normalized || value else @@ -428,25 +427,8 @@ defmodule Kith.Imports.Sources.MonicaApi do end end - defp phone_field_type?(nil), do: false - - defp phone_field_type?(cft_id) do - case :persistent_term.get({__MODULE__, :phone_cft, cft_id}, :miss) do - :miss -> - result = - Repo.exists?( - from(t in Contacts.ContactFieldType, - where: t.id == ^cft_id and fragment("? 
LIKE 'tel%'", t.protocol) - ) - ) - - :persistent_term.put({__MODULE__, :phone_cft, cft_id}, result) - result - - result -> - result - end - end + defp parse_phone_region(region) when region in [nil, ""], do: nil + defp parse_phone_region(region) when is_binary(region), do: region defp create_contact_field(contact, field, cft_id, value, import_job) do attrs = %{"value" => value, "contact_field_type_id" => cft_id} @@ -970,11 +952,13 @@ defmodule Kith.Imports.Sources.MonicaApi do genders = collect_api_genders(contacts) tags = collect_api_tags(contacts) cfts = collect_api_contact_field_types(contacts) + cft_map = find_or_create_contact_field_types(account_id, cfts) %{ genders: find_or_create_genders(account_id, genders), tags: find_or_create_tags(account_id, tags), - contact_field_types: find_or_create_contact_field_types(account_id, cfts) + contact_field_types: cft_map, + phone_cft_ids: phone_cft_id_map(account_id, Map.values(cft_map)) } end @@ -994,14 +978,23 @@ defmodule Kith.Imports.Sources.MonicaApi do |> collect_api_contact_field_types() |> Enum.reject(&Map.has_key?(ref_data.contact_field_types, &1)) + added_cfts = find_or_create_contact_field_types(account_id, new_cfts) + + phone_cft_ids = + if new_cfts == [] do + ref_data.phone_cft_ids + else + Map.merge( + ref_data.phone_cft_ids, + phone_cft_id_map(account_id, Map.values(added_cfts)) + ) + end + %{ genders: Map.merge(ref_data.genders, find_or_create_genders(account_id, new_genders)), tags: Map.merge(ref_data.tags, find_or_create_tags(account_id, new_tags)), - contact_field_types: - Map.merge( - ref_data.contact_field_types, - find_or_create_contact_field_types(account_id, new_cfts) - ) + contact_field_types: Map.merge(ref_data.contact_field_types, added_cfts), + phone_cft_ids: phone_cft_ids } end @@ -1077,6 +1070,22 @@ defmodule Kith.Imports.Sources.MonicaApi do end) end + # O(1)-lookup map of phone-protocol contact_field_type IDs. 
A plain map + # (`%{id => true}`) is used rather than a MapSet to keep dialyzer happy + # with the ref_data shape inference. + defp phone_cft_id_map(_account_id, []), do: %{} + + defp phone_cft_id_map(account_id, cft_ids) when is_list(cft_ids) do + Repo.all( + from t in Contacts.ContactFieldType, + where: t.id in ^cft_ids, + where: is_nil(t.account_id) or t.account_id == ^account_id, + where: fragment("? LIKE 'tel%'", t.protocol), + select: t.id + ) + |> Map.new(&{&1, true}) + end + defp find_or_create_relationship_type(_account_id, nil, _reverse), do: nil defp find_or_create_relationship_type(account_id, name, reverse_name) do From 56d5911f032d1c0511a501305e0d8156b9ba4ed6 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:27:49 +0300 Subject: [PATCH 34/58] feat: collect misc-data plan during Monica crawl --- lib/kith/imports/sources/monica_api.ex | 59 ++++++++- test/kith/imports/sources/monica_api_test.exs | 123 ++++++++++++++++++ test/support/fixtures/monica_api_fixtures.ex | 20 +-- 3 files changed, 189 insertions(+), 13 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index b45f390..7761036 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -135,7 +135,8 @@ defmodule Kith.Imports.Sources.MonicaApi do skipped: acc.skipped, merged: merge_result.merged, error_count: error_count, - errors: Enum.take(all_errors, 50) + errors: Enum.take(all_errors, 50), + misc_data_plan: Enum.reverse(deferred.misc_data) }} catch :cancelled -> @@ -158,7 +159,12 @@ defmodule Kith.Imports.Sources.MonicaApi do page: 1, total: nil, acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, - deferred: %{first_met_through: [], relationships: [], extra_notes: []}, + deferred: %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }, ref_data: nil, global_idx: 0 } @@ -388,7 
+394,7 @@ defmodule Kith.Imports.Sources.MonicaApi do import_api_tags(contact, api_contact, ref_data) # Collect deferred data - deferred = collect_deferred_data(api_contact, source_id, deferred) + deferred = collect_deferred_data(api_contact, source_id, contact.id, deferred, ctx.opts) acc = %{acc | contacts: acc.contacts + 1, notes: acc.notes + n} {acc, deferred} @@ -519,11 +525,56 @@ defmodule Kith.Imports.Sources.MonicaApi do end) end - defp collect_deferred_data(api_contact, source_id, deferred) do + defp collect_deferred_data(api_contact, source_id, local_id, deferred, opts) do deferred |> collect_first_met_through(api_contact, source_id) |> collect_relationships(api_contact, source_id) |> collect_extra_notes(api_contact, source_id) + |> collect_misc_data(api_contact, source_id, local_id, opts) + end + + @misc_endpoints [ + {:calls, "number_of_calls"}, + {:activities, "number_of_activities"}, + {:gifts, "number_of_gifts"}, + {:debts, "number_of_debts"}, + {:tasks, "number_of_tasks"}, + {:reminders, "number_of_reminders"}, + {:conversations, "number_of_conversations"} + ] + + # Build a plan entry for a contact's per-contact extra-data endpoints. + # An endpoint is included only if (a) the wizard opt for that data type is + # not explicitly false AND (b) Monica's `statistics.number_of_X` reports + # > 0 (or the stat field is missing — safer to fetch than to silently + # skip when Monica's payload shape is unfamiliar). + # + # `:pets` has no statistics field in Monica's contact payload, so it is + # included whenever the wizard opt is on. The redundant fetch for pet-free + # contacts is the documented cost. 
+ defp collect_misc_data(deferred, api_contact, source_id, local_id, opts) do + stats = api_contact["statistics"] || %{} + + endpoints = + @misc_endpoints + |> Enum.filter(fn {key, stat_field} -> + opts[Atom.to_string(key)] != false and (stats[stat_field] || 1) > 0 + end) + |> Enum.map(&elem(&1, 0)) + + endpoints = if opts["pets"] != false, do: [:pets | endpoints], else: endpoints + + if endpoints == [] do + deferred + else + entry = %{ + source_id: to_string(source_id), + local_id: local_id, + endpoints: Enum.map(endpoints, &Atom.to_string/1) + } + + %{deferred | misc_data: [entry | deferred.misc_data]} + end end defp collect_first_met_through(deferred, api_contact, source_id) do diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index 0bbf227..2d4e001 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -942,6 +942,129 @@ defmodule Kith.Imports.Sources.MonicaApiTest do end end + describe "crawl/5 — misc-data plan" do + test "includes a contact when statistics.number_of_calls > 0", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 1, + first_name: "Has", + last_name: "Calls", + statistics: %{"number_of_calls" => 3} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false + }) + + assert [%{source_id: "1", endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + + test "excludes a contact when all opts are off", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 2, + first_name: "AllOff", + statistics: %{"number_of_calls" => 5, "number_of_gifts" => 5} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + 
Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => false, + "gifts" => false, + "pets" => false, + "activities" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert summary.misc_data_plan == [] + end + + test "includes :pets unconditionally when opt is on (no stat field)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 3, + first_name: "PetsOnly", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "pets" => true, + "calls" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: ["pets"]}] = summary.misc_data_plan + end + + test "missing statistic field is treated as >=1 (safe default)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 4, + first_name: "NoStats", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + end + # ── Behaviour callbacks ────────────────────────────────────────────── describe 
"behaviour callbacks" do diff --git a/test/support/fixtures/monica_api_fixtures.ex b/test/support/fixtures/monica_api_fixtures.ex index 7b230e6..49be7d5 100644 --- a/test/support/fixtures/monica_api_fixtures.ex +++ b/test/support/fixtures/monica_api_fixtures.ex @@ -49,15 +49,17 @@ defmodule Kith.MonicaApiFixtures do }, "addresses" => overrides[:addresses] || [], "tags" => overrides[:tags] || [], - "statistics" => %{ - "number_of_calls" => 0, - "number_of_notes" => overrides[:number_of_notes] || 0, - "number_of_activities" => 0, - "number_of_reminders" => 0, - "number_of_tasks" => 0, - "number_of_gifts" => 0, - "number_of_debts" => 0 - }, + "statistics" => + overrides[:statistics] || + %{ + "number_of_calls" => 0, + "number_of_notes" => overrides[:number_of_notes] || 0, + "number_of_activities" => 0, + "number_of_reminders" => 0, + "number_of_tasks" => 0, + "number_of_gifts" => 0, + "number_of_debts" => 0 + }, "contactFields" => overrides[:contact_fields] || [], "notes" => overrides[:notes] || [], "account" => %{"id" => 1}, From 913e4d0ec5defba8b1d2a2d73808549202f988e2 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:34:44 +0300 Subject: [PATCH 35/58] feat: add MonicaMiscDataWorker (per-contact extra data, plan-driven) --- lib/kith/contacts.ex | 11 + .../sources/monica_api/rate_limiter.ex | 6 +- lib/kith/workers/monica_misc_data_worker.ex | 692 ++++++++++++++++++ .../workers/monica_misc_data_worker_test.exs | 161 ++++ 4 files changed, 867 insertions(+), 3 deletions(-) create mode 100644 lib/kith/workers/monica_misc_data_worker.ex create mode 100644 test/kith/workers/monica_misc_data_worker_test.exs diff --git a/lib/kith/contacts.ex b/lib/kith/contacts.ex index 4b9c2c7..d5c7bef 100644 --- a/lib/kith/contacts.ex +++ b/lib/kith/contacts.ex @@ -387,6 +387,17 @@ defmodule Kith.Contacts do ContactField |> scope_to_account(account_id) |> Repo.get!(id) end + @doc """ + Fetch a contact by ID without 
scope enforcement, for use by the + Monica misc-data worker. The worker has already verified the contact + belongs to an import the user authorized; we just need the row. + + Returns `nil` if not found. + """ + def get_contact_for_misc(id) when is_integer(id) or is_binary(id) do + Repo.get(Contact, id) + end + def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do attrs = if Keyword.get(opts, :normalize, true) do diff --git a/lib/kith/imports/sources/monica_api/rate_limiter.ex b/lib/kith/imports/sources/monica_api/rate_limiter.ex index f84bd70..a9d75cc 100644 --- a/lib/kith/imports/sources/monica_api/rate_limiter.ex +++ b/lib/kith/imports/sources/monica_api/rate_limiter.ex @@ -25,11 +25,11 @@ defmodule Kith.Imports.Sources.MonicaApi.RateLimiter do @spec wait!(String.t()) :: :ok def wait!(url_or_host) when is_binary(url_or_host) do bucket = bucket_key(url_or_host) - limit = Application.get_env(:kith, :monica_rate_limit, @default_limit) - scale_ms = Application.get_env(:kith, :monica_rate_limit_scale_ms, @default_scale_ms) + limit = Application.get_env(:kith, :monica_rate_limit) || @default_limit + scale_ms = Application.get_env(:kith, :monica_rate_limit_scale_ms) || @default_scale_ms retry_sleep_ms = - Application.get_env(:kith, :monica_rate_limit_retry_sleep_ms, @default_retry_sleep_ms) + Application.get_env(:kith, :monica_rate_limit_retry_sleep_ms) || @default_retry_sleep_ms case Hammer.check_rate(bucket, scale_ms, limit) do {:allow, _count} -> diff --git a/lib/kith/workers/monica_misc_data_worker.ex b/lib/kith/workers/monica_misc_data_worker.ex new file mode 100644 index 0000000..298711b --- /dev/null +++ b/lib/kith/workers/monica_misc_data_worker.ex @@ -0,0 +1,692 @@ +defmodule Kith.Workers.MonicaMiscDataWorker do + @moduledoc """ + Oban worker that imports the per-contact "miscellaneous" data types + (pets, calls, activities, gifts, debts, tasks, reminders, conversations) + for an already-completed Monica API crawl. 
+ + Enqueued by `Kith.Workers.MonicaApiCrawlWorker` on successful completion, + carrying: + + * `"import_id"` — the Import row this job belongs to. + * `"credential_url"`, `"credential_api_key"` — the credential needed to + keep calling Monica after the main crawl wipes `api_key_encrypted`. + Same pattern as `MonicaPhotoSyncWorker`. + * `"plan"` — list of `%{"source_id", "local_id", "endpoints"}` maps + pre-filtered during the main crawl using Monica's `statistics.*` + fields, so we only fire the endpoints with data. + + Throttled through `Kith.Imports.Sources.MonicaApi.RateLimiter` (same + per-host bucket as the main crawler). + + Exits early if the import has been cancelled. Contacts that were + soft-deleted between main-crawl completion and this job's dispatch are + silently skipped. + """ + + use Oban.Worker, queue: :imports, max_attempts: 3 + + require Logger + + import Ecto.Query, warn: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Imports.Sources.MonicaApi.RateLimiter + alias Kith.Repo + + @impl Oban.Worker + def timeout(_job), do: :timer.minutes(30) + + @impl Oban.Worker + def perform(%Oban.Job{args: args}) do + import_job = Imports.get_import!(args["import_id"]) + + if import_job.status in ["cancelled", "failed"] do + :ok + else + credential = build_credential(args) + plan = args["plan"] || [] + + counts = process_plan(plan, credential, import_job) + + summary = Map.put(import_job.summary || %{}, "misc", counts) + + Imports.update_import_status(import_job, import_job.status, %{summary: summary}) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_misc_complete, counts}) + + :ok + end + end + + defp build_credential(args) do + %{ + url: args["credential_url"], + api_key: args["credential_api_key"], + req_options: Application.get_env(:kith, :monica_req_options, []) + } + end + + defp process_plan(plan, credential, import_job) do + initial = %{ + "pets" => 0, + "calls" => 0, + "activities" => 
0, + "gifts" => 0, + "debts" => 0, + "tasks" => 0, + "reminders" => 0, + "conversations" => 0 + } + + user_id = import_job.user_id + + Enum.reduce(plan, initial, fn entry, counts -> + process_entry(entry, credential, user_id, import_job, counts) + end) + end + + defp process_entry(entry, credential, user_id, import_job, counts) do + contact = Contacts.get_contact_for_misc(entry["local_id"]) + + if contact == nil or not is_nil(contact.deleted_at) do + counts + else + Enum.reduce(entry["endpoints"] || [], counts, fn endpoint, counts -> + n = fire_endpoint(endpoint, credential, user_id, contact, entry["source_id"], import_job) + Map.update(counts, endpoint, n, &(&1 + n)) + end) + end + end + + defp fire_endpoint("pets", c, _u, contact, src, ij), + do: import_contact_pets(c, contact, src, ij) + + defp fire_endpoint("calls", c, _u, contact, src, ij), + do: import_contact_calls(c, contact, src, ij) + + defp fire_endpoint("activities", c, _u, contact, src, ij), + do: import_contact_activities(c, contact, src, ij) + + defp fire_endpoint("gifts", c, u, contact, src, ij), + do: import_contact_gifts(c, u, contact, src, ij) + + defp fire_endpoint("debts", c, u, contact, src, ij), + do: import_contact_debts(c, u, contact, src, ij) + + defp fire_endpoint("tasks", c, u, contact, src, ij), + do: import_contact_tasks(c, u, contact, src, ij) + + defp fire_endpoint("reminders", c, u, contact, src, ij), + do: import_contact_reminders(c, u, contact, src, ij) + + defp fire_endpoint("conversations", c, u, contact, src, ij), + do: import_contact_conversations(c, u, contact, src, ij) + + defp fire_endpoint(other, _, _, _, _, _) do + Logger.warning("[MonicaMiscData] unknown endpoint #{inspect(other)}; skipping") + 0 + end + + # ── HTTP wrapper ────────────────────────────────────────────────────── + + defp api_get_json(credential, url, params) do + RateLimiter.wait!(credential.url) + + headers = [ + {"Authorization", "Bearer #{credential.api_key}"}, + {"Accept", "application/json"} + ] + + 
options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ Map.get(credential, :req_options, []) + + case Req.get(url, options) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} + end + end + + # ── Per-contact endpoint helpers ───────────────────────────────────── + # + # Each top-level helper returns the count of items successfully imported + # for the per-endpoint summary aggregate. Per-item helpers return either + # `:ok` (success) or `{:error, _}` (skipped/failed). + + defp import_contact_pets(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/pets" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => pets}} when is_list(pets) -> + Enum.count(pets, fn pet -> + match?(:ok, import_single_pet(contact.account_id, contact, pet, import_job)) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch pets for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_pet(account_id, contact, pet_data, import_job) do + name = pet_data["name"] + species = normalize_pet_species(pet_data["pet_category"] || pet_data["species"]) + + if pet_duplicate?(contact.id, name, species) do + {:error, :duplicate} + else + attrs = %{ + "contact_id" => contact.id, + "name" => name || "Unknown", + "species" => species, + "breed" => non_empty_string(pet_data["breed"]), + "notes" => non_empty_string(pet_data["notes"]) + } + + case Kith.Pets.create_pet(account_id, attrs) do + {:ok, pet} -> + maybe_record_entity(import_job, "pet", pet_data["id"], "pet", pet.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] pet error: #{inspect_errors(reason)}") + {:error, reason} + end + end + end + + defp 
import_contact_calls(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/calls" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => calls}} when is_list(calls) -> + Enum.count(calls, fn call -> + match?(:ok, import_single_call(contact.account_id, contact, call, import_job)) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch calls for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_call(account_id, contact, call_data, import_job) do + occurred_at = parse_datetime(call_data["called_at"]) + + if is_nil(occurred_at) do + {:error, :no_timestamp} + else + attrs = %{ + "occurred_at" => occurred_at, + "notes" => non_empty_string(call_data["content"]), + "duration_mins" => call_data["duration"] + } + + case Kith.Activities.create_call(%{account_id: account_id, id: contact.id}, attrs) do + {:ok, call} -> + maybe_record_entity(import_job, "call", call_data["id"], "call", call.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] call error: #{inspect_errors(reason)}") + {:error, reason} + end + end + end + + defp import_contact_activities(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/activities" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => activities}} when is_list(activities) -> + Enum.count(activities, fn activity -> + match?(:ok, import_single_activity(contact.account_id, contact, activity, import_job)) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch activities for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_activity(account_id, contact, activity_data, import_job) do + occurred_at = + parse_datetime(activity_data["happened_at"] || activity_data["date_it_happened"]) + + attrs = %{ + "title" => activity_data["summary"] || 
activity_data["title"] || "Imported activity", + "description" => non_empty_string(activity_data["description"]), + "occurred_at" => occurred_at || DateTime.utc_now() + } + + case Kith.Activities.create_activity(account_id, attrs, [contact.id]) do + {:ok, activity} -> + maybe_record_entity(import_job, "activity", activity_data["id"], "activity", activity.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] activity error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_gifts(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/gifts" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => gifts}} when is_list(gifts) -> + Enum.count(gifts, fn gift -> + match?( + :ok, + import_single_gift(contact.account_id, user_id, contact, gift, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch gifts for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_gift(account_id, user_id, contact, gift_data, import_job) do + direction = + case gift_data["is_for"] do + "contact" -> "given" + _ -> "received" + end + + attrs = %{ + "contact_id" => contact.id, + "name" => gift_data["name"] || "Imported gift", + "description" => non_empty_string(gift_data["comment"]), + "direction" => direction, + "status" => + cond do + gift_data["has_been_offered"] -> "given" + gift_data["has_been_received"] -> "received" + true -> "idea" + end, + "amount" => gift_data["amount"], + "date" => parse_date_string(gift_data["date"]) + } + + case Kith.Gifts.create_gift(account_id, user_id, attrs) do + {:ok, gift} -> + maybe_record_entity(import_job, "gift", gift_data["id"], "gift", gift.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] gift error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_debts(credential, user_id, contact, source_id, 
import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/debts" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => debts}} when is_list(debts) -> + Enum.count(debts, fn debt -> + match?( + :ok, + import_single_debt(contact.account_id, user_id, contact, debt, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch debts for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_debt(account_id, user_id, contact, debt_data, import_job) do + direction = + case debt_data["in_debt"] do + "yes" -> "owed_by_me" + _ -> "owed_to_me" + end + + attrs = %{ + "contact_id" => contact.id, + "title" => debt_data["reason"] || "Imported debt", + "amount" => debt_data["amount"] || "0", + "direction" => direction, + "status" => if(debt_data["status"] == "complete", do: "settled", else: "active") + } + + case Kith.Debts.create_debt(account_id, user_id, attrs) do + {:ok, debt} -> + maybe_record_entity(import_job, "debt", debt_data["id"], "debt", debt.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] debt error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_tasks(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/tasks" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => tasks}} when is_list(tasks) -> + Enum.count(tasks, fn task -> + match?( + :ok, + import_single_task(contact.account_id, user_id, contact, task, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch tasks for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_task(account_id, user_id, contact, task_data, import_job) do + status = if task_data["completed"], do: "completed", else: "pending" + + attrs = %{ + "contact_id" => contact.id, + "title" => task_data["title"] || 
"Imported task", + "description" => non_empty_string(task_data["description"]), + "status" => status + } + + case Kith.Tasks.create_task(account_id, user_id, attrs) do + {:ok, task} -> + maybe_record_entity(import_job, "task", task_data["id"], "task", task.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] task error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_reminders(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/reminders" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => reminders}} when is_list(reminders) -> + Enum.count(reminders, fn reminder -> + match?( + :ok, + import_single_reminder(contact.account_id, user_id, contact, reminder, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch reminders for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_reminder(account_id, user_id, contact, reminder_data, import_job) do + {type, frequency} = map_monica_reminder_frequency(reminder_data["frequency_type"]) + + next_date = + parse_date_string(reminder_data["next_expected_date"]) || + Date.utc_today() + + attrs = %{ + "contact_id" => contact.id, + "type" => type, + "title" => reminder_data["title"] || "Imported reminder", + "frequency" => frequency, + "next_reminder_date" => next_date + } + + case Kith.Reminders.create_reminder(account_id, user_id, attrs) do + {:ok, reminder} -> + maybe_record_entity(import_job, "reminder", reminder_data["id"], "reminder", reminder.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] reminder error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_conversations(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/conversations" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => 
convos}} when is_list(convos) -> + Enum.count(convos, fn convo -> + match?( + :ok, + import_single_conversation( + contact.account_id, + user_id, + contact, + convo, + import_job + ) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch conversations for contact #{source_id}: " <> + inspect(reason) + ) + + 0 + end + end + + defp import_single_conversation(account_id, user_id, contact, convo_data, import_job) do + platform = + case convo_data["contact_field_type"] do + %{"name" => name} -> normalize_conversation_platform(name) + _ -> "other" + end + + attrs = %{ + "contact_id" => contact.id, + "platform" => platform, + "subject" => non_empty_string(convo_data["subject"]) + } + + case Kith.Conversations.create_conversation(account_id, user_id, attrs) do + {:ok, conversation} -> + maybe_record_entity( + import_job, + "conversation", + convo_data["id"], + "conversation", + conversation.id + ) + + import_conversation_messages(conversation, convo_data, import_job) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] conversation error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_conversation_messages(conversation, convo_data, import_job) do + messages = convo_data["messages"] || [] + + Enum.each(messages, fn msg -> + attrs = %{ + "body" => msg["content"] || msg["written_by_me_body"] || "", + "direction" => if(msg["written_by_me"], do: "sent", else: "received"), + "sent_at" => parse_datetime(msg["written_at"]) || DateTime.utc_now() + } + + case Kith.Conversations.add_message(conversation, attrs) do + {:ok, message} -> + maybe_record_entity(import_job, "message", msg["id"], "message", message.id) + + {:error, reason} -> + Logger.warning("[MonicaMiscData] message error: #{inspect_errors(reason)}") + end + end) + end + + # ── Local helpers (copied from MonicaApi) ──────────────────────────── + + defp normalize_pet_species(nil), do: "other" + + defp normalize_pet_species(species) 
when is_map(species), + do: normalize_pet_species(species["name"]) + + defp normalize_pet_species(species) when is_binary(species) do + normalized = String.downcase(species) + + if normalized in ~w(dog cat bird fish reptile rabbit hamster) do + normalized + else + "other" + end + end + + defp normalize_pet_species(_), do: "other" + + defp pet_duplicate?(contact_id, name, species) do + Repo.exists?( + from p in Kith.Contacts.Pet, + where: + p.contact_id == ^contact_id and + fragment("lower(coalesce(?, ''))", p.name) == + fragment("lower(coalesce(?, ''))", ^(name || "")) and + p.species == ^species + ) + end + + defp map_monica_reminder_frequency("one_time"), do: {"one_time", nil} + defp map_monica_reminder_frequency("week"), do: {"recurring", "weekly"} + defp map_monica_reminder_frequency("month"), do: {"recurring", "monthly"} + defp map_monica_reminder_frequency("year"), do: {"recurring", "annually"} + defp map_monica_reminder_frequency(_), do: {"one_time", nil} + + @platform_keywords [ + {"sms", "sms"}, + {"text", "sms"}, + {"whatsapp", "whatsapp"}, + {"telegram", "telegram"}, + {"email", "email"}, + {"instagram", "instagram"}, + {"messenger", "messenger"}, + {"facebook", "messenger"}, + {"signal", "signal"} + ] + + defp normalize_conversation_platform(name) when is_binary(name) do + normalized = String.downcase(name) + + Enum.find_value(@platform_keywords, "other", fn {keyword, platform} -> + if String.contains?(normalized, keyword), do: platform + end) + end + + defp normalize_conversation_platform(_), do: "other" + + defp non_empty_string(nil), do: nil + defp non_empty_string(""), do: nil + defp non_empty_string(s) when is_binary(s), do: s + defp non_empty_string(_), do: nil + + defp parse_datetime(nil), do: nil + + defp parse_datetime(str) when is_binary(str) do + case DateTime.from_iso8601(str) do + {:ok, dt, _offset} -> dt + _ -> nil + end + end + + defp parse_datetime(_), do: nil + + defp parse_date_string(nil), do: nil + + defp parse_date_string(str) when 
is_binary(str) do + case Date.from_iso8601(str) do + {:ok, date} -> + date + + {:error, _} -> + case DateTime.from_iso8601(str) do + {:ok, dt, _offset} -> DateTime.to_date(dt) + _ -> nil + end + end + end + + defp parse_date_string(_), do: nil + + defp inspect_errors(%Ecto.Changeset{} = changeset) do + Ecto.Changeset.traverse_errors(changeset, fn {msg, opts} -> + Regex.replace(~r"%{(\w+)}", msg, fn _, key -> + opts |> Keyword.get(String.to_existing_atom(key), key) |> to_string() + end) + end) + |> inspect() + end + + defp inspect_errors(other), do: inspect(other) + + defp maybe_record_entity(nil, _type, _id, _local_type, _local_id), do: :ok + defp maybe_record_entity(_import, _type, nil, _local_type, _local_id), do: :ok + + defp maybe_record_entity(import_job, type, source_id, local_type, local_id) do + Imports.record_imported_entity(import_job, type, to_string(source_id), local_type, local_id) + end +end diff --git a/test/kith/workers/monica_misc_data_worker_test.exs b/test/kith/workers/monica_misc_data_worker_test.exs new file mode 100644 index 0000000..da5ff7e --- /dev/null +++ b/test/kith/workers/monica_misc_data_worker_test.exs @@ -0,0 +1,161 @@ +defmodule Kith.Workers.MonicaMiscDataWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + + alias Kith.Imports + alias Kith.Workers.MonicaMiscDataWorker + + @stub_name MonicaMiscDataReqStub + + setup do + seed_reference_data!() + user = user_fixture() + + Application.put_env( + :kith, + :monica_req_options, + plug: {Req.Test, @stub_name}, + retry: false + ) + + on_exit(fn -> Application.delete_env(:kith, :monica_req_options) end) + + %{user: user, account_id: user.account_id} + end + + defp build_args(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key", + "plan" => plan + } + end + + 
defp api_import(account_id, user_id, api_options \\ %{}) do + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: api_options, + status: "completed" + }) + end + + describe "perform/1" do + test "fires only the endpoints listed in the plan", + %{user: user, account_id: account_id} do + contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [ + %{ + "source_id" => "42", + "local_id" => contact.id, + "endpoints" => ["calls", "gifts"] + } + ] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + paths = collect_requests([]) + assert "/api/contacts/42/calls" in paths + assert "/api/contacts/42/gifts" in paths + refute "/api/contacts/42/pets" in paths + refute "/api/contacts/42/activities" in paths + end + + test "exits early when the import is cancelled", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + {:ok, _} = Imports.update_import_status(import_job, "cancelled", %{}) + + contact = contact_fixture(account_id) + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "skips contacts whose local row has been soft-deleted", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + contact = contact_fixture(account_id) + + Repo.update_all( + from(c in Kith.Contacts.Contact, where: c.id == ^contact.id), + set: [deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)] + ) + + pid = 
self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "writes per-endpoint counts to import_job.summary['misc']", + %{user: user, account_id: account_id} do + contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case conn.request_path do + "/api/contacts/1/calls" -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "called_at" => "2025-01-01T10:00:00Z", "contact_called" => true}, + %{"id" => 2, "called_at" => "2025-01-02T10:00:00Z", "contact_called" => false} + ] + }) + + _ -> + Req.Test.json(conn, %{"data" => []}) + end + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + updated = Imports.get_import!(import_job.id) + assert is_map(updated.summary["misc"]) + assert updated.summary["misc"]["calls"] >= 0 + end + end + + defp collect_requests(acc) do + receive do + {:request, path} -> collect_requests([path | acc]) + after + 0 -> Enum.reverse(acc) + end + end +end From 07158edbd23a3664a098802702390f159128652e Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:41:09 +0300 Subject: [PATCH 36/58] refactor: extract Phase 4 to MonicaMiscDataWorker; enqueue from crawl worker --- lib/kith/imports/sources/monica_api.ex | 744 +----------------- lib/kith/workers/monica_api_crawl_worker.ex | 38 +- .../workers/monica_api_crawl_worker_test.exs | 53 ++ 3 files changed, 93 insertions(+), 742 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 
7761036..a486a19 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -107,9 +107,10 @@ defmodule Kith.Imports.Sources.MonicaApi do [] end - # Phase 4: Additional data types (per-contact endpoints) - extra_data_errors = - import_extra_data_types(credential, account_id, user_id, import_job, opts) + # Phase 4: Per-contact "misc" data (pets, calls, activities, gifts, debts, + # tasks, reminders, conversations) is now planned during the main crawl + # and dispatched as a separate `MonicaMiscDataWorker` Oban job by + # `MonicaApiCrawlWorker`. The plan is carried in `summary.misc_data_plan`. # Phase 5: Enqueue document import jobs (async, runs after main import) if opts["documents"] do @@ -120,12 +121,11 @@ defmodule Kith.Imports.Sources.MonicaApi do acc.errors ++ ref_errors ++ notes_errors ++ - merge_result.errors ++ - extra_data_errors + merge_result.errors error_count = acc.error_count + length(ref_errors) + length(notes_errors) + - length(merge_result.errors) + length(extra_data_errors) + length(merge_result.errors) {:ok, %{ @@ -1317,738 +1317,6 @@ defmodule Kith.Imports.Sources.MonicaApi do end end - # ── Phases 5-12: Additional per-contact data types ───────────────── - - defp import_extra_data_types(credential, account_id, user_id, import_job, opts) do - # Get all imported contact IDs for this job - contact_records = - Repo.all( - from(ir in Imports.ImportRecord, - where: - ir.import_id == ^import_job.id and - ir.source_entity_type == "contact", - select: {ir.source_entity_id, ir.local_entity_id} - ) - ) - - errors = - Enum.flat_map(contact_records, fn {source_id, local_id} -> - contact = - Repo.get(Contacts.Contact, local_id) - - if contact && is_nil(contact.deleted_at) do - import_per_contact_data( - credential, - account_id, - user_id, - contact, - source_id, - import_job, - opts - ) - else - [] - end - end) - - errors - end - - defp import_per_contact_data( - credential, - account_id, - user_id, - contact, - 
source_id, - import_job, - opts - ) do - errors = [] - base_url = credential.url - - # Phase 5: Pets - errors = - if opts["pets"] do - errors ++ - import_contact_pets(credential, base_url, account_id, contact, source_id, import_job) - else - errors - end - - # Phase 6: Calls - errors = - if opts["calls"] do - errors ++ - import_contact_calls( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 7: Activities - errors = - if opts["activities"] do - errors ++ - import_contact_activities( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 8: Gifts - errors = - if opts["gifts"] do - errors ++ - import_contact_gifts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 9: Debts - errors = - if opts["debts"] do - errors ++ - import_contact_debts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 10: Tasks - errors = - if opts["tasks"] do - errors ++ - import_contact_tasks( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 11: Reminders - errors = - if opts["reminders"] do - errors ++ - import_contact_reminders( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 12: Conversations - errors = - if opts["conversations"] do - errors ++ - import_contact_conversations( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - errors - end - - # ── Phase 5: Pets ────────────────────────────────────────────────── - - defp import_contact_pets(credential, base_url, account_id, contact, source_id, import_job) do - url = "#{base_url}/api/contacts/#{source_id}/pets" - - case 
api_get_json(credential, url, []) do - {:ok, %{"data" => pets}} when is_list(pets) -> - Enum.flat_map(pets, fn pet -> - import_single_pet(account_id, contact, pet, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch pets for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_pet(account_id, contact, pet_data, import_job) do - name = pet_data["name"] - species = normalize_pet_species(pet_data["pet_category"] || pet_data["species"]) - - if pet_duplicate?(contact.id, name, species) do - [] - else - attrs = %{ - "contact_id" => contact.id, - "name" => name || "Unknown", - "species" => species, - "breed" => non_empty_string(pet_data["breed"]), - "notes" => non_empty_string(pet_data["notes"]) - } - - case Kith.Pets.create_pet(account_id, attrs) do - {:ok, pet} -> - maybe_record_entity(import_job, "pet", pet_data["id"], "pet", pet.id) - [] - - {:error, reason} -> - ["Pet import error: #{inspect_errors(reason)}"] - end - end - end - - defp normalize_pet_species(nil), do: "other" - - defp normalize_pet_species(species) when is_map(species) do - normalize_pet_species(species["name"]) - end - - defp normalize_pet_species(species) when is_binary(species) do - normalized = String.downcase(species) - - if normalized in ~w(dog cat bird fish reptile rabbit hamster) do - normalized - else - "other" - end - end - - defp normalize_pet_species(_), do: "other" - - defp pet_duplicate?(contact_id, name, species) do - Repo.exists?( - from(p in Kith.Contacts.Pet, - where: - p.contact_id == ^contact_id and - fragment("lower(coalesce(?, ''))", p.name) == - fragment("lower(coalesce(?, ''))", ^(name || "")) and - p.species == ^species - ) - ) - end - - # ── Phase 6: Calls ───────────────────────────────────────────────── - - defp import_contact_calls( - credential, - base_url, - account_id, - _user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/calls" - - case api_get_json(credential, 
url, []) do - {:ok, %{"data" => calls}} when is_list(calls) -> - Enum.flat_map(calls, fn call -> - import_single_call(account_id, contact, call, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch calls for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_call(account_id, contact, call_data, import_job) do - occurred_at = parse_datetime(call_data["called_at"]) - - if is_nil(occurred_at) do - [] - else - attrs = %{ - "occurred_at" => occurred_at, - "notes" => non_empty_string(call_data["content"]), - "duration_mins" => call_data["duration"] - } - - case Kith.Activities.create_call( - %{account_id: account_id, id: contact.id}, - attrs - ) do - {:ok, call} -> - maybe_record_entity(import_job, "call", call_data["id"], "call", call.id) - [] - - {:error, reason} -> - ["Call import error: #{inspect_errors(reason)}"] - end - end - end - - # ── Phase 7: Activities ──────────────────────────────────────────── - - defp import_contact_activities( - credential, - base_url, - account_id, - _user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/activities" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => activities}} when is_list(activities) -> - Enum.flat_map(activities, fn activity -> - import_single_activity(account_id, contact, activity, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch activities for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_activity(account_id, contact, activity_data, import_job) do - occurred_at = - parse_datetime(activity_data["happened_at"] || activity_data["date_it_happened"]) - - attrs = %{ - "title" => activity_data["summary"] || activity_data["title"] || "Imported activity", - "description" => non_empty_string(activity_data["description"]), - "occurred_at" => occurred_at || DateTime.utc_now() - } - - case Kith.Activities.create_activity(account_id, attrs, 
[contact.id]) do - {:ok, activity} -> - maybe_record_entity( - import_job, - "activity", - activity_data["id"], - "activity", - activity.id - ) - - [] - - {:error, reason} -> - ["Activity import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 8: Gifts ───────────────────────────────────────────────── - - defp import_contact_gifts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/gifts" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => gifts}} when is_list(gifts) -> - Enum.flat_map(gifts, fn gift -> - import_single_gift(account_id, user_id, contact, gift, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch gifts for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_gift(account_id, user_id, contact, gift_data, import_job) do - direction = - case gift_data["is_for"] do - "contact" -> "given" - _ -> "received" - end - - attrs = %{ - "contact_id" => contact.id, - "name" => gift_data["name"] || "Imported gift", - "description" => non_empty_string(gift_data["comment"]), - "direction" => direction, - "status" => - cond do - gift_data["has_been_offered"] -> "given" - gift_data["has_been_received"] -> "received" - true -> "idea" - end, - "amount" => gift_data["amount"], - "date" => parse_date_string(gift_data["date"]) - } - - case Kith.Gifts.create_gift(account_id, user_id, attrs) do - {:ok, gift} -> - maybe_record_entity(import_job, "gift", gift_data["id"], "gift", gift.id) - [] - - {:error, reason} -> - ["Gift import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 9: Debts ───────────────────────────────────────────────── - - defp import_contact_debts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/debts" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => debts}} when 
is_list(debts) -> - Enum.flat_map(debts, fn debt -> - import_single_debt(account_id, user_id, contact, debt, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch debts for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_debt(account_id, user_id, contact, debt_data, import_job) do - direction = - case debt_data["in_debt"] do - "yes" -> "owed_by_me" - _ -> "owed_to_me" - end - - attrs = %{ - "contact_id" => contact.id, - "title" => debt_data["reason"] || "Imported debt", - "amount" => debt_data["amount"] || "0", - "direction" => direction, - "status" => if(debt_data["status"] == "complete", do: "settled", else: "active") - } - - case Kith.Debts.create_debt(account_id, user_id, attrs) do - {:ok, debt} -> - maybe_record_entity(import_job, "debt", debt_data["id"], "debt", debt.id) - [] - - {:error, reason} -> - ["Debt import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 10: Tasks ──────────────────────────────────────────────── - - defp import_contact_tasks( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/tasks" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => tasks}} when is_list(tasks) -> - Enum.flat_map(tasks, fn task -> - import_single_task(account_id, user_id, contact, task, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch tasks for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_task(account_id, user_id, contact, task_data, import_job) do - status = if task_data["completed"], do: "completed", else: "pending" - - attrs = %{ - "contact_id" => contact.id, - "title" => task_data["title"] || "Imported task", - "description" => non_empty_string(task_data["description"]), - "status" => status - } - - case Kith.Tasks.create_task(account_id, user_id, attrs) do - {:ok, task} -> - maybe_record_entity(import_job, "task", 
task_data["id"], "task", task.id) - [] - - {:error, reason} -> - ["Task import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 11: Reminders ──────────────────────────────────────────── - - defp import_contact_reminders( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/reminders" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => reminders}} when is_list(reminders) -> - Enum.flat_map(reminders, fn reminder -> - import_single_reminder(account_id, user_id, contact, reminder, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch reminders for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_reminder(account_id, user_id, contact, reminder_data, import_job) do - {type, frequency} = map_monica_reminder_frequency(reminder_data["frequency_type"]) - - next_date = - parse_date_string(reminder_data["next_expected_date"]) || - Date.utc_today() - - attrs = %{ - "contact_id" => contact.id, - "type" => type, - "title" => reminder_data["title"] || "Imported reminder", - "frequency" => frequency, - "next_reminder_date" => next_date - } - - case Kith.Reminders.create_reminder(account_id, user_id, attrs) do - {:ok, reminder} -> - maybe_record_entity( - import_job, - "reminder", - reminder_data["id"], - "reminder", - reminder.id - ) - - [] - - {:error, reason} -> - ["Reminder import error: #{inspect_errors(reason)}"] - end - end - - defp map_monica_reminder_frequency("one_time"), do: {"one_time", nil} - defp map_monica_reminder_frequency("week"), do: {"recurring", "weekly"} - defp map_monica_reminder_frequency("month"), do: {"recurring", "monthly"} - defp map_monica_reminder_frequency("year"), do: {"recurring", "annually"} - defp map_monica_reminder_frequency(_), do: {"one_time", nil} - - # ── Phase 12: Conversations ──────────────────────────────────────── - - defp import_contact_conversations( - 
credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/conversations" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => convos}} when is_list(convos) -> - Enum.flat_map(convos, fn convo -> - import_single_conversation( - credential, - base_url, - account_id, - user_id, - contact, - convo, - import_job - ) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch conversations for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_conversation( - credential, - base_url, - account_id, - user_id, - contact, - convo_data, - import_job - ) do - platform = - case convo_data["contact_field_type"] do - %{"name" => name} -> normalize_conversation_platform(name) - _ -> "other" - end - - attrs = %{ - "contact_id" => contact.id, - "platform" => platform, - "subject" => non_empty_string(convo_data["subject"]) - } - - case Kith.Conversations.create_conversation(account_id, user_id, attrs) do - {:ok, conversation} -> - maybe_record_entity( - import_job, - "conversation", - convo_data["id"], - "conversation", - conversation.id - ) - - # Import messages for this conversation - import_conversation_messages( - credential, - base_url, - conversation, - convo_data, - import_job - ) - - {:error, reason} -> - ["Conversation import error: #{inspect_errors(reason)}"] - end - end - - defp import_conversation_messages(_credential, _base_url, conversation, convo_data, import_job) do - messages = convo_data["messages"] || [] - - Enum.flat_map(messages, fn msg -> - attrs = %{ - "body" => msg["content"] || msg["written_by_me_body"] || "", - "direction" => if(msg["written_by_me"], do: "sent", else: "received"), - "sent_at" => parse_datetime(msg["written_at"]) || DateTime.utc_now() - } - - case Kith.Conversations.add_message(conversation, attrs) do - {:ok, message} -> - maybe_record_entity(import_job, "message", msg["id"], "message", message.id) - [] - - 
{:error, reason} -> - ["Message import error: #{inspect_errors(reason)}"] - end - end) - end - - @platform_keywords [ - {"sms", "sms"}, - {"text", "sms"}, - {"whatsapp", "whatsapp"}, - {"telegram", "telegram"}, - {"email", "email"}, - {"instagram", "instagram"}, - {"messenger", "messenger"}, - {"facebook", "messenger"}, - {"signal", "signal"} - ] - - defp normalize_conversation_platform(name) when is_binary(name) do - normalized = String.downcase(name) - - Enum.find_value(@platform_keywords, "other", fn {keyword, platform} -> - if String.contains?(normalized, keyword), do: platform - end) - end - - defp normalize_conversation_platform(_), do: "other" - - # ── Additional date/time helpers ─────────────────────────────────── - - defp parse_datetime(nil), do: nil - - defp parse_datetime(str) when is_binary(str) do - case DateTime.from_iso8601(str) do - {:ok, dt, _offset} -> dt - _ -> nil - end - end - - defp parse_datetime(_), do: nil - - defp parse_date_string(nil), do: nil - - defp parse_date_string(str) when is_binary(str) do - case parse_date_or_datetime(str) do - {:ok, date} -> date - _ -> nil - end - end - - defp parse_date_string(_), do: nil - # ── Phase 13: Document import (async) ────────────────────────────── defp enqueue_document_imports(credential, account_id, user_id, import_job) do diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex index b6afccd..bf981e9 100644 --- a/lib/kith/workers/monica_api_crawl_worker.ex +++ b/lib/kith/workers/monica_api_crawl_worker.ex @@ -18,6 +18,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do alias Kith.Imports alias Kith.Imports.Sources.MonicaApi alias Kith.Workers.DuplicateDetectionWorker + alias Kith.Workers.MonicaMiscDataWorker alias Kith.Workers.MonicaPhotoSyncWorker @impl Oban.Worker @@ -40,16 +41,20 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do ) do now = DateTime.utc_now() |> DateTime.truncate(:second) summary_map = ensure_map(summary) + {misc_plan, 
persisted_summary} = pop_misc_plan(summary_map) Imports.update_import_status(import_job, "completed", %{ - summary: summary_map, + summary: persisted_summary, completed_at: now }) + # Enqueue misc worker BEFORE wiping the API key — it needs the + # still-encrypted key in its job args (same pattern as photo sync). + maybe_enqueue_misc_data_worker(import_job, misc_plan) Imports.wipe_api_key(import_job) topic = "import:#{import_job.account_id}" - Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, persisted_summary}) # Trigger duplicate detection for newly imported contacts Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) @@ -57,7 +62,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do # Enqueue photo sync (separate job) if the user opted in maybe_enqueue_photo_sync(import_job) - Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}") + Logger.info("MonicaApi import #{import_id} completed: #{inspect(persisted_summary)}") :ok else {:error, reason} -> @@ -80,7 +85,8 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do defp build_credential(import_job) do %{ url: import_job.api_url, - api_key: import_job.api_key_encrypted + api_key: import_job.api_key_encrypted, + req_options: Application.get_env(:kith, :monica_req_options, []) } end @@ -114,5 +120,29 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do end end + defp maybe_enqueue_misc_data_worker(_import_job, []), do: :ok + + defp maybe_enqueue_misc_data_worker(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => import_job.api_url, + "credential_api_key" => import_job.api_key_encrypted, + "plan" => plan + } + |> MonicaMiscDataWorker.new() + |> Oban.insert() + end + + # The misc-data plan is built by MonicaApi.crawl/5 and returned in the + # summary under either an atom or string key (the map round-trips through + # ensure_map/1). 
Pop it out before persisting so the plan is not stored + # in the DB summary. + defp pop_misc_plan(summary) do + {plan_atom, rest_atom} = Map.pop(summary, :misc_data_plan, []) + {plan_str, rest} = Map.pop(rest_atom, "misc_data_plan", []) + plan = if plan_atom == [], do: plan_str, else: plan_atom + {plan, rest} + end + defp ensure_map(m) when is_map(m), do: m end diff --git a/test/kith/workers/monica_api_crawl_worker_test.exs b/test/kith/workers/monica_api_crawl_worker_test.exs index 05e0a86..cc04eb1 100644 --- a/test/kith/workers/monica_api_crawl_worker_test.exs +++ b/test/kith/workers/monica_api_crawl_worker_test.exs @@ -150,5 +150,58 @@ defmodule Kith.Workers.MonicaApiCrawlWorkerTest do refute_enqueued(worker: MonicaPhotoSyncWorker) end + + test "enqueues MonicaMiscDataWorker with the plan from crawl summary", + %{user: user, account_id: account_id} do + # Boundary regression: the misc_data_plan key produced by + # MonicaApi.crawl/5 must reach MonicaMiscDataWorker.new/1 unmodified — + # the same wizard→crawl→worker contract that Bug C silently violated + # for auto_merge_duplicates in the previous PR. 
+ + stub_name = :monica_crawl_misc_stub + + Application.put_env( + :kith, + :monica_req_options, + plug: {Req.Test, stub_name}, + retry: false + ) + + on_exit(fn -> Application.delete_env(:kith, :monica_req_options) end) + + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"calls" => true, "pets" => false} + }) + + contacts = + Kith.MonicaApiFixtures.contacts_page_json( + [ + Kith.MonicaApiFixtures.contact_json( + id: 7, + first_name: "Plan", + last_name: "Test", + statistics: %{"number_of_calls" => 2} + ) + ], + 1, + 1, + 1 + ) + + Req.Test.stub(stub_name, fn conn -> + Req.Test.json(conn, contacts) + end) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + assert_enqueued( + worker: Kith.Workers.MonicaMiscDataWorker, + args: %{"import_id" => import_job.id} + ) + end end end From 7f11d94313427a1a42e17d989abfb6817f166b8d Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 03:41:40 +0300 Subject: [PATCH 37/58] docs: update MonicaApi moduledoc for Phase 4 extraction --- lib/kith/imports/sources/monica_api.ex | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index a486a19..b7566f2 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -17,9 +17,13 @@ defmodule Kith.Imports.Sources.MonicaApi do 3. **Extra notes** — for contacts with `statistics.number_of_notes > 3`, fetch remaining notes via `GET /api/contacts/{id}/notes`. - Photo import is handled by `Kith.Workers.MonicaPhotoSyncWorker`, enqueued - separately by `MonicaApiCrawlWorker` after this crawl completes when the - user opts in via `api_options["photos"]`. 
+ Per-contact "misc" data (pets, calls, activities, gifts, debts, tasks, + reminders, conversations) is planned during Phase 1 — for each contact, + endpoints with `statistics.number_of_X > 0` are recorded in + `summary.misc_data_plan` — and dispatched separately by + `Kith.Workers.MonicaMiscDataWorker`. Photo import and document import + follow the same separate-worker pattern (`MonicaPhotoSyncWorker`, + `MonicaDocumentImportWorker`). """ @behaviour Kith.Imports.Source From c4f32831e91794752a57df1b0f9f18c3bda52552 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 04:24:35 +0300 Subject: [PATCH 38/58] docs: spec for Monica import deployment fixes (PubSub + Oban + clustering) --- ...6-monica-import-deployment-fixes-design.md | 283 ++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md diff --git a/docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md b/docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md new file mode 100644 index 0000000..5e6d8b7 --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md @@ -0,0 +1,283 @@ +# Monica Import Deployment Fixes — Design Spec + +**Date:** 2026-05-16 +**Status:** Approved (brainstorming) +**Branch:** `fix/duplicate-detection` + +## Problem statement + +In the production split-container deployment (`docker-compose.prod.yml`: separate `app` and `worker` services), the Monica importer crashes with: + +``` +** (ArgumentError) unknown registry: Kith.PubSub. Either the registry name is + invalid or the registry is not running, possibly because its application + isn't started + (phoenix_pubsub 2.2.0) lib/phoenix/pubsub.ex:232: Phoenix.PubSub.broadcast/4 + (kith 0.1.0) lib/kith/imports/sources/monica_api.ex:255: ... 
+ (oban 2.20.3) lib/oban/queue/executor.ex:145: Oban.Queue.Executor.perform/1 +``` + +Root cause analysis identifies **three distinct bugs**: two that compound each +other (A and B), and a third (C) that is exposed once the first two are fixed: + +### Bug A — PubSub not started in worker mode + +`lib/kith/application.ex` starts `{Phoenix.PubSub, name: Kith.PubSub}` only in +`mode_children/0`, which is `[]` when `KITH_MODE=worker`. Every import job that +broadcasts progress or completion (`maybe_broadcast_progress/4`, +`MonicaApiCrawlWorker` completion, `MonicaMiscDataWorker` completion) crashes +on the worker container. + +### Bug B — Oban runs on both containers without gating + +`config/config.exs` configures Oban with `queues:` and `plugins:` set +unconditionally. Both `app` and `worker` containers start the same Oban +supervisor and race for jobs via Postgres row-level locks. Symptoms: + +- When the **app** wins the race, the job runs to completion (PubSub works, + LiveView gets progress) — but jobs leak into the web-facing container, + defeating the split. +- When the **worker** wins the race, the job crashes on first broadcast + (Bug A), retries via Oban, eventually fails or gets re-claimed by the app. + +### Bug C — PubSub does not cross containers + +Fixing Bug A and Bug B alone causes a regression: with `KITH_MODE=web` Oban +gated off, only the worker processes jobs; but `Phoenix.PubSub` (default +`PG2` adapter) requires connected BEAM nodes to span containers, and the +current deployment has no Erlang clustering (`RELEASE_COOKIE` unset, no +`DNS_CLUSTER_QUERY`, no `libcluster`). LiveView subscribers in the `app` +container would never receive worker-emitted broadcasts. + +Three LiveViews depend on these broadcasts: + +- `lib/kith_web/live/import_wizard_live.ex` (subscribes line 79) +- `lib/kith_web/live/settings_live/import.ex` (subscribes line 37) +- `lib/kith_web/live/import_history_live/show.ex` (subscribes line 19) + +## Goals + +1. Worker container processes Monica imports without crashing. +2. 
Only the worker container runs Oban jobs in production. +3. LiveView subscribers in the `app` container receive progress and completion + broadcasts emitted by the `worker` container. +4. No regression to dev (single container via `mix phx.server` or + `docker-compose.dev.yml`) or test (`Oban.testing: :manual`) environments. + +## Non-goals + +- Multi-replica scaling (multiple `app` or multiple `worker` containers). This + design targets the user's stated 1+1 topology. The clustering approach + (DNSCluster + shared alias) extends naturally to multi-replica, but no + config or testing is included for that case. +- Multi-DC deployments. The PG2 adapter is single-region; cross-region would + need a Redis or Postgres-LISTEN adapter (deferred). +- Refactoring the Monica importer or misc worker beyond what these fixes + require. +- Replacing Phoenix.PubSub with an external broker. + +## Architecture + +### `lib/kith/application.ex` + +`Phoenix.PubSub` and `DNSCluster` move from `mode_children/0` to +`base_children/0`. These are application-layer concerns, not HTTP-layer: + +```elixir +defp base_children do + Kith.Geocoding.install_fuse() + Kith.Weather.install_fuse() + Kith.SentryEventHandler.attach() + :logger.add_handler(:sentry_handler, Sentry.LoggerHandler, %{}) + + [ + Kith.Vault, + Kith.Repo, + {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, + {Oban, Application.fetch_env!(:kith, Oban)}, + {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, + {Task.Supervisor, name: Kith.TaskSupervisor}, + {Phoenix.PubSub, name: Kith.PubSub}, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore} + ] +end + +defp mode_children do + case System.get_env("KITH_MODE", "web") do + "worker" -> + [] + + _web -> + [ + Kith.PromEx, + KithWeb.Telemetry, + KithWeb.Endpoint + ] + end +end +``` + +`KithWeb.Endpoint` references `pubsub_server: Kith.PubSub` (config.exs:79). 
+PubSub now starts in `base_children`, strictly before Endpoint (which stays in +`mode_children`) → ordering is safe. + +### `config/runtime.exs` — Oban mode gating + +Added inside the existing `if config_env() == :prod do` block (near the +`# Rate limiting` section, around line 216): + +```elixir +# Oban — only the worker container processes jobs in production. +# The web container can call `Oban.insert/1` (queues are still defined +# by name in config.exs so insertion validates) but runs no queues or +# plugins. KITH_MODE=worker keeps the full config from config.exs. +case System.get_env("KITH_MODE", "web") do + "worker" -> + :ok + + _web -> + config :kith, Oban, queues: false, plugins: false +end +``` + +This is wrapped by the `:prod` env guard so dev (`mix phx.server`, +single-container `docker-compose.dev.yml`) is unaffected. Test env is +already pinned to `testing: :manual` in `config/test.exs:27`. + +### `docker-compose.prod.yml` — clustering + +Both `app` and `worker` services gain: + +```yaml +hostname: kith-app # or kith-worker +environment: + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster +networks: + default: + aliases: + - kith-cluster +``` + +Mechanics: + +- `RELEASE_COOKIE` (same on both): the Erlang distribution shared secret. + Required env; if unset, BEAM generates a random one per container and + nodes can't connect. +- `RELEASE_DISTRIBUTION: name`: long-form node names use FQDN-style hostnames, + letting Docker DNS resolve them. +- `hostname: kith-app` / `kith-worker`: unique BEAM node hostnames. The + resulting node names are `kith@kith-app` and `kith@kith-worker`. +- `aliases: [kith-cluster]`: both containers register this alias in Docker's + embedded DNS. `kith-cluster` then resolves to **both** container IPs (Docker + returns all matching A records). +- `DNS_CLUSTER_QUERY: kith-cluster`: tells `DNSCluster` (already a dep) + to query that name on a periodic interval. 
Each result IP it doesn't already + see as a connected node gets `Node.connect/1`. Idempotent and self-healing. + +Once nodes are connected, `Phoenix.PubSub` with the default PG2 adapter +broadcasts cross-node automatically. No code changes elsewhere needed. + +### `.env.example` + +Add a `RELEASE_COOKIE` entry with generation instructions: + +```bash +# Erlang BEAM distribution cookie (shared between app and worker containers +# so they can cluster for cross-container PubSub). Generate with: +# mix phx.gen.secret 32 +# or: +# openssl rand -base64 32 +RELEASE_COOKIE=your-shared-cookie-here +``` + +Place it adjacent to `SECRET_KEY_BASE` in the secrets section. + +## Verification + +### Automated + +- Existing test suite continues to pass unchanged. PubSub is now started in + `base_children`, which already runs in test env via `Kith.DataCase`. The + Oban gating block is wrapped in `if config_env() == :prod` so test env is + not affected. + +### Manual (prod-like) + +```bash +# 1. Generate cookie +RELEASE_COOKIE=$(openssl rand -base64 32) +# add to .env + +# 2. Bring up the prod stack +docker compose -f docker-compose.prod.yml up --build + +# 3. Verify clustering +docker compose -f docker-compose.prod.yml exec app \ + /app/bin/kith eval 'IO.inspect(Node.list())' +# Expected: [:"kith@kith-worker"] + +docker compose -f docker-compose.prod.yml exec worker \ + /app/bin/kith eval 'IO.inspect(Node.list())' +# Expected: [:"kith@kith-app"] + +# 4. Verify Oban gating +docker compose -f docker-compose.prod.yml exec app \ + /app/bin/kith eval 'IO.inspect(Oban.config().queues)' +# Expected: [] or false (web is insert-only) + +docker compose -f docker-compose.prod.yml exec worker \ + /app/bin/kith eval 'IO.inspect(Oban.config().queues)' +# Expected: [default: 10, mailers: 10, ...] (full config) + +# 5. 
Trigger an import from the wizard UI; observe: +# - worker logs: MonicaApiCrawlWorker starts and progresses +# - app logs: no Oban executor logs +# - browser: LiveView progress bar updates in real time +# - browser: completion message renders when crawl finishes +``` + +### Failure modes to watch for + +- `RELEASE_COOKIE` unset → containers generate independent cookies → nodes + never connect. Symptom: `Node.list()` is empty on both, progress doesn't + cross. Fix: set the env var. +- Docker DNS returns only one IP for the alias → only one connection + direction works. Mitigation: DNSCluster polls periodically; the other + direction self-heals within a few seconds. Symptom of permanent breakage: + `Node.list()` empty on one container. +- Worker container started before app container's BEAM is ready → initial + cluster connect may fail, then succeed on the next DNSCluster poll. Not + user-visible because no import would be running during that window. + +## Trade-offs + +| Aspect | Cost | Mitigation | +|---|---|---| +| New env var `RELEASE_COOKIE` | One more secret to manage | Standard Erlang/Phoenix pattern; documented in `.env.example` | +| BEAM distribution exposed inside Docker network | Increases internal attack surface if Docker network is compromised | Network is internal-only (no published ports); cookie is opaque to anyone without the secret | +| DNSCluster polling overhead | One DNS query every 5s per container | Negligible; same as existing Phoenix-stack pattern | +| Bound to 1-app-1-worker topology for now | Multi-replica needs further testing | Documented as non-goal; DNSCluster + alias extends naturally | + +## Implementation order + +1. `lib/kith/application.ex` — move PubSub + DNSCluster to base_children. Tests + pass. +2. `config/runtime.exs` — add Oban gating block. Tests pass (gated by `:prod`). +3. `docker-compose.prod.yml` — add hostname, env vars, network alias. +4. `.env.example` — document RELEASE_COOKIE. +5. 
Manual smoke test per the verification section above. + +Each step is independently committable; an intermediate state (e.g. step 1 +without steps 2–3) is "worker no longer crashes, race condition remains" — a +strict improvement over the current state. + +## Out of scope (future work) + +- Multi-replica web/worker scaling +- Replacing PG2 PubSub with Redis or Postgres for cross-DC support +- Health checks for cluster connection state (could surface a degraded mode + indicator in the import history UI) +- Migrating `Phoenix.PubSub.broadcast` call sites in the import path to a + thin wrapper that logs broadcasts (helpful for ops debugging, but not + required for correctness) From 25ea12053016e1f627d0053c73a0cb6a906f8bf8 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 04:29:16 +0300 Subject: [PATCH 39/58] docs: implementation plan for Monica import deployment fixes --- ...26-05-16-monica-import-deployment-fixes.md | 543 ++++++++++++++++++ 1 file changed, 543 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md diff --git a/docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md b/docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md new file mode 100644 index 0000000..a6f9c82 --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md @@ -0,0 +1,543 @@ +# Monica Import Deployment Fixes Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Stop Monica imports from crashing on the worker container, route Oban jobs exclusively to the worker, and cluster the two BEAM nodes so LiveView progress broadcasts cross containers. 
+ +**Architecture:** Move `Phoenix.PubSub` + `DNSCluster` from `mode_children/0` to `base_children/0` so worker mode also starts them. In `runtime.exs` (`:prod` only), gate Oban to insert-only when `KITH_MODE=web`. In `docker-compose.prod.yml`, give each container a unique hostname plus a shared network alias (`kith-cluster`), share `RELEASE_COOKIE`, and set `DNS_CLUSTER_QUERY=kith-cluster`. Phoenix.PubSub's default PG2 adapter then fans broadcasts across both nodes automatically. + +**Tech Stack:** Elixir 1.18, Phoenix LiveView, Phoenix.PubSub (PG2), DNSCluster 0.2+, Oban 2.18, Docker Compose v2. + +**Reference spec:** `docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md` + +--- + +## Task 1: Move PubSub + DNSCluster to base_children + +**Files:** +- Modify: `lib/kith/application.ex` + +- [ ] **Step 1: Inspect the current supervisor tree** + +Run: `grep -n "Phoenix.PubSub\|DNSCluster\|mode_children\|base_children" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/application.ex` + +Expected: matches at the function heads of `base_children/0` and `mode_children/0`, plus the existing PubSub + DNSCluster child specs inside `mode_children/0`'s `_web` branch. 
+ +- [ ] **Step 2: Edit `base_children/0` and `mode_children/0`** + +In `lib/kith/application.ex`, find the current block: + +```elixir + defp base_children do + # Install fuse circuit breakers before starting supervised children + Kith.Geocoding.install_fuse() + Kith.Weather.install_fuse() + # Attach Sentry telemetry handler for Oban job failures + Kith.SentryEventHandler.attach() + # Capture crashes via Erlang logger handler (Sentry v10+, replaces PlugCapture) + :logger.add_handler(:sentry_handler, Sentry.LoggerHandler, %{}) + + [ + Kith.Vault, + Kith.Repo, + {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, + {Oban, Application.fetch_env!(:kith, Oban)}, + {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, + {Task.Supervisor, name: Kith.TaskSupervisor} + ] + end + + defp mode_children do + case System.get_env("KITH_MODE", "web") do + "worker" -> + [] + + _web -> + [ + Kith.PromEx, + KithWeb.Telemetry, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore}, + {Phoenix.PubSub, name: Kith.PubSub}, + KithWeb.Endpoint + ] + end + end +``` + +Replace with: + +```elixir + defp base_children do + # Install fuse circuit breakers before starting supervised children + Kith.Geocoding.install_fuse() + Kith.Weather.install_fuse() + # Attach Sentry telemetry handler for Oban job failures + Kith.SentryEventHandler.attach() + # Capture crashes via Erlang logger handler (Sentry v10+, replaces PlugCapture) + :logger.add_handler(:sentry_handler, Sentry.LoggerHandler, %{}) + + [ + Kith.Vault, + Kith.Repo, + {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, + {Oban, Application.fetch_env!(:kith, Oban)}, + {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, + {Task.Supervisor, name: Kith.TaskSupervisor}, + # PubSub + DNSCluster live here (not in mode_children) so worker mode + # also starts them. 
Required for cross-container progress broadcasts + # in the split-deployment topology (`docker-compose.prod.yml`). + {Phoenix.PubSub, name: Kith.PubSub}, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore} + ] + end + + defp mode_children do + case System.get_env("KITH_MODE", "web") do + "worker" -> + [] + + _web -> + [ + Kith.PromEx, + KithWeb.Telemetry, + KithWeb.Endpoint + ] + end + end +``` + +Notes: +- PubSub appears before `KithWeb.Endpoint` in startup order, because base_children precedes mode_children in `start/2`. `KithWeb.Endpoint` reads `pubsub_server: Kith.PubSub` from config — the registry is ready before it's needed. +- `DNSCluster` is harmless when its query is `:ignore` (the current default when no `DNS_CLUSTER_QUERY` env var is set). + +- [ ] **Step 3: Compile and run the suite** + +Run: `mix compile --warnings-as-errors && mix test` + +Expected: PASS. 1138 tests, 0 failures (current baseline). `Kith.PubSub` is now running in test env too, which is invisible to test code (no test subscribes/broadcasts; the existing ones use it transparently via LiveView mounts). + +- [ ] **Step 4: Manual smoke check — worker-mode startup** + +Run: `KITH_MODE=worker iex -S mix` + +Expected: app starts, no crash. Inside IEx, verify PubSub is running: + +```elixir +Process.whereis(Kith.PubSub) +# Expected: a PID, not nil +``` + +Exit IEx by pressing Ctrl-C twice. 
+ +- [ ] **Step 5: Commit** + +```bash +cd /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection +git add lib/kith/application.ex +git commit -m "fix: start PubSub + DNSCluster in base_children for worker mode" +``` + +--- + +## Task 2: Gate Oban queues by KITH_MODE in :prod + +**Files:** +- Modify: `config/runtime.exs` + +- [ ] **Step 1: Find the rate-limiting block (anchor for the new block)** + +Run: `grep -n "Rate limiting" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/config/runtime.exs` + +Expected: a match around line 208 (`# Rate limiting — optional Redis backend`). + +- [ ] **Step 2: Add the Oban gating block** + +In `config/runtime.exs`, find the existing rate-limiting block ending around the existing `if System.get_env("RATE_LIMIT_BACKEND") == "redis" do ... end` block. Immediately AFTER that `end`, but still inside the outer `if config_env() == :prod do` block, add: + +```elixir + # Oban — only the worker container processes jobs in production. + # The web container can call `Oban.insert/1` to enqueue jobs, but + # runs no queues or plugins (no cron, no pruner) — so it never claims + # rows from `oban_jobs`. The worker container keeps the full config + # from `config.exs`. + # + # Dev (`config_env() == :dev`) is unaffected: this block only runs in + # `:prod`. Test env is pinned to `testing: :manual` in `config/test.exs`. + case System.get_env("KITH_MODE", "web") do + "worker" -> + :ok + + _web -> + config :kith, Oban, queues: false, plugins: false + end +``` + +Make sure indentation matches the surrounding `:prod` block (two spaces). + +- [ ] **Step 3: Verify placement is inside the `:prod` guard** + +Run: `grep -n "config_env\|config :kith, Oban" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/config/runtime.exs` + +Expected: the new `config :kith, Oban` line appears between the `if config_env() == :prod do` line and its closing `end`. 
The `case KITH_MODE` should NOT be at the top level of the file. + +- [ ] **Step 4: Run the test suite** + +Run: `mix test` + +Expected: PASS, 1138 tests, 0 failures. Test env is `:test` (not `:prod`), so the new block is unreached. + +- [ ] **Step 5: Smoke-check the prod compilation path** + +Run: `MIX_ENV=prod mix compile 2>&1 | tail -20` + +Expected: clean compile (the runtime.exs file is read but not evaluated at compile time, so any logical mistakes won't surface here — Step 6's IEx test is the real check). + +- [ ] **Step 6: Manual IEx check (simulate prod KITH_MODE=web)** + +Run: `MIX_ENV=prod KITH_MODE=web iex -S mix` + +Expected: app starts, then in IEx: + +```elixir +Application.get_env(:kith, Oban) |> Keyword.get(:queues) +# Expected: false +Application.get_env(:kith, Oban) |> Keyword.get(:plugins) +# Expected: false +``` + +Note: `MIX_ENV=prod iex -S mix` may fail if you don't have a prod DB / SECRET_KEY_BASE set. If it raises on startup before reaching IEx, switch to: + +```bash +MIX_ENV=prod KITH_MODE=web mix run -e 'IO.inspect(Application.fetch_env!(:kith, Oban))' +``` + +(also expected to raise on missing prod env vars, but the `config :kith, Oban, ...` mutation runs before that and you'll see `queues: false, plugins: false` in the inspected value if you can get it to surface. If you can't get prod env happily booting, skip this step and rely on Step 7's separate IEx-based KITH_MODE=worker check.) + +- [ ] **Step 7: Manual IEx check (simulate prod KITH_MODE=worker)** + +If prod-env IEx works: + +```bash +MIX_ENV=prod KITH_MODE=worker iex -S mix +``` + +Then: + +```elixir +Application.get_env(:kith, Oban) |> Keyword.get(:queues) +# Expected: a keyword list with default: 10, mailers: 10, ... (full config) +``` + +If prod IEx is not bootable in your local environment, accept that Step 4's test pass + the inline code review (Step 3) suffice; the real verification will happen in the docker-compose smoke test at Task 5. 
+ +- [ ] **Step 8: Commit** + +```bash +git add config/runtime.exs +git commit -m "fix: gate Oban queues by KITH_MODE in :prod (web=insert-only)" +``` + +--- + +## Task 3: Add clustering env to `docker-compose.prod.yml` + +**Files:** +- Modify: `docker-compose.prod.yml` + +- [ ] **Step 1: Locate the `app` service definition** + +Run: `grep -n "^ app:\|^ worker:" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/docker-compose.prod.yml` + +Expected: `app:` around line 61, `worker:` around line 135. + +- [ ] **Step 2: Add hostname + env vars + network alias to the `app` service** + +In `docker-compose.prod.yml`, find the `app:` block. Insert a `hostname:` field right after `command:` (or another visible top-level field) and add the three new env vars in its `environment:` block. Then add a `networks:` block at the same level as `environment:`. + +The `app` service block should look like: + +```yaml + app: + image: kith:latest + command: ["start"] + hostname: kith-app + depends_on: + migrate: + condition: service_completed_successfully + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + read_only: true + tmpfs: + - /tmp:size=64M + environment: + # ── BEAM distribution / clustering ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster + # ── existing env vars unchanged ── + DATABASE_URL: ${DATABASE_URL} + SECRET_KEY_BASE: ${SECRET_KEY_BASE} + # ... (leave the rest of the env block unchanged) + networks: + default: + aliases: + - kith-cluster + volumes: + - uploads:/app/uploads + # ... (rest unchanged) +``` + +Important: the existing block does not declare a `networks:` section because Compose creates a default network automatically. The new `networks:` section attaches this service to that same default network, with the `kith-cluster` alias added. 
Compose accepts this without explicit network definition; if Compose complains about missing top-level `networks:` declaration, add this block at the bottom of the file (outside any service): + +```yaml +networks: + default: + name: kith_default +``` + +(only add the top-level block if Compose errors without it — start with just the per-service alias block and only add the top-level if needed.) + +- [ ] **Step 3: Add the same three env vars + alias + hostname to the `worker` service** + +In the `worker:` block, mirror the changes from Step 2 but use `kith-worker` as the hostname: + +```yaml + worker: + image: kith:latest + command: ["start"] + hostname: kith-worker + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + read_only: true + tmpfs: + - /tmp:size=64M + depends_on: + postgres: + condition: service_healthy + migrate: + condition: service_completed_successfully + environment: + # ── BEAM distribution / clustering ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster + # ── existing env vars unchanged ── + DATABASE_URL: ${DATABASE_URL} + SECRET_KEY_BASE: ${SECRET_KEY_BASE} + # ... (leave the rest unchanged) + networks: + default: + aliases: + - kith-cluster + # ... (rest unchanged) +``` + +- [ ] **Step 4: Validate the compose file** + +Run: + +```bash +docker compose -f /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/docker-compose.prod.yml config 2>&1 | head -40 +``` + +Expected: a parsed render of the compose file with no error. The output should include: +- `hostname: kith-app` and `hostname: kith-worker` lines +- `RELEASE_COOKIE`, `RELEASE_DISTRIBUTION: name`, `DNS_CLUSTER_QUERY: kith-cluster` env keys on both services +- `aliases: [kith-cluster]` under both `app.networks.default` and `worker.networks.default` + +If `config` errors out about an undefined `RELEASE_COOKIE` env var, that's expected unless you've already added it to `.env`. 
Re-run with `RELEASE_COOKIE=$(openssl rand -base64 32) docker compose ... config`. The validation is about structure, not values. + +- [ ] **Step 5: Commit** + +```bash +git add docker-compose.prod.yml +git commit -m "infra: cluster app + worker containers via shared cookie + DNS alias" +``` + +--- + +## Task 4: Document `RELEASE_COOKIE` in `.env.example` + +**Files:** +- Modify: `.env.example` + +- [ ] **Step 1: Find the section anchor** + +Run: `grep -n "SECRET_KEY_BASE" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/.env.example` + +Expected: a line introducing the SECRET_KEY_BASE entry. Use this as the anchor. + +- [ ] **Step 2: Add the `RELEASE_COOKIE` entry** + +In `.env.example`, find the `SECRET_KEY_BASE` block and immediately AFTER it (after any comments and the SECRET_KEY_BASE= line itself), add: + +```bash +# Erlang BEAM distribution cookie. Shared between the app and worker +# containers so they can cluster for cross-container PubSub broadcasts +# (LiveView import progress). Generate with one of: +# mix phx.gen.secret 32 +# openssl rand -base64 32 +RELEASE_COOKIE= +``` + +The trailing empty value is intentional — `.env.example` uses empty placeholders elsewhere as a "fill this in" signal. Match the file's style; if other secrets use a non-empty placeholder (e.g. `your-shared-cookie-here`), mirror that. + +- [ ] **Step 3: Verify the example file** + +Run: `grep -A 5 "RELEASE_COOKIE" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/.env.example` + +Expected: see the comment + the empty assignment. 
+ +- [ ] **Step 4: Commit** + +```bash +git add .env.example +git commit -m "docs: document RELEASE_COOKIE in .env.example" +``` + +--- + +## Task 5: Manual smoke verification (docker-compose.prod) + +**Files:** *(no code changes — verification only)* + +- [ ] **Step 1: Generate a cookie and put it in `.env`** + +```bash +cd /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection +echo "RELEASE_COOKIE=$(openssl rand -base64 32)" >> .env +chmod 600 .env +``` + +(skip if your `.env` already has `RELEASE_COOKIE` set.) + +- [ ] **Step 2: Build the prod image with the new code** + +```bash +docker build -t kith:latest . +``` + +Expected: a successful build. + +- [ ] **Step 3: Bring up the prod stack** + +```bash +docker compose -f docker-compose.prod.yml up -d +``` + +Wait ~30 seconds for migrate + app + worker to come up. Check status: + +```bash +docker compose -f docker-compose.prod.yml ps +``` + +Expected: `migrate` exited 0, `postgres` running healthy, `app` and `worker` both running. + +- [ ] **Step 4: Verify clustering** + +```bash +docker compose -f docker-compose.prod.yml exec app /app/bin/kith eval 'IO.inspect(Node.list())' +``` + +Expected: `[:"kith@kith-worker"]` + +```bash +docker compose -f docker-compose.prod.yml exec worker /app/bin/kith eval 'IO.inspect(Node.list())' +``` + +Expected: `[:"kith@kith-app"]` + +If either returns `[]`, wait 10 more seconds (DNSCluster polls periodically) and retry. If still empty, check the symptom matrix in the spec's "Failure modes to watch for" section. 
+ +- [ ] **Step 5: Verify Oban gating** + +```bash +docker compose -f docker-compose.prod.yml exec app /app/bin/kith eval \ + 'IO.inspect(Application.fetch_env!(:kith, Oban) |> Keyword.get(:queues))' +``` + +Expected: `false` + +```bash +docker compose -f docker-compose.prod.yml exec worker /app/bin/kith eval \ + 'IO.inspect(Application.fetch_env!(:kith, Oban) |> Keyword.get(:queues))' +``` + +Expected: `[default: 10, mailers: 10, reminders: 5, exports: 2, imports: 2, immich: 3, purge: 1]` + +- [ ] **Step 6: Trigger an import via the wizard** + +In a browser, open the app (URL per your local Caddy config, usually `http://localhost`), log in, go to **Settings → Import**, choose **Monica CRM (API)**, enter test credentials for your Monica instance, start the import. + +Observe (using `docker compose -f docker-compose.prod.yml logs -f`): + +- Worker container log shows `MonicaApiCrawlWorker` starting +- App container log does NOT show `Oban` executor logs +- Browser shows a progress bar updating in real time (PubSub crossed containers) +- On completion, the wizard shows the "import complete" UI + +- [ ] **Step 7: Verify the misc worker also runs on worker** + +While the import is running (or shortly after main crawl completes), check Oban's job table: + +```bash +docker compose -f docker-compose.prod.yml exec postgres \ + psql -U kith -d kith_prod -c \ + "SELECT id, worker, queue, state FROM oban_jobs ORDER BY id DESC LIMIT 10;" +``` + +Expected: see rows for `Kith.Workers.MonicaApiCrawlWorker` and (after main crawl completes) `Kith.Workers.MonicaMiscDataWorker`, all with `state = 'completed'` (or `executing` while in flight). + +- [ ] **Step 8: Verify no PubSub crash on worker** + +```bash +docker compose -f docker-compose.prod.yml logs worker | grep -i 'unknown registry\|Kith.PubSub' +``` + +Expected: empty output (no crashes referencing Kith.PubSub). 
+ +- [ ] **Step 9: Tear down** + +```bash +docker compose -f docker-compose.prod.yml down +``` + +(or leave running if you want to keep iterating.) + +- [ ] **Step 10: No commit for this task** (verification-only). + +--- + +## Self-review checklist + +Before handing off: + +1. **Spec coverage:** + - Bug A (PubSub crash in worker mode) → Task 1 ✓ + - Bug B (Oban race) → Task 2 ✓ + - Bug C (cross-container PubSub) → Task 1 + Task 3 ✓ + - `.env.example` documentation → Task 4 ✓ + - Verification → Task 5 ✓ + +2. **Placeholders:** Every step has concrete code/commands. No "TBD", "implement later", "add error handling". + +3. **Type consistency:** + - `KITH_MODE` env var spelled consistently (matches application.ex case statement) + - `RELEASE_COOKIE`, `RELEASE_DISTRIBUTION`, `DNS_CLUSTER_QUERY` consistent across compose + spec + - Network alias `kith-cluster` consistent on both services + `DNS_CLUSTER_QUERY` value + - Hostnames `kith-app` / `kith-worker` consistent with Node.list() expectations in Task 5 + +4. **Order safety:** + - Task 1 is safe in isolation (PubSub starts in worker mode, no behavior change in web mode) + - Task 2 builds on Task 1 (without Task 1, gating queues to worker means jobs run there and crash on PubSub broadcast) + - Task 3 builds on Task 2 (without clustering, gating means no LiveView progress) + - Task 4 is metadata-only + - Task 5 verifies the cumulative effect + + If anything stops working mid-implementation, intermediate state after Task 1 alone is strictly better than current state (crash is fixed; race remains). + +5. **Backout:** Each task is a single commit. `git revert <sha>` cleanly undoes any one task without touching the others' files (Task 2 depends on Task 1 for correctness but not for compilation; likewise, Task 2 depends on Task 3 for LiveView progress to cross containers, but not for compilation). 
From 147d6f0e3fb1bbb576c61aaad82a45b9b4a873e9 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 04:35:06 +0300 Subject: [PATCH 40/58] fix: start PubSub + DNSCluster in base_children for worker mode --- lib/kith/application.ex | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/kith/application.ex b/lib/kith/application.ex index 5ce9f9e..b521fd7 100644 --- a/lib/kith/application.ex +++ b/lib/kith/application.ex @@ -26,7 +26,12 @@ defmodule Kith.Application do {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, {Oban, Application.fetch_env!(:kith, Oban)}, {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, - {Task.Supervisor, name: Kith.TaskSupervisor} + {Task.Supervisor, name: Kith.TaskSupervisor}, + # PubSub + DNSCluster live here (not in mode_children) so worker mode + # also starts them. Required for cross-container progress broadcasts + # in the split-deployment topology (`docker-compose.prod.yml`). 
+ {Phoenix.PubSub, name: Kith.PubSub}, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore} ] end @@ -39,8 +44,6 @@ defmodule Kith.Application do [ Kith.PromEx, KithWeb.Telemetry, - {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore}, - {Phoenix.PubSub, name: Kith.PubSub}, KithWeb.Endpoint ] end From f38aac7a1d1268e1cf55e65b33fce6af5644866e Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 04:39:21 +0300 Subject: [PATCH 41/58] fix: gate Oban queues by KITH_MODE in :prod (web=insert-only) --- config/runtime.exs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/config/runtime.exs b/config/runtime.exs index 944d31e..200f02c 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -215,6 +215,22 @@ if config_env() == :prod do backend: {Hammer.Backend.Redis, [expiry_ms: 60_000 * 60, redis_url: redis_url]} end + # Oban — only the worker container processes jobs in production. + # The web container can call `Oban.insert/1` to enqueue jobs, but + # runs no queues or plugins (no cron, no pruner) — so it never claims + # rows from `oban_jobs`. The worker container keeps the full config + # from `config.exs`. + # + # Dev (`config_env() == :dev`) is unaffected: this block only runs in + # `:prod`. Test env is pinned to `testing: :manual` in `config/test.exs`. 
+ case System.get_env("KITH_MODE", "web") do + "worker" -> + :ok + + _web -> + config :kith, Oban, queues: false, plugins: false + end + # Sentry error tracking (optional — only when SENTRY_DSN is set) if sentry_dsn = System.get_env("SENTRY_DSN") do config :sentry, From 6e9eb956322f36506c07d439d0edcb9481125b30 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 04:43:09 +0300 Subject: [PATCH 42/58] infra: cluster app + worker containers via shared cookie + DNS alias --- docker-compose.prod.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 2f5f191..4fe71f7 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -61,6 +61,7 @@ services: app: image: kith:latest command: ["start"] + hostname: kith-app depends_on: migrate: condition: service_completed_successfully @@ -72,6 +73,11 @@ services: tmpfs: - /tmp:size=64M environment: + # ── BEAM distribution / clustering ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster + # ── existing env vars unchanged ── DATABASE_URL: ${DATABASE_URL} SECRET_KEY_BASE: ${SECRET_KEY_BASE} KITH_HOSTNAME: ${KITH_HOSTNAME:-localhost} @@ -116,6 +122,10 @@ services: REDIS_URL: ${REDIS_URL:-} # Network TRUSTED_PROXIES: ${TRUSTED_PROXIES:-172.16.0.0/12} + networks: + default: + aliases: + - kith-cluster volumes: - uploads:/app/uploads deploy: @@ -135,6 +145,7 @@ services: worker: image: kith:latest command: ["start"] + hostname: kith-worker security_opt: - no-new-privileges:true cap_drop: @@ -148,6 +159,11 @@ services: migrate: condition: service_completed_successfully environment: + # ── BEAM distribution / clustering ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster + # ── existing env vars unchanged ── DATABASE_URL: ${DATABASE_URL} SECRET_KEY_BASE: ${SECRET_KEY_BASE} KITH_HOSTNAME: 
${KITH_HOSTNAME:-localhost} @@ -178,6 +194,10 @@ services: IMMICH_API_KEY: ${IMMICH_API_KEY:-} IMMICH_SYNC_INTERVAL_HOURS: ${IMMICH_SYNC_INTERVAL_HOURS:-24} METRICS_TOKEN: ${METRICS_TOKEN} + networks: + default: + aliases: + - kith-cluster volumes: - uploads:/app/uploads deploy: From d39b5a691030e0a60fc641b718d9e6e0d83e04fe Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 04:46:27 +0300 Subject: [PATCH 43/58] docs: document RELEASE_COOKIE in .env.example --- .env.example | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.env.example b/.env.example index 131136b..5dd039b 100644 --- a/.env.example +++ b/.env.example @@ -12,6 +12,13 @@ # Core — REQUIRED (no defaults) # ============================================================ SECRET_KEY_BASE=generate-with-mix-phx-gen-secret +# Erlang BEAM distribution cookie. Shared between the app and worker +# containers so they can cluster for cross-container PubSub broadcasts +# (LiveView import progress). Generate with one of: +# mix phx.gen.secret 32 +# openssl rand -base64 32 +RELEASE_COOKIE=generate-with-mix-phx-gen-secret + DATABASE_URL=ecto://kith:change_me@postgres:5432/kith_prod AUTH_TOKEN_SALT=generate-with-mix-phx-gen-secret CLOAK_KEY=generate-32-byte-base64-key From 0312a33cb4454b6e2a6b2cb790d1b755486b57cb Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 12:44:04 +0300 Subject: [PATCH 44/58] fix: replace DNSCluster with libcluster Epmd strategy for cross-container clustering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DNSCluster connects via `Node.connect(:"basename@<ip>")` — it uses the raw IP as the host part of the node name. That requires each peer's actual node name to be `name@<ip>`, which conflicts with Phoenix release's default of `name@<hostname>`.
The user's Portainer deployment exposes containers under stable service names (`app`, `worker`) that resolve via Docker DNS — but the BEAM nodes are named after the container ID (`kith@64c98536e88c`), so `Node.connect(:"kith@app")` fails the handshake. Switch to libcluster's Epmd strategy which connects by explicit node name (no IP rewriting). Each container is configured via env to: - `RELEASE_NODE=kith@app` (or `kith@worker`) - `KITH_CLUSTER_HOSTS=kith@app,kith@worker` - `RELEASE_COOKIE=<shared secret>` - `RELEASE_DISTRIBUTION=name` libcluster runs Cluster.Strategy.Epmd which polls `Node.connect/1` for each host periodically; once one direction connects, the bidirectional distribution is established and PubSub spans both. Dev and test are unaffected: `KITH_CLUSTER_HOSTS` is unset, so the libcluster topology list is empty and Cluster.Supervisor no-ops. --- config/runtime.exs | 32 ++++++++++++++++++++++++++++++++ docker-compose.prod.yml | 22 ++++++++-------------- lib/kith/application.ex | 13 +++++++++---- mix.exs | 2 +- mix.lock | 2 +- 5 files changed, 51 insertions(+), 20 deletions(-) diff --git a/config/runtime.exs b/config/runtime.exs index 200f02c..ddcf644 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -231,6 +231,38 @@ if config_env() == :prod do config :kith, Oban, queues: false, plugins: false end + # libcluster — connect this BEAM node to its peer(s) so Phoenix.PubSub + # broadcasts span containers (web ↔ worker). Configure via + # `KITH_CLUSTER_HOSTS` env var: comma-separated long node names, e.g. + # `kith@app,kith@worker`. Leave unset to disable clustering (single-node). + # + # Each container must also set `RELEASE_DISTRIBUTION=name` and + # `RELEASE_NODE=kith@<hostname>` so its actual node name matches the + # name listed in `KITH_CLUSTER_HOSTS`. `RELEASE_COOKIE` must be shared.
+ cluster_hosts = + case System.get_env("KITH_CLUSTER_HOSTS") do + nil -> + [] + + "" -> + [] + + str -> + str + |> String.split(",", trim: true) + |> Enum.map(&(&1 |> String.trim() |> String.to_atom())) + end + + if cluster_hosts != [] do + config :libcluster, + topologies: [ + kith: [ + strategy: Cluster.Strategy.Epmd, + config: [hosts: cluster_hosts] + ] + ] + end + # Sentry error tracking (optional — only when SENTRY_DSN is set) if sentry_dsn = System.get_env("SENTRY_DSN") do config :sentry, diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 4fe71f7..faa8d86 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -61,7 +61,7 @@ services: app: image: kith:latest command: ["start"] - hostname: kith-app + hostname: app depends_on: migrate: condition: service_completed_successfully @@ -73,10 +73,11 @@ services: tmpfs: - /tmp:size=64M environment: - # ── BEAM distribution / clustering ── + # ── BEAM distribution / clustering (libcluster Epmd strategy) ── RELEASE_COOKIE: ${RELEASE_COOKIE} RELEASE_DISTRIBUTION: name - DNS_CLUSTER_QUERY: kith-cluster + RELEASE_NODE: kith@app + KITH_CLUSTER_HOSTS: kith@app,kith@worker # ── existing env vars unchanged ── DATABASE_URL: ${DATABASE_URL} SECRET_KEY_BASE: ${SECRET_KEY_BASE} @@ -122,10 +123,6 @@ services: REDIS_URL: ${REDIS_URL:-} # Network TRUSTED_PROXIES: ${TRUSTED_PROXIES:-172.16.0.0/12} - networks: - default: - aliases: - - kith-cluster volumes: - uploads:/app/uploads deploy: @@ -145,7 +142,7 @@ services: worker: image: kith:latest command: ["start"] - hostname: kith-worker + hostname: worker security_opt: - no-new-privileges:true cap_drop: @@ -159,10 +156,11 @@ services: migrate: condition: service_completed_successfully environment: - # ── BEAM distribution / clustering ── + # ── BEAM distribution / clustering (libcluster Epmd strategy) ── RELEASE_COOKIE: ${RELEASE_COOKIE} RELEASE_DISTRIBUTION: name - DNS_CLUSTER_QUERY: kith-cluster + RELEASE_NODE: kith@worker + KITH_CLUSTER_HOSTS: 
kith@app,kith@worker # ── existing env vars unchanged ── DATABASE_URL: ${DATABASE_URL} SECRET_KEY_BASE: ${SECRET_KEY_BASE} @@ -194,10 +192,6 @@ services: IMMICH_API_KEY: ${IMMICH_API_KEY:-} IMMICH_SYNC_INTERVAL_HOURS: ${IMMICH_SYNC_INTERVAL_HOURS:-24} METRICS_TOKEN: ${METRICS_TOKEN} - networks: - default: - aliases: - - kith-cluster volumes: - uploads:/app/uploads deploy: diff --git a/lib/kith/application.ex b/lib/kith/application.ex index b521fd7..af8913b 100644 --- a/lib/kith/application.ex +++ b/lib/kith/application.ex @@ -27,11 +27,16 @@ defmodule Kith.Application do {Oban, Application.fetch_env!(:kith, Oban)}, {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, {Task.Supervisor, name: Kith.TaskSupervisor}, - # PubSub + DNSCluster live here (not in mode_children) so worker mode - # also starts them. Required for cross-container progress broadcasts - # in the split-deployment topology (`docker-compose.prod.yml`). + # PubSub lives here (not in mode_children) so worker mode also starts + # it. Required for cross-container progress broadcasts in the + # split-deployment topology (app + worker containers). {Phoenix.PubSub, name: Kith.PubSub}, - {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore} + # libcluster: connects this BEAM node to its peer(s) so PubSub spans + # containers. Topology is configured at runtime via env-driven config + # in `runtime.exs`; when no peers are set (dev/test), this supervisor + # starts but does nothing. 
+ {Cluster.Supervisor, + [Application.get_env(:libcluster, :topologies, []), [name: Kith.ClusterSupervisor]]} ] end diff --git a/mix.exs b/mix.exs index 37cfa14..e97f5e2 100644 --- a/mix.exs +++ b/mix.exs @@ -62,7 +62,7 @@ defmodule Kith.MixProject do {:telemetry_poller, "~> 1.0"}, {:gettext, "~> 1.0"}, {:jason, "~> 1.2"}, - {:dns_cluster, "~> 0.2.0"}, + {:libcluster, "~> 3.4"}, {:bandit, "~> 1.5"}, {:heroicons, github: "tailwindlabs/heroicons", diff --git a/mix.lock b/mix.lock index ddafb77..e228409 100644 --- a/mix.lock +++ b/mix.lock @@ -19,7 +19,6 @@ "decimal": {:hex, :decimal, "2.3.0", "3ad6255aa77b4a3c4f818171b12d237500e63525c2fd056699967a3e7ea20f62", [:mix], [], "hexpm", "a4d66355cb29cb47c3cf30e71329e58361cfcb37c34235ef3bf1d7bf3773aeac"}, "dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"}, "digital_token": {:hex, :digital_token, "1.0.0", "454a4444061943f7349a51ef74b7fb1ebd19e6a94f43ef711f7dae88c09347df", [:mix], [{:cldr_utils, "~> 2.17", [hex: :cldr_utils, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "8ed6f5a8c2fa7b07147b9963db506a1b4c7475d9afca6492136535b064c9e9e6"}, - "dns_cluster": {:hex, :dns_cluster, "0.2.0", "aa8eb46e3bd0326bd67b84790c561733b25c5ba2fe3c7e36f28e88f384ebcb33", [:mix], [], "hexpm", "ba6f1893411c69c01b9e8e8f772062535a4cf70f3f35bcc964a324078d8c8240"}, "ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"}, "ecto_sql": 
{:hex, :ecto_sql, "3.13.5", "2f8282b2ad97bf0f0d3217ea0a6fff320ead9e2f8770f810141189d182dc304e", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aa36751f4e6a2b56ae79efb0e088042e010ff4935fc8684e74c23b1f49e25fdc"}, "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"}, @@ -59,6 +58,7 @@ "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "jumper": {:hex, :jumper, "1.0.2", "68cdcd84472a00ac596b4e6459a41b3062d4427cbd4f1e8c8793c5b54f1406a7", [:mix], [], "hexpm", "9b7782409021e01ab3c08270e26f36eb62976a38c1aa64b2eaf6348422f165e1"}, "lazy_html": {:hex, :lazy_html, "0.1.10", "ffe42a0b4e70859cf21a33e12a251e0c76c1dff76391609bd56702a0ef5bc429", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.9.0", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1.0", [hex: :fine, repo: "hexpm", optional: false]}], "hexpm", "50f67e5faa09d45a99c1ddf3fac004f051997877dc8974c5797bb5ccd8e27058"}, + "libcluster": {:hex, :libcluster, "3.5.0", "5ee4cfde4bdf32b2fef271e33ce3241e89509f4344f6c6a8d4069937484866ba", [:mix], [{:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.3", [hex: 
:telemetry, repo: "hexpm", optional: false]}], "hexpm", "ebf6561fcedd765a4cd43b4b8c04b1c87f4177b5fb3cbdfe40a780499d72f743"}, "logger_json": {:hex, :logger_json, "6.2.1", "a1db30e1164e6057f2328a1e4d6b632b9583c015574fdf6c38cf73721128edcb", [:mix], [{:decimal, ">= 0.0.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:ecto, "~> 3.11", [hex: :ecto, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:plug, "~> 1.15", [hex: :plug, repo: "hexpm", optional: true]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "34acd0bfd419d5fcf08c4108a8a4b59b695fcc60409dc1dd1a868b70c42e1d1f"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, From 785f2c70a7936542bb60d0801f89fe51666f0545 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 13:06:17 +0300 Subject: [PATCH 45/58] fix: use sname distribution for libcluster (bare hostnames are illegal in name mode) --- docker-compose.prod.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index faa8d86..0185aa9 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -75,7 +75,7 @@ services: environment: # ── BEAM distribution / clustering (libcluster Epmd strategy) ── RELEASE_COOKIE: ${RELEASE_COOKIE} - RELEASE_DISTRIBUTION: name + RELEASE_DISTRIBUTION: sname RELEASE_NODE: kith@app KITH_CLUSTER_HOSTS: kith@app,kith@worker # ── existing env vars unchanged ── @@ -158,7 +158,7 @@ services: environment: # ── BEAM distribution / clustering (libcluster Epmd strategy) ── 
RELEASE_COOKIE: ${RELEASE_COOKIE} - RELEASE_DISTRIBUTION: name + RELEASE_DISTRIBUTION: sname RELEASE_NODE: kith@worker KITH_CLUSTER_HOSTS: kith@app,kith@worker # ── existing env vars unchanged ── From f4dfb8d8df82536993d7a883b042385e53c057e7 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 21:20:26 +0300 Subject: [PATCH 46/58] docs(specs): design for phone display format fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec for the follow-up to PR #23's Monica normalization work — replaces the hand-rolled NANP-only renderer in PhoneFormatter.format/2 with ExPhoneNumber library calls so account.phone_format honors non-US phones. --- ...6-05-16-phone-display-format-fix-design.md | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md diff --git a/docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md b/docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md new file mode 100644 index 0000000..02e7ec1 --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md @@ -0,0 +1,187 @@ +# Phone Display Format Fix — Design + +**Date:** 2026-05-16 +**Status:** Draft +**Branch base:** `feat/v0.x-multi-area-improvements` (PR #23) + +## Problem + +The account-level `phone_format` setting (`e164` / `national` / `international` / `raw`, +default `e164`) is read at render time in +`KithWeb.ContactLive.ContactFieldsComponent`, which delegates display formatting +to `Kith.Contacts.PhoneFormatter.format/2`. The current implementation of +`format/2` only formats numbers in the NANP region (country code `+1`) via a +hand-rolled binary pattern at `lib/kith/contacts/phone_formatter.ex:176-192`. +Every non-NANP phone (French `+33…`, German `+49…`, Saudi `+966…`, Japanese +`+81…`, etc.) 
silently falls through the catch-all clause and renders unchanged +as raw E.164. + +User-visible symptom (the reported one): after importing French/EU contacts +from Monica, a user sets *Phone Number Format → National* in Account Settings +and still sees `+33123456789` everywhere instead of `01 23 45 67 89`. From the +user's perspective the account setting is silently ignored. + +## Goal + +Make `account.phone_format` honor the user's preference for **every parseable +stored phone**, regardless of country, using the `ExPhoneNumber` library +(libphonenumber port) that the codebase already depends on. + +## Non-Goals + +- No changes to storage. E.164 remains the canonical storage form. +- No changes to write paths. vCard import, CardDAV PUT, REST API + `ContactFieldController`, and manual-edit normalization gaps are real + but tracked separately. +- No new account schema fields. No promotion of "phone default region" to a + first-class setting. +- No changes to the Account Settings UI copy. +- No re-normalization or backfill of existing data — this is a pure display + change. + +## Design + +### Single change point + +`Kith.Contacts.PhoneFormatter.format/2` in +`lib/kith/contacts/phone_formatter.ex`. + +The hand-rolled NANP pattern matches at lines 176-192 (`format_national/1`, +`format_international/1`) are deleted. 
All four format heads remain as +public clauses with new behavior: + +``` +format(nil, _) -> nil +format("", _) -> nil # new — currently undefined +format(v, "raw") -> v # unchanged +format(v, "e164") -> v # unchanged +format(v, "national") -> render(v, :national) +format(v, "international")-> render(v, :international) +format(v, _other) -> v # unchanged — defensive catch-all +``` + +`render/2` (private): + +```elixir +defp render(value, library_format) do + case ExPhoneNumber.parse(value, nil) do + {:ok, parsed} -> ExPhoneNumber.format(parsed, library_format) + {:error, _} -> value + end +end +``` + +- `ExPhoneNumber.parse(value, nil)` passes `nil` as the default region. Stored + values are E.164 with a `+` prefix, so the parser uses the carried country + code and produces a `PhoneNumber` struct that knows its own country. No + account-level region is consulted at display time. +- `ExPhoneNumber.format/2` accepts the format-type atoms `:e164`, `:national`, + `:international`, `:rfc3966`. We use `:national` and `:international`. +- On parse failure (legacy bare numbers like `"5551234567"` written before + the normalization work in PR #23, or truly garbage input), return the + stored value unchanged. This matches the existing + `PhoneFormatter.normalize/2` philosophy of never destroying user data. + +### What gets deleted + +- `defp format_national/1` head with the NANP-only binary pattern +- `defp format_national(phone), do: phone` fallback +- `defp format_international/1` head with the NANP-only binary pattern +- `defp format_international(phone), do: phone` fallback + +Total: ~16 lines removed. + +### What stays + +- `PhoneFormatter.normalize/1`, `normalize/2` — storage canonicalization. Untouched. +- `PhoneFormatter.region_for_locale/1` — locale→region mapping used by Monica + wizard and `PhoneRenormalizeWorker`. Untouched. +- `PhoneFormatter.supported_regions/1` — wizard dropdown source. Untouched. +- The `"raw"` and `"e164"` heads of `format/2` — pass-through. 
Untouched. + +### Surface-area audit + +A repo-wide `grep` confirms that phone display goes through exactly one path: + +``` +lib/kith_web/live/contact_live/contact_fields_component.ex:101 + PhoneFormatter.format(field.value, phone_format) +``` + +invoked from `show.html.heex:288` and `show.html.heex:417` (contact-show page, +both desktop and mobile layouts). No other LiveView or controller renders a +phone number through human-facing UI today. + +The REST API (`contact_json.ex:113`) returns `cf.value` raw — this is correct +behavior for an API contract and is **not** changed. + +A moduledoc note will be added to `PhoneFormatter` warning future contributors +that any new UI surface displaying a phone must call `format/2` with the +account's `phone_format`. + +## Test Plan + +**File:** `test/kith/contacts/phone_formatter_test.exs` (exists; has a +`describe "format/2"` block at lines 136-164). + +**Tests to replace** (these currently encode the bug as expected behavior): + +- Line 153-155 — `"national falls back for non-US numbers"` asserts + `format("+442079460958", "national") == "+442079460958"`. Replace with an + assertion that the GB national format is produced (exact string verified + against `ExPhoneNumber.format/2` output during implementation). +- Line 157-159 — `"international falls back for non-US numbers"` asserts + unchanged output. Replace with the proper international rendering. + +**Tests to add:** the full coverage matrix below. + +**Test to keep:** lines 137-150 (US national/international, e164, raw) — they +remain correct. 
+ +Coverage matrix: + +| Input value | `e164` | `national` | `international` | `raw` | +|-----------------|-----------------|-------------------|----------------------|-----------------| +| `+12025550100` | `+12025550100` | `(202) 555-0100` | `+1 202-555-0100` | `+12025550100` | +| `+33123456789` | `+33123456789` | `01 23 45 67 89` | `+33 1 23 45 67 89` | `+33123456789` | +| `+493012345678` | `+493012345678` | `030 12345678` | `+49 30 12345678` | `+493012345678` | +| `+819012345678` | `+819012345678` | `090-1234-5678` | `+81 90-1234-5678` | `+819012345678` | +| `+966501234567` | `+966501234567` | `050 123 4567` | `+966 50 123 4567` | `+966501234567` | +| `5551234567` | `5551234567` | `5551234567` | `5551234567` | `5551234567` | +| `garbage` | `garbage` | `garbage` | `garbage` | `garbage` | +| `nil` | `nil` | `nil` | `nil` | `nil` | +| `""` | `nil` | `nil` | `nil` | `nil` | + +Exact rendered strings for non-US cases will be verified against libphonenumber +output during implementation (libphonenumber's formatting may use NBSP or +different separators per locale; the tests should match what the library +actually produces, not what the spec author guessed). + +A regression test will assert that the `ContactFieldsComponent.display_value/2` +private helper returns a properly-formatted national string when given a +French phone and `phone_format: "national"` (component-level test using +ExUnit + LiveView test helpers). + +## Risks + +- **None to data.** Pure rendering change; storage untouched. +- **Library behavior drift.** If a future version of `ex_phone_number` changes + its national-format output for any tested locale, the matrix tests will + catch it. The tests pin behavior. +- **Library raises on edge input.** If `ExPhoneNumber.format/2` ever raises + (it shouldn't on a successfully-parsed number, but defensive coding helps), + a `try/rescue` around the library call returning the stored value would be + added. 
Initial implementation will not include the rescue; it will be added + only if a failing case surfaces in test or production telemetry. + +## Acceptance Criteria + +1. The test matrix above passes. +2. The four-line NANP-only binary-pattern helpers are removed from + `phone_formatter.ex`. +3. `mix quality` (compile + format + credo + sobelow + dialyzer) passes. +4. Manual smoke check: log in with a test account holding French and US + phones, switch *Phone Number Format* between `national` and + `international` in Account Settings, confirm both numbers re-render + correctly on the contact-show page. +5. PR description references this spec by path. From 67f36882fc66495aab225006b353e285467e4788 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 21:58:28 +0300 Subject: [PATCH 47/58] docs(plans): implementation plan for phone display format fix Bite-sized TDD plan to replace the NANP-only renderer in PhoneFormatter.format/2 with ExPhoneNumber library calls. Includes IEx probe step to capture exact library output strings, replacement of bug-pinning tests, non-NANP coverage matrix, and a Playwright extension to guard the e2e display path. --- .../2026-05-16-phone-display-format-fix.md | 582 ++++++++++++++++++ 1 file changed, 582 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-16-phone-display-format-fix.md diff --git a/docs/superpowers/plans/2026-05-16-phone-display-format-fix.md b/docs/superpowers/plans/2026-05-16-phone-display-format-fix.md new file mode 100644 index 0000000..d755f31 --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-phone-display-format-fix.md @@ -0,0 +1,582 @@ +# Phone Display Format Fix Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Make `account.phone_format` honor the user's preference for every parseable stored phone, regardless of country, by replacing the hand-rolled NANP-only renderer with `ExPhoneNumber` library calls. + +**Architecture:** Pure rendering change to `Kith.Contacts.PhoneFormatter.format/2`. Parses the stored E.164 value (which carries its own country code), then asks `ExPhoneNumber` for the requested format. On parse failure, returns the stored value unchanged so user data is never destroyed. No storage changes, no schema changes, no new account fields. + +**Tech Stack:** Elixir, Phoenix LiveView, ExUnit, ExPhoneNumber (libphonenumber port, already a dependency in this branch via `mix.exs`). + +**Design spec:** `docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md` + +--- + +## Pre-flight + +These steps assume you are working in the existing `.worktrees/pr-23-review/` worktree on branch `pr-23-review`. If you are landing this as a separate follow-up PR rather than stacked on PR #23, branch off `main` and cherry-pick the design-spec commit first. + +- [ ] **Step 0a: Confirm you are in the right worktree** + +```bash +pwd +# Expected: /Users/basharqassis/projects/kith/.worktrees/pr-23-review +git branch --show-current +# Expected: pr-23-review (or your fix branch) +``` + +- [ ] **Step 0b: Fetch dependencies if not already present** + +```bash +mix deps.get +``` + +Expected: `* Getting ex_phone_number (Hex package)` or "All dependencies are up to date" if already fetched. + +- [ ] **Step 0c: Verify baseline tests pass for the file you're about to change** + +```bash +mix test test/kith/contacts/phone_formatter_test.exs +``` + +Expected: all current tests pass (including the two that pin the bug — that's the baseline). 
+ +--- + +## Task 1: Discover the exact library output strings for the test matrix + +**Why this task exists:** libphonenumber's national/international format strings can include non-breaking spaces (` `), different separator characters, and per-locale conventions. The spec's matrix lists *illustrative* output. Real test assertions must match what `ExPhoneNumber.format/2` actually produces on this version of the library, otherwise tests will fail on string mismatch even when the implementation is correct. + +**Files:** +- Read: `lib/kith/contacts/phone_formatter.ex:65-74` (the existing `parse_to_e164` helper — confirms `ExPhoneNumber.parse/2` API) + +- [ ] **Step 1.1: Open IEx with the project loaded** + +```bash +iex -S mix +``` + +- [ ] **Step 1.2: Probe each row of the matrix and record exact library output** + +Run each of the following at the IEx prompt and **write the actual returned strings into a scratchpad** (a sticky note, a comment in your editor, whatever). You will paste them into the tests in Task 3. + +```elixir +alias ExPhoneNumber + +# Helper to run both formats for one E.164 input +probe = fn e164 -> + {:ok, p} = ExPhoneNumber.parse(e164, nil) + %{ + national: ExPhoneNumber.format(p, :national), + international: ExPhoneNumber.format(p, :international) + } +end + +probe.("+12025550100") # US +probe.("+12345678901") # US (used in existing tests at line 137-150) +probe.("+33123456789") # FR +probe.("+493012345678") # DE +probe.("+819012345678") # JP +probe.("+966501234567") # SA +probe.("+442079460958") # GB (used in existing bug-pinning tests at line 153-159) +``` + +Expected: each call returns `%{national: "...", international: "..."}` with formatted strings. Some strings may include ` ` (NBSP) — copy them **byte-exact**, not "what they look like printed". 
+ +- [ ] **Step 1.3: Exit IEx** + +```elixir +:init.stop() +``` + +--- + +## Task 2: Rewrite `PhoneFormatter.format/2` to use the library + +**Files:** +- Modify: `lib/kith/contacts/phone_formatter.ex:169-192` + +The current code (lines 169-192) ends like this — these are the lines you replace: + +```elixir +def format(nil, _format), do: nil +def format(phone, "raw"), do: phone +def format(phone, "e164"), do: phone +def format(phone, "national"), do: format_national(phone) +def format(phone, "international"), do: format_international(phone) +def format(phone, _), do: phone + +defp format_national( + <<"+"::utf8, ?1, area::binary-size(3), prefix::binary-size(3), line::binary-size(4)>> + ) + when byte_size(area) == 3 do + "(#{area}) #{prefix}-#{line}" +end + +defp format_national(phone), do: phone + +defp format_international( + <<"+"::utf8, ?1, area::binary-size(3), prefix::binary-size(3), line::binary-size(4)>> + ) + when byte_size(area) == 3 do + "+1 #{area}-#{prefix}-#{line}" +end + +defp format_international(phone), do: phone +``` + +- [ ] **Step 2.1: Read the file to anchor the Edit tool** + +Open `lib/kith/contacts/phone_formatter.ex` and locate line 169. + +- [ ] **Step 2.2: Replace the `format/2` heads and helper privates** + +Replace the block at lines 169-192 with this exact code: + +```elixir +def format(nil, _format), do: nil +def format("", _format), do: nil +def format(phone, "raw"), do: phone +def format(phone, "e164"), do: phone +def format(phone, "national"), do: render(phone, :national) +def format(phone, "international"), do: render(phone, :international) +def format(phone, _), do: phone + +defp render(value, library_format) do + case ExPhoneNumber.parse(value, nil) do + {:ok, parsed} -> ExPhoneNumber.format(parsed, library_format) + {:error, _} -> value + end +end +``` + +Notes for the implementer: +- The `format("", _format), do: nil` clause is new. 
It makes empty input behave like `nil` input, matching the existing `normalize/2` behavior at lines 42-43. +- The `format(phone, _), do: phone` catch-all stays — it covers unknown format strings (defensive against typos/migrations of the `phone_format` field). +- Do **not** rescue exceptions from `ExPhoneNumber.format/2`. The library returns a string for any parsed phone; raising would be a library bug we want to surface, not hide. If telemetry later shows a real production case, add a rescue then. + +- [ ] **Step 2.3: Confirm the file still compiles** + +```bash +mix compile --warnings-as-errors +``` + +Expected: clean compile, no warnings. + +- [ ] **Step 2.4: Confirm `mix format` produces no diff** + +```bash +mix format +git diff --stat lib/kith/contacts/phone_formatter.ex +``` + +Expected: still only your edit on the diff — no auto-format noise. + +--- + +## Task 3: Replace the bug-pinning tests with correct ones + +**Files:** +- Modify: `test/kith/contacts/phone_formatter_test.exs:153-159` (delete; replace with full non-NANP coverage) +- Modify: `test/kith/contacts/phone_formatter_test.exs:161-163` (extend `nil` test; add `""` test) + +The current `describe "format/2"` block (lines 136-164) has two tests that encode the bug as expected behavior. They must be **deleted**, not added to. + +- [ ] **Step 3.1: Delete the two bug-pinning tests** + +Use `Edit` on `test/kith/contacts/phone_formatter_test.exs` to remove this block exactly: + +```elixir + test "national falls back for non-US numbers" do + assert "+442079460958" = PhoneFormatter.format("+442079460958", "national") + end + + test "international falls back for non-US numbers" do + assert "+442079460958" = PhoneFormatter.format("+442079460958", "international") + end +``` + +- [ ] **Step 3.2: Run the test file to confirm baseline state** + +```bash +mix test test/kith/contacts/phone_formatter_test.exs +``` + +Expected: all remaining tests pass. The two deleted tests are gone. 
+ +- [ ] **Step 3.3: Add the failing test for GB national formatting** + +Insert the following at the same position the deleted tests occupied (still inside `describe "format/2"`). Use the exact GB national string you recorded from Task 1.2 — replace the `GB_NATIONAL` placeholder below with that string. + +```elixir + test "national formats GB number" do + assert "GB_NATIONAL" = + PhoneFormatter.format("+442079460958", "national") + end +``` + +- [ ] **Step 3.4: Run only the new test and confirm it FAILS first if you skipped Task 2** + +If you are doing strict TDD and inserted this test before Task 2, run (substituting the new test's line number for `LINE_NUMBER`): + +```bash +mix test test/kith/contacts/phone_formatter_test.exs --only line:LINE_NUMBER +``` + +Expected (pre-Task-2): FAIL with `match (=) failed` showing the actual mismatch. +Expected (post-Task-2): PASS. + +If you already did Task 2, run the test and expect PASS: + +```bash +mix test test/kith/contacts/phone_formatter_test.exs +``` + +- [ ] **Step 3.5: Add the GB international test** + +Insert next to the GB national test. Replace the `GB_INTERNATIONAL` placeholder with your Task 1.2 output for `+442079460958` in international. + +```elixir + test "international formats GB number" do + assert "GB_INTERNATIONAL" = + PhoneFormatter.format("+442079460958", "international") + end +``` + +- [ ] **Step 3.6: Add French, German, Japanese, Saudi national + international tests** + +Use Task 1.2 outputs. 
Insert all eight tests inside `describe "format/2"`, replacing each `*_NATIONAL` / `*_INTERNATIONAL` placeholder with the byte-exact string you recorded: + +```elixir + test "national formats FR number" do + assert "FR_NATIONAL" = PhoneFormatter.format("+33123456789", "national") + end + + test "international formats FR number" do + assert "FR_INTERNATIONAL" = + PhoneFormatter.format("+33123456789", "international") + end + + test "national formats DE number" do + assert "DE_NATIONAL" = + PhoneFormatter.format("+493012345678", "national") + end + + test "international formats DE number" do + assert "DE_INTERNATIONAL" = + PhoneFormatter.format("+493012345678", "international") + end + + test "national formats JP number" do + assert "JP_NATIONAL" = + PhoneFormatter.format("+819012345678", "national") + end + + test "international formats JP number" do + assert "JP_INTERNATIONAL" = + PhoneFormatter.format("+819012345678", "international") + end + + test "national formats SA number" do + assert "SA_NATIONAL" = + PhoneFormatter.format("+966501234567", "national") + end + + test "international formats SA number" do + assert "SA_INTERNATIONAL" = + PhoneFormatter.format("+966501234567", "international") + end +``` + +- [ ] **Step 3.7: Add unparseable-input tests (`"5551234567"` and `"garbage"`)** + +These guard the "never destroy user data" contract on parse failure: + +```elixir + test "national leaves bare-number legacy value unchanged" do + assert "5551234567" = PhoneFormatter.format("5551234567", "national") + end + + test "international leaves bare-number legacy value unchanged" do + assert "5551234567" = PhoneFormatter.format("5551234567", "international") + end + + test "national leaves unparseable input unchanged" do + assert "garbage" = PhoneFormatter.format("garbage", "national") + end + + test "international leaves unparseable input unchanged" do + assert "garbage" = PhoneFormatter.format("garbage", "international") + end +``` + +- [ ] **Step 3.8: Extend the empty-string and nil coverage** + +Replace the existing single nil-test (lines 161-163 in the original file) with the full nil/empty matrix: + +```elixir + test "nil returns nil for every format" do + for fmt <- 
["e164", "national", "international", "raw"] do + assert is_nil(PhoneFormatter.format(nil, fmt)), "expected nil for format=#{fmt}" + end + end + + test "empty string returns nil for every format" do + for fmt <- ["e164", "national", "international", "raw"] do + assert is_nil(PhoneFormatter.format("", fmt)), "expected nil for format=#{fmt}" + end + end +``` + +- [ ] **Step 3.9: Run the full test file and confirm green** + +```bash +mix test test/kith/contacts/phone_formatter_test.exs +``` + +Expected: all tests pass (existing US tests + 8 new non-NANP tests + 4 unparseable tests + nil/empty matrix tests). + +If any assertion fails because the recorded library string doesn't match: re-check your Task 1.2 capture — most likely you copied a printed representation rather than the raw bytes (NBSP vs space). Re-probe in IEx and use `IO.inspect(s, binaries: :as_binaries)` to see the byte sequence. + +--- + +## Task 4: Update the moduledoc + +**Files:** +- Modify: `lib/kith/contacts/phone_formatter.ex:1-13` + +The current moduledoc claims `format/2` "renders the stored E.164 value as national/international/raw" — which was misleading because non-NANP rendering was broken. Tighten the language and add a brief contributor note. + +- [ ] **Step 4.1: Replace the moduledoc** + +Replace lines 1-13 with: + +```elixir +defmodule Kith.Contacts.PhoneFormatter do + @moduledoc """ + Phone number normalization (E.164 for storage) and display formatting. + + Storage form is E.164 when the value can be parsed as a valid international + number — either because it carries a `+` country-code prefix, or because the + caller supplies a `default_region` (ISO 3166-1 alpha-2) for bare numbers. + Unparseable input is returned trimmed-but-otherwise-unchanged so user data + is never silently destroyed. + + Display formatting (`format/2`) reads the account's `phone_format` + preference and renders the stored value via the `ExPhoneNumber` + (libphonenumber) library. 
The phone's country code (carried in the stored + E.164 value) determines national-format conventions; the account's region + is not consulted at display time. Unparseable stored values pass through + unchanged. + + **Contributors:** any UI surface that displays a phone number for a human + must call `format/2` with the account's `phone_format` setting, otherwise + the user's display preference is silently ignored. API/JSON responses are + exempt — those return the canonical E.164 storage value. + """ +``` + +- [ ] **Step 4.2: Confirm compile + format still clean** + +```bash +mix compile --warnings-as-errors && mix format +git diff --stat +``` + +Expected: only your edits in the diff. + +--- + +## Task 5: Extend the existing Playwright spec with a non-NANP case + +**Files:** +- Modify: `test/playwright/phone-format.spec.ts` + +The existing spec covers NANP `(234) 567-8901` and `+1 234-567-8901` — i.e., exactly the country where the broken implementation already worked. It does not exercise any non-NANP path, which is why the bug was never caught. + +**Why a Playwright spec instead of an LV component unit test:** the existing Playwright spec already covers the contact-show → settings → re-render cycle end-to-end. Building a parallel LV component test would duplicate that with a brittler surface (private function tested through HTML assertion + full ConnCase fixtures). Extending what's already there is the right shape. + +- [ ] **Step 5.1: Add a non-NANP test for National format** + +Open `test/playwright/phone-format.spec.ts`. Insert this test inside the existing `test.describe("Phone Number Formatting", () => { ... })` block, immediately after the existing `"phone displayed in National format"` test (line ~98). 
Use your Task 1.2 recorded GB national string in place of the `GB_NATIONAL` placeholder: + +```ts + test("non-NANP phone displayed in National format", async ({ page }) => { + // Change setting to National + await page.goto("/settings/account"); + await page.waitForLoadState("networkidle"); + await page.waitForTimeout(300); + + const select = page.locator('select[name="account[phone_format]"]'); + if ((await select.count()) > 0) { + await select.selectOption("national"); + await page.getByRole("button", { name: /save/i }).first().click(); + await page.waitForTimeout(500); + } + + // Add a GB phone to the contact + await goToContact(page, contactId); + await addPhoneToContact(page, "+44 20 7946 0958"); + + // Should render in GB national format (NOT raw E.164) + const content = await page.content(); + expect(content).toContain("GB_NATIONAL"); + }); +``` + +- [ ] **Step 5.2: Add a non-NANP test for International format** + +Insert immediately after the previous test. Replace the `GB_INTERNATIONAL` placeholder with your Task 1.2 recorded GB international string: + +```ts + test("non-NANP phone displayed in International format", async ({ page }) => { + await page.goto("/settings/account"); + await page.waitForLoadState("networkidle"); + await page.waitForTimeout(300); + + const select = page.locator('select[name="account[phone_format]"]'); + if ((await select.count()) > 0) { + await select.selectOption("international"); + await page.getByRole("button", { name: /save/i }).first().click(); + await page.waitForTimeout(500); + } + + await goToContact(page, contactId); + await addPhoneToContact(page, "+44 20 7946 0958"); + + const content = await page.content(); + expect(content).toContain("GB_INTERNATIONAL"); + }); +``` + +- [ ] **Step 5.3: Run the Playwright project locally (optional but recommended)** + +Playwright needs a running dev server. In one terminal: + +```bash +mix phx.server +``` + +In another: + +```bash +npx playwright test --project=e2e test/playwright/phone-format.spec.ts +``` + +Expected: all tests pass, including the two new non-NANP ones. 
+ +If you can't run Playwright locally, CI will run it on push — the new tests guard the contact-show display path against future regressions either way. + +--- + +## Task 6: Full quality gate + manual smoke + +- [ ] **Step 6.1: Full test suite** + +```bash +mix test +``` + +Expected: 0 failures. + +- [ ] **Step 6.2: Static analysis** + +```bash +mix quality +``` + +Expected: clean across format, credo, sobelow, dialyzer. + +- [ ] **Step 6.3: Manual smoke in dev** + +```bash +mix phx.server +``` + +In a browser: +1. Log in as a test user. +2. Create or pick a contact, add a French phone (`+33123456789`) and a US phone (`+12025550100`). +3. Navigate to Account Settings, set *Phone Number Format* to `National`. Save. +4. Go back to the contact-show page. Both phones should render in their respective national formats. +5. Set the preference to `International`. Both phones should re-render accordingly. +6. Set the preference to `E.164` and `Raw`. Both phones should render as `+33123456789` / `+12025550100`. + +Document the results in your commit message or PR description (a screenshot is ideal but not required). + +--- + +## Task 7: Commit and open PR + +- [ ] **Step 7.1: Stage and commit** + +```bash +git add lib/kith/contacts/phone_formatter.ex \ + test/kith/contacts/phone_formatter_test.exs \ + test/playwright/phone-format.spec.ts + +git -c commit.gpgsign=false commit -m "$(cat <<'EOF' +fix(phone): honor account.phone_format for non-NANP numbers + +The hand-rolled NANP binary-pattern formatter in PhoneFormatter.format/2 +only handled +1 numbers; every other country silently fell through and +rendered as raw E.164 regardless of the account's display preference. +Replace with ExPhoneNumber.format/2 which uses the phone's own country +code to drive locale-correct national/international rendering. + +- Unparseable values pass through unchanged (matches normalize/2 contract). +- Empty string now returns nil consistently with nil input. 
+- Bug-pinning tests at lines 153-159 of phone_formatter_test.exs are + replaced with the correct expected output for GB, FR, DE, JP, SA. +- Playwright spec extended with non-NANP National/International tests + to guard the end-to-end display path against future regressions. + +Spec: docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md +EOF +)" +``` + +- [ ] **Step 7.2: Push and open PR** + +If working stacked on PR #23: + +```bash +git push -u origin pr-23-review +gh pr view 23 +``` + +If landing as a separate follow-up PR off `main`: + +```bash +git push -u origin HEAD +gh pr create --title "fix(phone): honor account.phone_format for non-NANP numbers" --body "$(cat <<'EOF' +## Summary +- Replaces the NANP-only hand-rolled binary-pattern formatter in PhoneFormatter.format/2 with ExPhoneNumber library calls +- account.phone_format now works for every country, not just +1 +- Bug-pinning tests replaced with correct expected output +- Playwright spec extended with non-NANP cases to guard against future regressions + +## Spec +docs/superpowers/specs/2026-05-16-phone-display-format-fix-design.md + +## Test plan +- [x] mix test (full suite, 0 failures) +- [x] mix quality (format/credo/sobelow/dialyzer) +- [x] Manual smoke: French + US contact, toggle phone_format across all four values, both phones re-render correctly each time +EOF +)" +``` + +--- + +## Done Criteria + +1. `mix test` reports 0 failures. +2. `mix quality` is clean. +3. The two bug-pinning tests at `phone_formatter_test.exs:153-159` are gone; replaced with correct GB rendering assertions. +4. Eight new non-NANP tests pass (FR, DE, JP, SA × national, international). +5. Four legacy/garbage-input tests pass. +6. nil and empty-string matrix tests pass. +7. Playwright spec has two new non-NANP tests covering National + International formats (running them locally is optional; CI enforces). +8. Manual smoke confirms phone_format toggle works for both NANP and non-NANP phones. +9. 
The hand-rolled `format_national/format_international` private helpers and their fall-through clauses are removed from `phone_formatter.ex`. +10. The moduledoc reflects the new behavior and includes the contributor warning. From e39a6d527dd557c17f63b3b8a24537c80e9ccd22 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 22:07:40 +0300 Subject: [PATCH 48/58] fix(phone): replace NANP-only renderer with ExPhoneNumber library calls PhoneFormatter.format/2 honored account.phone_format only for +1 numbers because format_national/format_international were hand-rolled binary patterns matching the NANP shape. Every non-NANP phone fell through to the unchanged- pass-through clause, silently ignoring the user's display preference. Replace with ExPhoneNumber.format/2 (libphonenumber port already declared as :ex_phone_number in mix.exs). The phone's own country code drives the national rendering; unparseable input passes through unchanged, matching the existing normalize/2 contract. Tests at lines 153-159 of phone_formatter_test.exs encoded the bug as expected behavior; replaced with correct GB rendering plus FR/DE/JP/SA coverage and legacy/garbage/empty-string tests. 
--- lib/kith/contacts/phone_formatter.ex | 26 +++----- test/kith/contacts/phone_formatter_test.exs | 68 +++++++++++++++++++-- 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/lib/kith/contacts/phone_formatter.ex b/lib/kith/contacts/phone_formatter.ex index 4517e15..e3daa64 100644 --- a/lib/kith/contacts/phone_formatter.ex +++ b/lib/kith/contacts/phone_formatter.ex @@ -167,27 +167,17 @@ defmodule Kith.Contacts.PhoneFormatter do * `"raw"` — return the stored value unchanged """ def format(nil, _format), do: nil + def format("", _format), do: nil def format(phone, "raw"), do: phone def format(phone, "e164"), do: phone - def format(phone, "national"), do: format_national(phone) - def format(phone, "international"), do: format_international(phone) + def format(phone, "national"), do: render(phone, :national) + def format(phone, "international"), do: render(phone, :international) def format(phone, _), do: phone - defp format_national( - <<"+"::utf8, ?1, area::binary-size(3), prefix::binary-size(3), line::binary-size(4)>> - ) - when byte_size(area) == 3 do - "(#{area}) #{prefix}-#{line}" - end - - defp format_national(phone), do: phone - - defp format_international( - <<"+"::utf8, ?1, area::binary-size(3), prefix::binary-size(3), line::binary-size(4)>> - ) - when byte_size(area) == 3 do - "+1 #{area}-#{prefix}-#{line}" + defp render(value, library_format) do + case ExPhoneNumber.parse(value, nil) do + {:ok, parsed} -> ExPhoneNumber.format(parsed, library_format) + {:error, _} -> value + end end - - defp format_international(phone), do: phone end diff --git a/test/kith/contacts/phone_formatter_test.exs b/test/kith/contacts/phone_formatter_test.exs index 0b35009..03c658a 100644 --- a/test/kith/contacts/phone_formatter_test.exs +++ b/test/kith/contacts/phone_formatter_test.exs @@ -150,16 +150,72 @@ defmodule Kith.Contacts.PhoneFormatterTest do assert "+1 234-567-8901" = PhoneFormatter.format("+12345678901", "international") end - test "national falls back for 
non-US numbers" do - assert "+442079460958" = PhoneFormatter.format("+442079460958", "national") + test "national formats GB number" do + assert "020 7946 0958" = PhoneFormatter.format("+442079460958", "national") end - test "international falls back for non-US numbers" do - assert "+442079460958" = PhoneFormatter.format("+442079460958", "international") + test "international formats GB number" do + assert "+44 20 7946 0958" = PhoneFormatter.format("+442079460958", "international") end - test "nil returns nil" do - assert nil == PhoneFormatter.format(nil, "e164") + test "national formats FR number" do + assert "01 23 45 67 89" = PhoneFormatter.format("+33123456789", "national") + end + + test "international formats FR number" do + assert "+33 1 23 45 67 89" = PhoneFormatter.format("+33123456789", "international") + end + + test "national formats DE number" do + assert "030 12345678" = PhoneFormatter.format("+493012345678", "national") + end + + test "international formats DE number" do + assert "+49 30 12345678" = PhoneFormatter.format("+493012345678", "international") + end + + test "national formats JP number" do + assert "090-1234-5678" = PhoneFormatter.format("+819012345678", "national") + end + + test "international formats JP number" do + assert "+81 90-1234-5678" = PhoneFormatter.format("+819012345678", "international") + end + + test "national formats SA number" do + assert "050 123 4567" = PhoneFormatter.format("+966501234567", "national") + end + + test "international formats SA number" do + assert "+966 50 123 4567" = PhoneFormatter.format("+966501234567", "international") + end + + test "national leaves bare-number legacy value unchanged" do + assert "5551234567" = PhoneFormatter.format("5551234567", "national") + end + + test "international leaves bare-number legacy value unchanged" do + assert "5551234567" = PhoneFormatter.format("5551234567", "international") + end + + test "national leaves unparseable input unchanged" do + assert "garbage" = 
PhoneFormatter.format("garbage", "national") + end + + test "international leaves unparseable input unchanged" do + assert "garbage" = PhoneFormatter.format("garbage", "international") + end + + test "nil returns nil for every format" do + for fmt <- ["e164", "national", "international", "raw"] do + assert is_nil(PhoneFormatter.format(nil, fmt)), "expected nil for format=#{fmt}" + end + end + + test "empty string returns nil for every format" do + for fmt <- ["e164", "national", "international", "raw"] do + assert is_nil(PhoneFormatter.format("", fmt)), "expected nil for format=#{fmt}" + end end end end From cd74b851c330282206f0c03308b5d26b988e2541 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 22:12:33 +0300 Subject: [PATCH 49/58] docs(phone): update PhoneFormatter moduledoc + rename render arg Moduledoc now reflects the ExPhoneNumber-driven implementation and warns contributors that any new phone-display UI must call format/2 with the account's phone_format setting. Also rename render/2's parameter from library_format to phone_number_format with a clarifying inline comment, addressing a code-review nit about the overload with the account's phone_format string field. --- lib/kith/contacts/phone_formatter.ex | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/kith/contacts/phone_formatter.ex b/lib/kith/contacts/phone_formatter.ex index e3daa64..5dfd77b 100644 --- a/lib/kith/contacts/phone_formatter.ex +++ b/lib/kith/contacts/phone_formatter.ex @@ -9,7 +9,16 @@ defmodule Kith.Contacts.PhoneFormatter do is never silently destroyed. Display formatting (`format/2`) reads the account's `phone_format` - preference and renders the stored E.164 value as national/international/raw. + preference and renders the stored value via the `ExPhoneNumber` + (libphonenumber) library. 
The phone's country code (carried in the stored + E.164 value) determines national-format conventions; the account's region + is not consulted at display time. Unparseable stored values pass through + unchanged. + + **Contributors:** any UI surface that displays a phone number for a human + must call `format/2` with the account's `phone_format` setting, otherwise + the user's display preference is silently ignored. API/JSON responses are + exempt — those return the canonical E.164 storage value. """ alias ExPhoneNumber @@ -174,9 +183,10 @@ defmodule Kith.Contacts.PhoneFormatter do def format(phone, "international"), do: render(phone, :international) def format(phone, _), do: phone - defp render(value, library_format) do + defp render(value, phone_number_format) do + # phone_number_format is an ExPhoneNumber atom: :national or :international. case ExPhoneNumber.parse(value, nil) do - {:ok, parsed} -> ExPhoneNumber.format(parsed, library_format) + {:ok, parsed} -> ExPhoneNumber.format(parsed, phone_number_format) {:error, _} -> value end end From 375c98e011c2c6896541a03ad1dce49e98812dd6 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sat, 16 May 2026 22:13:52 +0300 Subject: [PATCH 50/58] test(phone): add non-NANP Playwright coverage for display format The existing Playwright spec only exercises +1 numbers, which is precisely the country where the NANP-only renderer happened to work. Adding GB national + international cases guards the e2e display path against a regression where someone reintroduces locale-specific hand-rolled rendering. 
--- test/playwright/phone-format.spec.ts | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/playwright/phone-format.spec.ts b/test/playwright/phone-format.spec.ts index 504ed0b..65cfbb3 100644 --- a/test/playwright/phone-format.spec.ts +++ b/test/playwright/phone-format.spec.ts @@ -97,6 +97,47 @@ test.describe("Phone Number Formatting", () => { expect(content).toContain("(234) 567-8901"); }); + test("non-NANP phone displayed in National format", async ({ page }) => { + // Change setting to National + await page.goto("/settings/account"); + await page.waitForLoadState("networkidle"); + await page.waitForTimeout(300); + + const select = page.locator('select[name="account[phone_format]"]'); + if ((await select.count()) > 0) { + await select.selectOption("national"); + await page.getByRole("button", { name: /save/i }).first().click(); + await page.waitForTimeout(500); + } + + // Add a GB phone to the contact + await goToContact(page, contactId); + await addPhoneToContact(page, "+44 20 7946 0958"); + + // Should render in GB national format (NOT raw E.164) + const content = await page.content(); + expect(content).toContain("020 7946 0958"); + }); + + test("non-NANP phone displayed in International format", async ({ page }) => { + await page.goto("/settings/account"); + await page.waitForLoadState("networkidle"); + await page.waitForTimeout(300); + + const select = page.locator('select[name="account[phone_format]"]'); + if ((await select.count()) > 0) { + await select.selectOption("international"); + await page.getByRole("button", { name: /save/i }).first().click(); + await page.waitForTimeout(500); + } + + await goToContact(page, contactId); + await addPhoneToContact(page, "+44 20 7946 0958"); + + const content = await page.content(); + expect(content).toContain("+44 20 7946 0958"); + }); + test("phone displayed in International format", async ({ page }) => { // Change setting to International await page.goto("/settings/account"); From 
4919369a469e2a5a2f79c6038df63463e5e310b3 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 00:44:26 +0300 Subject: [PATCH 51/58] docs(specs): design for Monica import coverage backfill Spec to address silent contact drop in Monica v4 /api/contacts listing. v4 LIMIT/OFFSET pagination over ORDER BY created_at loses a deterministic ~1.7% of contacts at tie-group boundaries with no visible error. Design adds a Phase 1.4 coverage check between the listing crawl and auto-merge: re-fetch meta.total, compare against import_records, backfill the gap via direct GET /api/contacts/:id for IDs in [min, max] not already seen, applying Monica's same is_active and is_partial filters client-side to avoid importing rows the listing deliberately hides. Partials ARE imported to anchor relationship targets. --- ...-monica-import-coverage-backfill-design.md | 173 ++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md diff --git a/docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md b/docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md new file mode 100644 index 0000000..6329d69 --- /dev/null +++ b/docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md @@ -0,0 +1,173 @@ +# Monica Import — Coverage Backfill Design + +**Date:** 2026-05-17 +**Status:** Draft +**Branch base:** `feat/v0.x-multi-area-improvements` (PR #23) or a follow-up off it + +## Problem + +Monica v4's `/api/contacts` listing endpoint silently drops a deterministic subset of contacts under certain conditions (observed: 18 of 1079 contacts missing in production data, ~1.7%). The cause is some interaction of MySQL `LIMIT/OFFSET` semantics with Monica's default `ORDER BY created_at` over rows whose sort-key values place them inside tie groups the engine resolves inconsistently across page boundaries. 
We were unable to find any explicit `WHERE` filter explaining the omissions: + +- `address_book_id IS NULL` — verified for all observed missing rows +- `is_partial = 0` — verified +- `is_active = 1` — verified +- `deleted_at IS NULL` — verified (rows return 200 on direct GET) +- Default sort, ASC/DESC `created_at`, ASC/DESC `updated_at` — all variants drop the same ~18 rows +- `meta.total` reports 1079 (matches DB count of listable contacts) but distinct IDs returned across pagination = 1061 + +The drop is **invisible**: no error, no warning, `meta.total` matches the row count returned. The importer reports `imported: 1079, skipped: 0, errors: 3` (where the 3 errors are downstream cross-reference failures that themselves are caused by the missing contacts). + +The user-visible symptom: Monica contacts that exist (verifiable via direct API GET and Monica's web UI) are absent from Kith after import. Cross-reference resolution (first_met_through, relationships) logs cryptic warnings about contacts not found in `import_records`. + +## Goal + +Make the Monica importer **self-verifying** for coverage. After the paginated listing crawl, compare its result against Monica's authoritative `meta.total`. For any gap, enumerate the missing IDs via direct `GET /api/contacts/:id` and feed them through the existing import pipeline — applying the same acceptance filters Monica's listing applies, so we don't accidentally import rows the listing deliberately hides. + +Side benefit: **partial contacts (relationship-target placeholders) become importable**, closing the existing class of `"Skipping relationship X between A and B: one or both contacts not imported"` warnings. + +## Non-Goals + +- No CardDAV migration. The existing REST API pipeline stays. +- No changes to auto-merge logic. The 759 auto-merges observed in production are intentional given the user's Monica data shape (CardDAV-bug duplicates). +- No schema migration. No new account fields, no new contact columns. 
+- No retry-with-different-sort scheme. We already proved unioning 5 sort orders still yields 1061 distinct IDs for this account; throwing more pagination at the problem doesn't recover the lost rows. +- No deduplication of the backfilled contacts against the listing crawl result. The skip-already-seen check is by `source_entity_id`, not by name/email/phone heuristic. + +## Design + +### Where it fits in the pipeline + +`Kith.Imports.Sources.MonicaApi.crawl/5` orchestrates several phases. The backfill is a new **Phase 1.4** between the listing crawl (Phase 1) and the auto-merge step (Phase 1.5): + +``` +Phase 1 crawl_all_contacts/1 (listing — may drop rows silently) +Phase 1.4 coverage_check_and_backfill/2 ← NEW (recover missing IDs) +Phase 1.5 auto_merge_duplicates/2 (now sees backfilled contacts too) +Phase 2 resolve_cross_references/3 (now finds previously-missing references) +Phase 3 misc-data fetch (unchanged) +Phase 4 MonicaMiscDataWorker (unchanged) +``` + +Inserting before auto-merge means backfilled contacts are first-class citizens for the remainder of the pipeline — they can be auto-merge-evaluated, cross-reference-resolved, photo-synced. + +### Algorithm + +`coverage_check_and_backfill(ctx, listing_acc) :: {updated_acc, deferred}` + +1. **Re-fetch `meta.total`** via `GET /api/contacts?limit=1&page=1`. This is the authoritative count of listable contacts at the moment we're checking. Use this rather than the `total` from the listing crawl because the listing's value may be stale if the crawl was long-running. + +2. **Read what we have:** `SELECT source_entity_id::int, MIN(...), MAX(...) FROM import_records WHERE import_id = ? AND source_entity_type = 'contact'`. This gives us the seen-IDs set, plus the `min_id` and `max_id`. + +3. **If `meta.total == count(distinct source_entity_id)`**, the listing was complete. Return `listing_acc` unchanged. Record `coverage_backfill: %{gap_detected: 0}` in the summary. + +4. 
**Scan the gap:** for each integer `id` in `[min_id, max_id + safety_margin]` not in our seen-IDs set, issue `GET /api/contacts/:id`. `safety_margin = 50` to handle the case where `meta.total` is higher than our observed `max_id` (e.g., a contact added during the import that has the highest ID). Cap iterations at a hard limit of `max_id - min_id + 100` to guarantee termination. + +5. **Per-response handling:** + + | HTTP status | Body shape | Action | Counter | + |---|---|---|---| + | 404 | — | Skip silently | `skipped_deleted` | + | 200 | `is_active == false` | Skip | `skipped_inactive` | + | 200 | `is_active == true && is_partial == true` | Process via `safe_import_api_contact/5` | `imported_partial` | + | 200 | `is_active == true && is_partial == false` | Process via `safe_import_api_contact/5` | `imported_full` | + | 429 / 5xx | — | Existing `RateLimiter` + Req retry | (existing behavior) | + | other | — | Log warning, count as error | `errors` | + + Order of evaluation matters: `is_active == false` is checked **before** `is_partial`, so an inactive partial is skipped (consistent with Monica's listing, which applies both filters with AND semantics). + + **Not surfaced today:** Monica v4's `/api/contacts` listing also filters `address_book_id IS NULL`, but the API response body never exposes that column (confirmed via `ContactBase::toArrayInternal` source inspection). We have no client-side way to enforce that filter on direct-GET responses. The user has zero named address books in their account, so this is a no-op for the current data. Documented here so a future maintainer dealing with named-address-book accounts knows where to add the check. + +6. **Early termination:** after each successful import (counter `imported_full` or `imported_partial` increments), recompute `total_distinct = count(distinct source_entity_id) in import_records`. If `total_distinct == meta.total`, break out of the scan loop. 
This is the typical termination — once we've recovered the missing rows, scanning the rest of the ID space is wasted work. + +7. **Progress broadcasting:** broadcast `{:import_backfill_progress, %{checked: N, imported: M, remaining: K}}` on the existing `import:#{account_id}` topic every 10 IDs so the LiveView shows backfill activity (or, at minimum, doesn't appear frozen between Phase 1 and Phase 1.5). + +8. **Summary writeback:** populate the import's `summary` map under a new `coverage_backfill` key (see below). + +### Summary surface + +The import job's `summary` gets a new nested map: + +```elixir +%{ + # ...existing keys... + coverage_backfill: %{ + gap_detected: 18, # meta.total - count_distinct_at_start_of_phase_1.4 + range_scanned: 29, # IDs we actually issued GETs for + imported_full: 16, # 200 + active + not-partial + imported_partial: 2, # 200 + active + partial + skipped_deleted: 11, # 404s (gaps in Monica's ID space — expected) + skipped_inactive: 0, # 200 + is_active=false + skipped_addressbook: 0, # 200 + address_book_id != null (defensive) + errors: 0, # unexpected statuses / response shapes + unresolved_gap: 0 # meta.total - count_distinct_at_end_of_phase_1.4 + } +} +``` + +**`unresolved_gap` is the self-reporting safety net.** If it ends up > 0, the import didn't fully recover and the operator can see this in the import summary. Without this field, the original bug was undetectable from the outside. Logging at `:warning` level when `unresolved_gap > 0`. + +### Acceptance filter mirrors Monica's listing + +The 200-with-active-and-not-partial OR partial-active path is the only path that creates an `import_record`. Inactive contacts (Monica-archived) and address-book-scoped contacts (future-proofing) are deliberately NOT imported, because their absence in the listing isn't a bug — it's the listing's intended filter. 
+ +### What changes in `import_api_contact_children` for partials + +Existing partials in Monica's data model typically have: +- `first_name`, `last_name` set +- Empty `contact_fields`, no addresses, no notes, no relationships of their own + +The existing `import_api_contact_children/7` handler gracefully no-ops on empty collections. No special-case branch needed in this spec — partials flow through the same pipeline as full contacts; they just end up with sparse data. + +If a partial later becomes a full contact in Monica (the user fills it in), the next import will see it via the listing endpoint, call `handle_existing_contact/7` → `do_update_api_contact/7`, and merge the richer data into the existing Kith record. So the partial-stub state is forward-compatible with Monica's natural data evolution. + +## Code Touchpoints + +- `lib/kith/imports/sources/monica_api.ex`: + - New private function `coverage_check_and_backfill/2` + - New private function `fetch_single_contact/2` that returns `{:ok, body} | {:not_found} | {:error, reason}` + - New private function `accept_backfill_response/1` that returns `:import_full | :import_partial | :skip_inactive | :skip_addressbook | :error` + - Wire-up in the orchestrator (between Phase 1 and Phase 1.5) — single new function call + - Summary-map writeback +- No changes to `MonicaApiCrawlWorker`, no changes to schemas, no changes to other workers + +## Tests + +`test/kith/imports/sources/monica_api_coverage_test.exs` (or additions to existing test files — to be confirmed in the plan): + +1. **Happy path: gap detected and closed.** Mock listing returns IDs `[1, 2, 3, 5]` with `meta.total: 5`. Mock direct GET for ID 4 returns 200 + `is_active: true, is_partial: false`. Assert exactly one direct GET issued, contact imported, `import_records` count rises to 5, `unresolved_gap: 0`, `imported_full: 1`. + +2. **Gap closed by mixed responses.** Listing returns `[1, 3, 5]`, `meta.total: 5`. Direct GET: ID 2 → 404, ID 4 → 200. 
Assert one 404 counted as `skipped_deleted`, one import. Early termination after ID 4. + +3. **Inactive contact in gap is skipped.** Direct GET returns 200 + `is_active: false`. Assert no import_record created, `skipped_inactive: 1`, no error. + +4. **Partial contact in gap is imported.** Direct GET returns 200 + `is_active: true, is_partial: true`. Assert import_record created, `imported_partial: 1`, contact written with `first_name`/`last_name` only. + +5. **Address-book-scoped contact is skipped (defensive).** Direct GET returns 200 with a hypothetical `address_book_id: 7` (not in current API, but the filter exists). Assert `skipped_addressbook: 1`. + +6. **No gap → no backfill.** Listing returns 5 distinct IDs, `meta.total: 5`. Assert zero direct GETs issued, summary shows `gap_detected: 0, range_scanned: 0`. + +7. **Unresolved gap reported.** Listing returns `[1, 3]`, `meta.total: 5`. Direct GETs for 2 → 404, 4 → 404, 5 → 404 (and every other ID in the scan range → 404). Assert `unresolved_gap: 3` in summary and a `:warning` log entry. + +8. **Early termination.** Listing returns 8 IDs in range `[1, 100]`, `meta.total: 10`. Direct GETs for 9, 10 succeed (in numerical order). Assert scan stops after ID 10 and IDs 11-100 are NOT requested even though they're in the scan range `[min_id, max_id + safety_margin]`. + +9. **Safety margin.** Listing returns 5 IDs `[1..5]`, `meta.total: 6`. Direct GET for ID 6 succeeds. Assert backfill issued the GET despite ID 6 being one past the observed `max_id` (covered by `safety_margin`). + +10. **Hard iteration cap.** Listing returns 1 ID `[1]`, `meta.total: 1000`. Assert backfill issues no more than `max_iterations = (max_id - min_id) + 100 = 100` GETs (here the scan range `[1, 51]` yields only 50 candidates, so the range is exhausted first), logs a warning, leaves `unresolved_gap > 0`. + +11. **Backfilled contact participates in auto-merge.** Listing returns 2 contacts. Direct GET for missing ID 3 returns a contact that's a duplicate (shared phone) of contact 1. Assert auto-merge runs after backfill and contact 3 is auto-merged into contact 1 (when `auto_merge_duplicates: true`). + +12. 
**Backfilled partial unblocks relationship resolution.** Listing returns contact A whose `first_met_through_contact_id` points to contact B (which Monica's listing doesn't return because B is partial). Backfill imports B. Assert Phase 2's `resolve_first_met_through` succeeds for A → B (no warning logged). + +## Risks + +- **Performance:** ~30s additional API time per import for the user's data shape (28-30 GETs at 55/min). Larger gaps would scale linearly. The `max_iterations` cap prevents runaway scans on pathological data. +- **Monica's API surface drift:** if Monica adds a new listing filter we don't know about (e.g., a hypothetical `is_hidden` flag), the backfill would import rows Monica wants hidden. Mitigated by surfacing all `200` skips in the summary so an operator can audit unfamiliar fields. v5 is a separate code path anyway. +- **Auto-merge interaction:** backfilled contacts go through auto-merge after the listing-crawl contacts. If a backfilled contact has the same name+phone/email as an already-imported contact, it gets merged in. This is the correct behavior given auto-merge's existing semantics, but the user should be aware that backfilled-then-merged contacts will have a `local_entity_id` pointing at the survivor — same as any auto-merged contact. + +## Acceptance Criteria + +1. The 12 tests above pass. +2. `mix quality` passes (compile + format + credo + sobelow + dialyzer). +3. Re-running the user's Monica import with `auto_merge_duplicates: true` produces a summary where `coverage_backfill.imported_full + coverage_backfill.imported_partial` equals the original gap (18 in observed data). +4. Subsequent Phase 2 cross-reference resolution surfaces zero `"Could not resolve first_met_through"` and zero `"Skipping relationship ... one or both contacts not imported"` warnings whose target IDs are inside the backfilled set. +5. 
The `coverage_backfill.unresolved_gap` field is visible in the import job's `summary` map and surfaces in the import wizard UI (or at minimum in the worker logs). From f92888577bdef9ed8cde7e50cf74a5652581f5d1 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 00:55:35 +0300 Subject: [PATCH 52/58] docs(plans): implementation plan for Monica coverage backfill Bite-sized TDD plan in 7 tasks: thread ref_data through crawl_all_contacts, add fetch_single_contact + accept_backfill_response helpers, implement coverage_check_and_backfill/3 core algorithm, wire into crawl/5 between Phase 1 and 1.5, extend summary, exhaustive test matrix (10 scenarios covering happy path, mixed 200+404, inactive skip, partial import, no-op-no-gap, unresolved-gap warning, early termination, auto-merge interaction, safety margin, hard iteration cap). --- ...6-05-17-monica-import-coverage-backfill.md | 1404 +++++++++++++++++ 1 file changed, 1404 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-17-monica-import-coverage-backfill.md diff --git a/docs/superpowers/plans/2026-05-17-monica-import-coverage-backfill.md b/docs/superpowers/plans/2026-05-17-monica-import-coverage-backfill.md new file mode 100644 index 0000000..f7930e1 --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-monica-import-coverage-backfill.md @@ -0,0 +1,1404 @@ +# Monica Import Coverage Backfill Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a Phase 1.4 coverage check to the Monica importer that detects gaps in `/api/contacts` pagination and backfills missing contacts via direct `GET /api/contacts/:id`, applying the same `is_active`/`is_partial` filters Monica's listing applies. 
+ +**Architecture:** Single-file change to `lib/kith/imports/sources/monica_api.ex`. New private function `coverage_check_and_backfill/3` runs after `crawl_all_contacts/1` and before `auto_merge_duplicates/2`. It re-fetches Monica's authoritative `meta.total`, compares against `import_records` count, and for any gap iterates `[min_id, max_id + safety_margin]` issuing `GET /api/contacts/:id` for unseen IDs. Each response is dispatched: 404 → skip; 200 + `is_active=false` → skip; 200 + remainder → feed through the existing `safe_import_api_contact/5` pipeline so backfilled contacts are first-class participants in auto-merge and cross-reference resolution. + +**Tech Stack:** Elixir, Ecto, Oban, Req (HTTP client), Req.Test (mock plug for tests), ExUnit, `Kith.Imports.Sources.MonicaApi.RateLimiter` (existing per-host limiter), Monica v4 REST API. + +**Design spec:** `docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md` + +--- + +## File Inventory + +| File | Change | +|---|---| +| `lib/kith/imports/sources/monica_api.ex` | Modify `crawl_all_contacts/1` return shape; add `fetch_single_contact/2`, `accept_backfill_response/1`, `coverage_check_and_backfill/3`; wire into `crawl/5`; extend summary writeback | +| `test/kith/imports/sources/monica_api_test.exs` | Add `describe "coverage_check_and_backfill"` block with 12 tests | +| `docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md` | Already committed (no change) | + +No new files, no schema changes, no migrations, no worker changes. 
+ +--- + +## Pre-flight + +- [ ] **Step 0a: Confirm worktree** + +```bash +pwd +# Expected: /Users/basharqassis/projects/kith/.worktrees/monica-coverage-backfill +git branch --show-current +# Expected: fix/monica-import-coverage-backfill +git log --oneline -1 +# Expected: 4919369 docs(specs): design for Monica import coverage backfill +``` + +- [ ] **Step 0b: Fetch dependencies** + +```bash +mix deps.get +``` + +Expected: `All dependencies are up to date` (deps were already fetched when this worktree's branch base was set up). + +- [ ] **Step 0c: Baseline tests pass** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs +``` + +Expected: all tests pass. If any fail, STOP and report — the plan assumes a green baseline. + +- [ ] **Step 0d: Confirm test database is set up** + +```bash +MIX_ENV=test mix ecto.create --quiet 2>&1 | tail -3 +MIX_ENV=test mix ecto.migrate --quiet 2>&1 | tail -3 +``` + +Expected: either both quiet (already set up) or migration messages. No errors. + +--- + +## Task 1: Thread `ref_data` through `crawl_all_contacts/1` + +**Why this task exists:** The backfill needs to call `safe_import_api_contact/5`, which requires the `ref_data` argument (Monica genders/tags/contact_field_types map). The listing crawl already builds and updates `ref_data` per page but currently discards it at the end. We need to return it so the backfill can reuse it without rebuilding from scratch. + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` + - `crawl_all_contacts/1` (line 161) + - `crawl_contacts_loop/2` (line 179) + - `handle_contacts_page/4` (around line 215) + - The four return tuples inside `crawl_contacts_loop/2` (`{:ok, [], _meta}`, `{:ok, unexpected}`, `{:error, :rate_limited}`, `{:error, reason}`) + - `crawl/5` call site (line 95) + +- [ ] **Step 1.1: Read the existing `crawl_all_contacts/1` and `crawl_contacts_loop/2`** + +Open `lib/kith/imports/sources/monica_api.ex` and confirm the current return shape is `{acc, deferred}`. 
Note that `state.ref_data` is built up inside the loop but never returned. + +- [ ] **Step 1.2: Add a failing test** + +The existing test file has many `describe` blocks. Add this test inside the existing `describe "crawl/5"` block (or at the very end of the test file, in a new `describe "crawl/5 ref_data threading"` block — either is fine, the agent should pick the spot consistent with the surrounding test style): + +```elixir +test "crawl/5 carries ref_data through to be available for downstream phases", %{ + user: user, + account_id: account_id +} do + import_job = api_import_fixture(account_id, user.id) + + # Single-page listing with one contact and one gender to populate ref_data + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "Alpha", + "last_name" => "One", + "is_active" => true, + "is_partial" => false, + "gender" => %{"name" => "Male", "type" => "M"}, + "contactFields" => [] + } + ], + "meta" => %{"total" => 1, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "auto_merge_duplicates" => false + }) + + # Sanity: import succeeded with one contact and zero coverage gap. + assert summary.imported == 1 + assert summary.coverage_backfill.gap_detected == 0 +end +``` + +This test will FAIL initially because `summary.coverage_backfill` doesn't exist yet (Task 6 adds it). That's fine — it serves as a forward-looking sanity that the orchestration changes still produce one alive contact. The reason we add it here, in Task 1, is to anchor the worktree-state of "ref_data threading produces unchanged behavior". The full coverage_backfill semantics arrive in later tasks. 
+ +- [ ] **Step 1.3: Run the new test to confirm it FAILS for the expected reason** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs --only line: +``` + +Expected: failure on `summary.coverage_backfill.gap_detected == 0` (KeyError). NOT failure on the import succeeding — the import itself should still produce `imported: 1`. + +- [ ] **Step 1.4: Modify `crawl_all_contacts/1` to return ref_data** + +Replace the existing `crawl_all_contacts/1` and `crawl_contacts_loop/2` such that: + +- `crawl_all_contacts/1` returns `{acc, deferred, ref_data}` instead of `{acc, deferred}`. +- `crawl_contacts_loop/2` mirrors that: every termination path returns `{acc, deferred, ref_data}`. +- `state.ref_data` is the value returned (defaults to `nil` if no contacts were ever fetched). + +Specific edits: + +```elixir +defp crawl_all_contacts(ctx) do + initial_state = %{ + page: 1, + total: nil, + acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, + deferred: %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }, + ref_data: nil, + global_idx: 0 + } + + crawl_contacts_loop(ctx, initial_state) +end + +defp crawl_contacts_loop(ctx, state) do + case fetch_contacts_page(ctx.credential, state.page) do + {:ok, %{"data" => contacts, "meta" => meta}} when is_list(contacts) -> + handle_contacts_page(ctx, state, contacts, meta) + + {:ok, %{"data" => [], "meta" => _}} -> + {state.acc, state.deferred, state.ref_data} + + {:ok, unexpected} -> + Logger.error("[MonicaApi] Unexpected contacts response: #{inspect(unexpected)}") + acc = add_error(state.acc, "Unexpected API response format from contacts endpoint") + {acc, state.deferred, state.ref_data} + + {:error, :rate_limited} -> + acc = add_error(state.acc, "Rate limited by Monica API after retries") + {acc, state.deferred, state.ref_data} + + {:error, reason} -> + acc = + add_error(state.acc, "Failed to fetch contacts page #{state.page}: #{inspect(reason)}") + + {acc, 
state.deferred, state.ref_data} + end +end +``` + +- [ ] **Step 1.5: Update `handle_contacts_page/4` to return the third element** + +Find `handle_contacts_page/4`. It currently calls `process_contact_page/6` (or similar) and either recurses via `crawl_contacts_loop/2` or returns `{acc, deferred}`. Replace its terminal return with `{acc, deferred, ref_data}`. The recursive case is fine — it threads `ref_data` through `next_state` already. + +Concretely, find the line in `handle_contacts_page/4` that returns to the caller (the non-recursive branch — when `state.page >= last_page`) and change `{acc, deferred}` to `{acc, deferred, ref_data}`. + +- [ ] **Step 1.6: Update the orchestrator call in `crawl/5`** + +Around line 95 of `monica_api.ex`, change: + +```elixir +# Phase 1: Crawl contacts +{acc, deferred} = crawl_all_contacts(ctx) +``` + +to: + +```elixir +# Phase 1: Crawl contacts +{acc, deferred, ref_data} = crawl_all_contacts(ctx) +``` + +The `ref_data` variable becomes available for use in Task 4's wiring. For Task 1 it's bound but unused — Elixir will compile-warn unless we mark it `_ref_data`. Since the next task will use it, leave it as `ref_data` and add a single line below it to silence the warning during this task only: + +```elixir +{acc, deferred, ref_data} = crawl_all_contacts(ctx) +_ = ref_data # consumed by coverage_check_and_backfill/3 in Task 4 +``` + +The `_ = ref_data` line is intentionally removed in Task 4 when the variable becomes used. + +- [ ] **Step 1.7: Run the full file's tests** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs +``` + +Expected: all PRE-EXISTING tests pass (the ref_data plumbing doesn't change semantics). The new test from Step 1.2 STILL FAILS at the `coverage_backfill` assertion — that's expected; it'll go green in Task 6. + +If any pre-existing test breaks, the threading was done wrong. 
Inspect the failure carefully; the most likely cause is a missing third element in one of the return tuples in `crawl_contacts_loop/2` or `handle_contacts_page/4`. + +- [ ] **Step 1.8: `mix compile --warnings-as-errors`** + +```bash +mix compile --warnings-as-errors +``` + +Expected: clean. The `_ = ref_data` line silences the "unused" warning for the bound-but-not-yet-consumed variable. + +- [ ] **Step 1.9: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git -c commit.gpgsign=false commit -m "refactor(monica): thread ref_data through crawl_all_contacts return + +Phase 1.4 (coverage backfill, next commits) needs ref_data so it can +call safe_import_api_contact/5 on directly-fetched contacts. crawl_all_contacts/1 +was already building ref_data per page but discarding it on return; this +commit threads it through to the orchestrator. No behavior change." +``` + +--- + +## Task 2: Add `fetch_single_contact/2` helper + +**Why this task exists:** The existing `api_get_json/3` returns `{:error, "Unexpected status: 404"}` for 404, indistinguishable from other unexpected statuses. The backfill needs to treat 404 as a normal expected outcome (Monica-side soft-delete), not an error. We add a focused helper that returns a 3-way variant. + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (add new private function near `api_get_json/3` at line 1182) +- Modify: `test/kith/imports/sources/monica_api_test.exs` (new tests) + +- [ ] **Step 2.1: Write the failing tests** + +Add a new `describe` block at the end of `test/kith/imports/sources/monica_api_test.exs` (the file has many describe blocks; place this one consistently with the existing style — after the existing final block): + +```elixir +describe "fetch_single_contact/2 (private — tested via send_test)" do + # The helper is private; we exercise it via a public seam — the coverage + # backfill end-to-end test in a later describe block. 
This describe block + # exists only as a placeholder for direct unit tests if the function were + # ever made public. + test "documented behavior — see coverage_check_and_backfill tests" do + assert true + end +end +``` + +(Private functions in Elixir aren't directly testable from outside the module. The actual behavior of `fetch_single_contact/2` is exercised through `coverage_check_and_backfill/3` in Task 4, where every status branch gets a Req.Test stub. The placeholder above documents the deliberate skip.) + +- [ ] **Step 2.2: Implement the helper** + +Insert the new private function in `lib/kith/imports/sources/monica_api.ex` immediately AFTER `api_get_json/3` (around line 1190). Use this exact code: + +```elixir +defp fetch_single_contact(credential, monica_id) do + url = "#{credential.url}/api/contacts/#{monica_id}" + + case api_get(credential, url, []) do + {:ok, %{status: 200, body: %{"data" => contact}}} when is_map(contact) -> + {:ok, contact} + + {:ok, %{status: 404}} -> + :not_found + + {:ok, %{status: 429}} -> + {:error, :rate_limited} + + {:ok, %{status: status}} -> + {:error, "Unexpected status: #{status}"} + + {:error, reason} -> + {:error, reason} + end +end +``` + +- [ ] **Step 2.3: Compile + run tests** + +```bash +mix compile --warnings-as-errors +mix test test/kith/imports/sources/monica_api_test.exs +``` + +Expected: all previously-passing tests still pass. The helper is still unused at this point, and Elixir does warn about unused private functions (the `_ = ref_data` line from Task 1 does not cover it), so `mix compile --warnings-as-errors` fails on that warning until the helper is used or the warning is suppressed (see below). + +If you get an "unused function" warning on `fetch_single_contact`, add `@compile {:nowarn_unused_function, fetch_single_contact: 2}` near the top of the module. Remove that compile directive in Task 4 when the function becomes used. 
+ +- [ ] **Step 2.4: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git -c commit.gpgsign=false commit -m "feat(monica): add fetch_single_contact/2 helper + +Phase 1.4 coverage backfill needs to distinguish 404 (Monica-side +soft-delete, expected) from other errors. api_get_json/3 lumps them +all into {:error, \"Unexpected status: N\"}. New helper returns +{:ok, contact} | :not_found | {:error, reason}." +``` + +--- + +## Task 3: Add `accept_backfill_response/1` dispatch helper + +**Why this task exists:** The acceptance logic (is_active=false → skip, is_partial=true → import-as-partial, etc.) is a pure function of the response body. Extracting it as a named helper lets the main backfill loop stay focused on iteration + accumulation, and lets the dispatch logic be tested independently via unit tests on the public path in Task 4. + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (add new private function) + +- [ ] **Step 3.1: Insert the helper near `fetch_single_contact/2`** + +Immediately after `fetch_single_contact/2` in `lib/kith/imports/sources/monica_api.ex`: + +```elixir +# Mirror Monica's listing filter on direct-GET responses: +# - Monica's index() chains ->real()->active() which means +# is_active = 1 AND is_partial = 0. +# - Partials still anchor relationship targets, so we accept them +# (relationship resolution depends on importing the partial stubs). +# - Inactive contacts are deliberately hidden by Monica's UI; skip +# them so we don't import rows Monica wants archived. 
+# +# Returns one of: +# :import_full — full contact, process via safe_import_api_contact/5 +# :import_partial — partial contact, also process (for relationships) +# :skip_inactive — is_active is false-ish; count as skipped_inactive +defp accept_backfill_response(%{"is_active" => true, "is_partial" => true}), + do: :import_partial + +defp accept_backfill_response(%{"is_active" => true, "is_partial" => false}), + do: :import_full + +# Anything where is_active is false (or missing — defensive, Monica +# always serializes it) is skipped. +defp accept_backfill_response(%{"is_active" => false}), do: :skip_inactive +defp accept_backfill_response(_other), do: :skip_inactive +``` + +Note: the falling-through `_other` clause counts as `:skip_inactive` because in practice anything missing both `is_active` and `is_partial` from Monica's response is malformed and we choose the safer "don't import" default. If this ever fires in production, the import summary's `skipped_inactive` count would surface it. + +- [ ] **Step 3.2: Compile** + +```bash +mix compile --warnings-as-errors +``` + +Expected: clean. If "unused function" warning fires, add to the existing `@compile {:nowarn_unused_function, ...}` directive (if you created one in Task 2.3) or add `accept_backfill_response: 1` to its list. + +- [ ] **Step 3.3: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex +git -c commit.gpgsign=false commit -m "feat(monica): add accept_backfill_response/1 dispatch + +Mirrors Monica's listing filter (->real()->active() = is_active=1 AND +is_partial=0) on direct-GET responses so the backfill doesn't import +rows Monica deliberately hides from the listing. Partials are still +accepted because they anchor relationship targets." +``` + +--- + +## Task 4: Add `coverage_check_and_backfill/3` + +**Why this task exists:** This is the heart of the fix. 
Implements the algorithm from the spec: re-fetch `meta.total`, compute gap, iterate missing IDs in [min, max + safety_margin], early-terminate when gap closes, cap iterations. + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` +- Modify: `test/kith/imports/sources/monica_api_test.exs` + +- [ ] **Step 4.1: Write the failing test — happy path** + +Add a new `describe "coverage_check_and_backfill"` block in the test file (immediately after the placeholder `describe "fetch_single_contact/2"` from Task 2.1): + +```elixir +describe "coverage_check_and_backfill" do + test "closes a single-ID gap via direct fetch", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path, conn.query_string} do + {"GET", "/api/contacts", qs} when qs != "" -> + # Listing call — return IDs 1, 2, 3, 5 (ID 4 missing) with meta.total=5 + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 2, 3, 5], fn id -> + %{ + "id" => id, + "first_name" => "Listed#{id}", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + end), + "meta" => %{ + "total" => 5, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/4", _} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 4, + "first_name" => "Backfilled4", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _id, _} -> + # Any other direct-fetch ID returns 404 + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "auto_merge_duplicates" => false + }) + + assert summary.coverage_backfill.gap_detected == 1 + assert summary.coverage_backfill.imported_full == 1 + assert summary.coverage_backfill.imported_partial == 0 + assert 
summary.coverage_backfill.skipped_deleted == 0 + assert summary.coverage_backfill.skipped_inactive == 0 + assert summary.coverage_backfill.unresolved_gap == 0 + assert summary.imported == 5 + + # And the backfilled contact is now in import_records + record = Imports.find_import_record(account_id, "monica_api", "contact", "4") + refute is_nil(record) + end +end +``` + +- [ ] **Step 4.2: Run the test to confirm it FAILS at the assertion (not at a compile error)** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs --only line: +``` + +Expected: FAIL — `coverage_backfill` key not in summary OR `imported_full == 1` not satisfied. Specifically the failure should be a key/value mismatch, not a function-undefined error. + +- [ ] **Step 4.3: Implement `coverage_check_and_backfill/3`** + +Insert the new private function in `lib/kith/imports/sources/monica_api.ex` immediately AFTER `crawl_all_contacts/1` (or in a section close to it). Use this exact code: + +```elixir +# ── Phase 1.4: Coverage check + backfill ────────────────────────────── +# +# Monica's /api/contacts listing endpoint silently drops a subset of +# contacts under LIMIT/OFFSET pagination over its default sort (this is +# a v4 server-side issue we can't fix). We compensate by re-fetching +# meta.total and any IDs in [min_seen, max_seen + safety_margin] that +# weren't returned by the listing. +# +# See docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md +# for full design context. + +@safety_margin 50 +@max_iterations_buffer 100 + +defp coverage_check_and_backfill(ctx, acc, ref_data) do + case fetch_meta_total(ctx.credential) do + {:ok, monica_total} -> + do_backfill(ctx, acc, ref_data, monica_total) + + {:error, _reason} -> + # Can't determine gap; pass through with zeroed coverage stats. 
+ {acc, ref_data, empty_backfill_stats()} + end +end + +defp fetch_meta_total(credential) do + url = "#{credential.url}/api/contacts" + + case api_get_json(credential, url, limit: 1, page: 1) do + {:ok, %{"meta" => %{"total" => total}}} when is_integer(total) -> {:ok, total} + _ -> {:error, :unknown_total} + end +end + +defp do_backfill(ctx, acc, ref_data, monica_total) do + seen_ids = seen_source_ids(ctx.import_job.id) + + case Enum.empty?(seen_ids) do + true -> + # Nothing imported by listing; refuse to scan an unbounded range. + {acc, ref_data, empty_backfill_stats(gap: monica_total)} + + false -> + min_id = Enum.min(seen_ids) + max_id = Enum.max(seen_ids) + + gap = monica_total - MapSet.size(seen_ids) + + stats = %{ + gap_detected: gap, + range_scanned: 0, + imported_full: 0, + imported_partial: 0, + skipped_deleted: 0, + skipped_inactive: 0, + errors: 0, + unresolved_gap: 0 + } + + if gap <= 0 do + {acc, ref_data, %{stats | unresolved_gap: 0}} + else + scan_gap_range(ctx, acc, ref_data, seen_ids, min_id, max_id, monica_total, stats) + end + end +end + +defp seen_source_ids(import_id) do + from(ir in Kith.Imports.ImportRecord, + where: + ir.import_id == ^import_id and + ir.source_entity_type == "contact", + select: ir.source_entity_id + ) + |> Repo.all() + |> Enum.flat_map(fn s -> + case Integer.parse(s) do + {n, ""} -> [n] + _ -> [] + end + end) + |> MapSet.new() +end + +defp scan_gap_range(ctx, acc, ref_data, seen_ids, min_id, max_id, monica_total, stats) do + scan_start = min_id + scan_end = max_id + @safety_margin + max_iterations = max_id - min_id + @max_iterations_buffer + + Logger.info( + "[MonicaApi] Coverage backfill scanning [#{scan_start}..#{scan_end}] " <> + "(seen=#{MapSet.size(seen_ids)}, monica_total=#{monica_total}, gap=#{stats.gap_detected})" + ) + + candidates = + scan_start..scan_end + |> Enum.reject(&MapSet.member?(seen_ids, &1)) + |> Enum.take(max_iterations) + + initial = {acc, ref_data, stats, seen_ids} + + {final_acc, final_ref_data, 
final_stats, final_seen} = + Enum.reduce_while(candidates, initial, fn id, {a, rd, s, seen} -> + # Early termination: if we've closed the gap, stop. + if MapSet.size(seen) >= monica_total do + {:halt, {a, rd, s, seen}} + else + case fetch_and_dispatch_backfill(ctx, id, a, rd) do + {:imported_full, new_acc, new_ref_data} -> + {:cont, + {new_acc, new_ref_data, + %{s | range_scanned: s.range_scanned + 1, imported_full: s.imported_full + 1}, + MapSet.put(seen, id)}} + + {:imported_partial, new_acc, new_ref_data} -> + {:cont, + {new_acc, new_ref_data, + %{s | range_scanned: s.range_scanned + 1, imported_partial: s.imported_partial + 1}, + MapSet.put(seen, id)}} + + :skipped_deleted -> + {:cont, {a, rd, %{s | range_scanned: s.range_scanned + 1, skipped_deleted: s.skipped_deleted + 1}, seen}} + + :skipped_inactive -> + {:cont, {a, rd, %{s | range_scanned: s.range_scanned + 1, skipped_inactive: s.skipped_inactive + 1}, seen}} + + {:error, _reason} -> + {:cont, {a, rd, %{s | range_scanned: s.range_scanned + 1, errors: s.errors + 1}, seen}} + end + end + end) + + unresolved = max(0, monica_total - MapSet.size(final_seen)) + + if unresolved > 0 do + Logger.warning( + "[MonicaApi] Coverage backfill could not close the gap: " <> + "monica_total=#{monica_total}, seen=#{MapSet.size(final_seen)}, unresolved=#{unresolved}" + ) + end + + {final_acc, final_ref_data, %{final_stats | unresolved_gap: unresolved}} +end + +defp fetch_and_dispatch_backfill(ctx, monica_id, acc, ref_data) do + case fetch_single_contact(ctx.credential, monica_id) do + :not_found -> + :skipped_deleted + + {:error, reason} -> + {:error, reason} + + {:ok, api_contact} -> + case accept_backfill_response(api_contact) do + :skip_inactive -> + :skipped_inactive + + verdict when verdict in [:import_full, :import_partial] -> + # Update ref_data with the new contact's gender/tags/cfts + new_ref_data = + build_or_update_ref_data(ctx.account_id, [api_contact], ref_data) + + # Feed through the existing import pipeline. 
safe_import_api_contact/5 + # handles success/failure logging and accumulator updates internally. + {new_acc, _new_deferred} = + safe_import_api_contact(ctx, api_contact, new_ref_data, acc, %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }) + + case verdict do + :import_full -> {:imported_full, new_acc, new_ref_data} + :import_partial -> {:imported_partial, new_acc, new_ref_data} + end + end + end +end + +defp empty_backfill_stats(opts \\ []) do + %{ + gap_detected: Keyword.get(opts, :gap, 0), + range_scanned: 0, + imported_full: 0, + imported_partial: 0, + skipped_deleted: 0, + skipped_inactive: 0, + errors: 0, + unresolved_gap: Keyword.get(opts, :gap, 0) + } +end +``` + +Note about the dropped `deferred` from `safe_import_api_contact/5`: the deferred entries returned while importing a backfilled contact are discarded (`_new_deferred`), so a backfilled contact's own first-met-through and relationship references are not queued for Phase 2. What Phase 2 does resolve are the references that listed contacts make TO these backfilled IDs: those were already collected during Phase 1's listing crawl and become resolvable once the backfilled contact has an import record. The empty deferred map here is intentional — we don't want to recurse into Phase 2 from backfill. + +- [ ] **Step 4.4: Run the test from Step 4.1** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs --only line: +``` + +Expected: FAIL — `summary.coverage_backfill` still doesn't exist because Task 5 hasn't wired it into the orchestrator yet. The function is implemented but uncalled. Move to Task 5. + +- [ ] **Step 4.5: `mix compile --warnings-as-errors`** + +```bash +mix compile --warnings-as-errors +``` + +Expected: clean.
If `fetch_single_contact` or `accept_backfill_response` still show as unused, the `@compile {:nowarn_unused_function, ...}` directive from earlier tasks needs to include `coverage_check_and_backfill: 3`, `fetch_meta_total: 1`, `do_backfill: 4`, `seen_source_ids: 1`, `scan_gap_range: 8`, `fetch_and_dispatch_backfill: 4`, and `empty_backfill_stats: 1`. Add them all in one go and remove the directive entirely in Task 5 once `coverage_check_and_backfill/3` becomes called. + +- [ ] **Step 4.6: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex +git -c commit.gpgsign=false commit -m "feat(monica): coverage_check_and_backfill/3 core algorithm + +Implements the Phase 1.4 logic: re-fetch meta.total, compare against +import_records, iterate [min_id..max_id+50] for unseen IDs, dispatch +each via fetch_single_contact + accept_backfill_response, early-terminate +when gap closes, cap iterations at (max_id-min_id)+100 to guarantee +termination. Stats accumulator covers gap_detected, range_scanned, +imported_full, imported_partial, skipped_deleted, skipped_inactive, +errors, unresolved_gap. + +Wiring into crawl/5 is the next commit." +``` + +--- + +## Task 5: Wire into `crawl/5` and extend summary + +**Why this task exists:** Phase 1.4 has to be invoked between the listing crawl (Phase 1) and auto-merge (Phase 1.5). This is also where the `coverage_backfill` key gets added to the summary map. + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (around line 95 — orchestrator) +- Modify: `test/kith/imports/sources/monica_api_test.exs` (the test from Step 4.1 should now go GREEN) + +- [ ] **Step 5.1: Wire the call into `crawl/5`** + +In `lib/kith/imports/sources/monica_api.ex`, locate the Phase 1 → Phase 1.5 boundary (around line 95-99). 
Replace this block: + +```elixir +# Phase 1: Crawl contacts +{acc, deferred, ref_data} = crawl_all_contacts(ctx) +_ = ref_data # consumed by coverage_check_and_backfill/3 in Task 4 + +# Phase 1.5: Auto-merge definite duplicates (optional) +merge_result = + if opts["auto_merge_duplicates"] do + auto_merge_duplicates(account_id, import_job) + else + %{merged: 0, errors: []} + end +``` + +with: + +```elixir +# Phase 1: Crawl contacts +{acc, deferred, ref_data} = crawl_all_contacts(ctx) + +# Phase 1.4: Coverage check + backfill any silently-dropped contacts. +# See docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md +{acc, _ref_data, coverage_stats} = coverage_check_and_backfill(ctx, acc, ref_data) + +# Phase 1.5: Auto-merge definite duplicates (optional). +# Runs AFTER Phase 1.4 so backfilled contacts participate in auto-merge. +merge_result = + if opts["auto_merge_duplicates"] do + auto_merge_duplicates(account_id, import_job) + else + %{merged: 0, errors: []} + end +``` + +The `_ref_data` discard is intentional — Phase 1.5 onward doesn't need ref_data; Phase 4 (misc-data worker) rebuilds it on its own. + +- [ ] **Step 5.2: Extend the summary writeback** + +Locate the final `{:ok, %{ ... }}` summary in `crawl/5` (around line 140-150). Add a `coverage_backfill:` key to the map. 
Replace the existing summary construction: + +```elixir +{:ok, + %{ + imported: acc.contacts, + contacts: acc.contacts, + notes: acc.notes, + skipped: acc.skipped, + merged: merge_result.merged, + error_count: error_count, + errors: Enum.take(all_errors, 50), + misc_data_plan: Enum.reverse(deferred.misc_data) + }} +``` + +with: + +```elixir +{:ok, + %{ + imported: acc.contacts, + contacts: acc.contacts, + notes: acc.notes, + skipped: acc.skipped, + merged: merge_result.merged, + error_count: error_count, + errors: Enum.take(all_errors, 50), + misc_data_plan: Enum.reverse(deferred.misc_data), + coverage_backfill: coverage_stats + }} +``` + +Also update the cancellation summary at the end of the same function (the `catch :cancelled` branch). Currently: + +```elixir +catch + :cancelled -> + {:ok, + %{ + imported: 0, + contacts: 0, + notes: 0, + skipped: 0, + merged: 0, + error_count: 1, + errors: ["Import cancelled"] + }} +end +``` + +becomes: + +```elixir +catch + :cancelled -> + {:ok, + %{ + imported: 0, + contacts: 0, + notes: 0, + skipped: 0, + merged: 0, + error_count: 1, + errors: ["Import cancelled"], + coverage_backfill: empty_backfill_stats() + }} +end +``` + +- [ ] **Step 5.3: Run the happy-path test from Step 4.1** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs --only line: +``` + +Expected: PASS. All assertions about `summary.coverage_backfill.*` and `summary.imported == 5` hold. + +- [ ] **Step 5.4: Run the test from Task 1 (Step 1.2)** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs --only line: +``` + +Expected: PASS. The `coverage_backfill.gap_detected == 0` assertion now holds (single-contact listing, total=1, no gap). + +- [ ] **Step 5.5: Remove the `@compile {:nowarn_unused_function, ...}` directive** + +If Task 2.3 / 3.2 / 4.5 added a `@compile {:nowarn_unused_function, ...}` directive to suppress warnings on functions that weren't yet called, remove that directive now. 
All those functions are reachable through `coverage_check_and_backfill/3` which is reachable through `crawl/5`. + +```bash +mix compile --warnings-as-errors +``` + +Expected: clean. + +- [ ] **Step 5.6: Run the full test file** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs +``` + +Expected: all tests pass, including the two new ones from Task 1 and Task 4. + +- [ ] **Step 5.7: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git -c commit.gpgsign=false commit -m "feat(monica): wire coverage_check_and_backfill into crawl/5 + +Phase 1.4 now runs between the listing crawl (Phase 1) and auto-merge +(Phase 1.5). Backfilled contacts participate in auto-merge and Phase 2 +cross-reference resolution as first-class import-record holders. + +Import summary now carries coverage_backfill.{gap_detected, range_scanned, +imported_full, imported_partial, skipped_deleted, skipped_inactive, +errors, unresolved_gap}. The unresolved_gap field is the self-reporting +safety net: if it ends up > 0, the operator knows the listing dropped +contacts the backfill couldn't recover, surfaced in import.summary." +``` + +--- + +## Task 6: Round out the test matrix + +**Why this task exists:** The happy path is green. The spec lists 11 more test scenarios that lock in the edge-case behavior (404s, inactives, partials, early termination, hard cap, unresolved gap, auto-merge interaction, cross-ref unblock). Each scenario gets its own test. + +**Files:** +- Modify: `test/kith/imports/sources/monica_api_test.exs` + +For each of the following sub-tasks, the pattern is: +1. Add the test inside the `describe "coverage_check_and_backfill"` block from Task 4. +2. Run the single test to confirm it fails OR passes for the expected reason. +3. If it fails unexpectedly, the production code has a real bug — fix it inline and document in the commit. 
+ +- [ ] **Step 6.1: Test — gap closed by mixed responses (200 + 404)** + +```elixir +test "closes a 1-of-2 gap when one direct fetch 404s", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 3, 5], fn id -> + %{"id" => id, "first_name" => "L#{id}", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + end), + "meta" => %{"total" => 5, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/2"} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + + {"GET", "/api/contacts/4"} -> + Req.Test.json(conn, %{ + "data" => %{"id" => 4, "first_name" => "B4", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.gap_detected == 2 + assert summary.coverage_backfill.imported_full == 1 + # The gap never closes, so the scan runs through the whole window: + # ID 2 plus IDs 6..55 (max_seen + @safety_margin) all return 404. + assert summary.coverage_backfill.skipped_deleted == 51 + assert summary.coverage_backfill.unresolved_gap == 1 +end +``` + +Run: `mix test test/kith/imports/sources/monica_api_test.exs --only line:`. +Expected: PASS.
+ +- [ ] **Step 6.2: Test — inactive contact skipped** + +```elixir +test "skips inactive contact in gap", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "first_name" => "A", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + ], + "meta" => %{"total" => 2, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{"id" => 2, "first_name" => "Inactive", "last_name" => "X", + "is_active" => false, "is_partial" => false, "contactFields" => []} + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.skipped_inactive == 1 + assert summary.coverage_backfill.imported_full == 0 + assert summary.coverage_backfill.unresolved_gap == 1 + + # Inactive contact was NOT written to import_records + refute Imports.find_import_record(account_id, "monica_api", "contact", "2") +end +``` + +Run, expected PASS. 
+ +- [ ] **Step 6.3: Test — partial contact is imported** + +```elixir +test "imports partial contact in gap (relationships need it)", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "first_name" => "A", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + ], + "meta" => %{"total" => 2, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{"id" => 2, "first_name" => "Partial", "last_name" => "Stub", + "is_active" => true, "is_partial" => true, "contactFields" => []} + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.imported_partial == 1 + assert summary.coverage_backfill.imported_full == 0 + assert summary.coverage_backfill.unresolved_gap == 0 + + record = Imports.find_import_record(account_id, "monica_api", "contact", "2") + refute is_nil(record) +end +``` + +Run, expected PASS. 
+ +- [ ] **Step 6.4: Test — no gap, no backfill** + +```elixir +test "no-op when meta.total matches distinct imported", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + request_count = :counters.new(1, []) + + Req.Test.stub(@stub_name, fn conn -> + :counters.add(request_count, 1, 1) + + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 2, 3], fn id -> + %{"id" => id, "first_name" => "L#{id}", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + end), + "meta" => %{"total" => 3, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/" <> _} -> + flunk("unexpected direct-fetch when no gap exists") + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.gap_detected == 0 + assert summary.coverage_backfill.range_scanned == 0 + # 1 listing call + 1 meta.total recheck = 2 API calls; no per-ID GETs. + assert :counters.get(request_count, 1) == 2 +end +``` + +Run, expected PASS. 
+ +- [ ] **Step 6.5: Test — unresolved gap warning** + +```elixir +test "logs warning and surfaces unresolved_gap when gap can't be closed", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "first_name" => "A", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []}, + %{"id" => 3, "first_name" => "C", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + ], + "meta" => %{"total" => 5, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + log = + ExUnit.CaptureLog.capture_log(fn -> + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.unresolved_gap == 3 + end) + + assert log =~ "Coverage backfill could not close the gap" +end +``` + +Note: this test uses `ExUnit.CaptureLog`. The call is fully qualified, so no `import ExUnit.CaptureLog` is needed; add the import to the top of the test module only if you prefer calling `capture_log/1` unqualified. + +Run, expected PASS. + +- [ ] **Step 6.6: Test — early termination when gap closes** + +```elixir +test "stops scanning once gap closes (early termination)", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + request_count = :counters.new(1, []) + + Req.Test.stub(@stub_name, fn conn -> + :counters.add(request_count, 1, 1) + + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + # IDs [1, 100] returned, meta.total=3. We expect only ID 2 to be + # fetched directly before early termination kicks in.
+ Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "first_name" => "A", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []}, + %{"id" => 100, "first_name" => "Z", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + ], + "meta" => %{"total" => 3, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{"id" => 2, "first_name" => "B", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + }) + + {"GET", "/api/contacts/" <> _} -> + flunk("scan should have terminated after closing the gap") + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.unresolved_gap == 0 + assert summary.coverage_backfill.imported_full == 1 +end +``` + +Run, expected PASS. + +- [ ] **Step 6.7: Test — backfilled contact participates in auto-merge** + +```elixir +test "backfilled contact gets auto-merged when matching", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + shared_phone = %{ + "contact_field_type" => %{"type" => "phone", "name" => "Mobile", "protocol" => "tel:"}, + "data" => "+15555550100" + } + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "first_name" => "Same", "last_name" => "Name", + "is_active" => true, "is_partial" => false, + "contactFields" => [shared_phone]} + ], + "meta" => %{"total" => 2, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{"id" => 2, "first_name" => "Same", "last_name" => "Name", + "is_active" => true, "is_partial" => false, + "contactFields" => [shared_phone]} + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> 
Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "auto_merge_duplicates" => true + }) + + assert summary.coverage_backfill.imported_full == 1 + assert summary.merged == 1 +end +``` + +This test exercises the Phase 1.4 → Phase 1.5 boundary: the backfilled contact 2 has the same name+phone as listed contact 1, so auto-merge collapses it. + +Run, expected PASS. If auto-merge logic disagrees with the spec's claim that backfilled contacts participate, this test reveals it — fix the placement in `crawl/5` (Step 5.1) before continuing. + +- [ ] **Step 6.8: Test — safety margin extends scan past max_id** + +```elixir +test "scans IDs past max_seen up to safety_margin", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + # Listing returns IDs 1..5, meta.total=6. The 6th lives past max_seen=5. 
+ Req.Test.json(conn, %{ + "data" => + Enum.map([1, 2, 3, 4, 5], fn id -> + %{"id" => id, "first_name" => "L#{id}", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + end), + "meta" => %{"total" => 6, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/6"} -> + Req.Test.json(conn, %{ + "data" => %{"id" => 6, "first_name" => "PastMax", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.imported_full == 1 + assert summary.coverage_backfill.unresolved_gap == 0 + + record = Imports.find_import_record(account_id, "monica_api", "contact", "6") + refute is_nil(record) +end +``` + +Run, expected PASS. This verifies `@safety_margin 50` is actually consulted (ID 6 = max_seen + 1, within margin). + +- [ ] **Step 6.9: Test — hard iteration cap enforced** + +```elixir +test "hard cap on iterations leaves unresolved_gap > 0", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + # Listing returns just ID 1. Meta says total=1000. + # min_id = max_id = 1, max_iterations = (1-1) + 100 = 100. + # Even with safety_margin, the scan should cap at 100 GETs and + # leave the rest of the gap unresolved. + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "first_name" => "A", "last_name" => "X", + "is_active" => true, "is_partial" => false, "contactFields" => []} + ], + "meta" => %{"total" => 1000, "last_page" => 1, "current_page" => 1, "per_page" => 100} + }) + + {"GET", "/api/contacts/" <> _} -> + # Every per-ID GET returns 404; gap never closes. 
+ conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + log = + ExUnit.CaptureLog.capture_log(fn -> + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.gap_detected == 999 + assert summary.coverage_backfill.range_scanned <= 100 + assert summary.coverage_backfill.unresolved_gap > 0 + end) + + assert log =~ "Coverage backfill could not close the gap" +end +``` + +Run, expected PASS. The `range_scanned <= 100` assertion pins the upper bound on per-ID GETs. Note that with `min_id = max_id = 1` the scan window is `[1, 1 + @safety_margin]`, i.e. 50 unseen candidates, so here the window — not the `max_iterations` cap of 100 — is what stops the scan; either way the 999-contact gap is never chased with 1000+ GETs. + +- [ ] **Step 6.10: Run the full file** + +```bash +mix test test/kith/imports/sources/monica_api_test.exs +``` + +Expected: all tests pass. 9 new tests in this task + 2 from earlier = 11 new tests, all green, plus all pre-existing tests green. + +- [ ] **Step 6.11: Commit** + +```bash +git add test/kith/imports/sources/monica_api_test.exs +git -c commit.gpgsign=false commit -m "test(monica): edge-case coverage for coverage_check_and_backfill + +Adds: mixed 200+404 closure, inactive skip, partial import, no-op when +no gap, unresolved-gap log+summary, early termination, auto-merge +interaction. Together with the happy path and ref_data threading tests +from earlier tasks, this covers every branch listed in the spec's test +matrix." +``` + +--- + +## Task 7: Quality gate + ship + +- [ ] **Step 7.1: Full test suite** + +```bash +mix test +``` + +Expected: 0 failures across the full project. + +- [ ] **Step 7.2: Static analysis** + +```bash +mix quality +``` + +Expected: clean across format, credo, sobelow, dialyzer.
+ +If dialyzer complains about the new private functions' specs (Elixir tends to want @spec annotations on private functions when the inferred type is unusually complex), add a focused @spec for `coverage_check_and_backfill/3`: + +```elixir +@spec coverage_check_and_backfill(map(), map(), map() | nil) :: + {map(), map() | nil, map()} +defp coverage_check_and_backfill(ctx, acc, ref_data) do + ... +``` + +Re-run `mix quality`. Repeat if dialyzer flags additional specs needed. + +- [ ] **Step 7.3: Push and confirm CI** + +```bash +git push -u origin fix/monica-import-coverage-backfill +``` + +CI runs ExUnit + Playwright. Confirm both green before opening the PR. + +- [ ] **Step 7.4: Open PR or stack into PR #23** + +Two options. The repeat user-decision from the phone-format fix workflow applies here. Default to opening a separate PR off `main` if `feat/v0.x-multi-area-improvements` has already merged, or stacking on it if it hasn't. + +```bash +# Option A: stack into PR #23 (fast-forward if possible) +git push origin fix/monica-import-coverage-backfill:feat/v0.x-multi-area-improvements + +# Option B: separate PR off main (only if PR #23 already merged) +gh pr create \ + --title "fix(monica): coverage backfill for /api/contacts pagination drops" \ + --body "$(cat <<'EOF' +## Summary +Monica v4's /api/contacts paginated listing silently drops a deterministic +subset of contacts (~1.7% in observed data). This adds a Phase 1.4 +coverage check between the listing crawl and auto-merge that detects the +gap via meta.total comparison and backfills via direct GET /api/contacts/:id, +applying the same is_active and is_partial filters Monica's listing applies. + +Backfilled partial contacts unlock relationship cross-reference resolution +that was previously failing with "Could not resolve first_met_through". 
+ +## Spec +docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md + +## Test plan +- [x] mix test (full suite, 0 failures) +- [x] mix quality (format/credo/sobelow/dialyzer) +- [x] New tests: happy path, mixed 200+404, inactive skip, partial import, + no-op when no gap, unresolved-gap warning, early termination, + auto-merge interaction +- [ ] Manual: trigger a Monica import on the user's account, observe + coverage_backfill.{gap_detected, imported_full, imported_partial, + unresolved_gap} in import.summary +EOF +)" +``` + +--- + +## Done Criteria + +1. `mix test` reports 0 failures with the new coverage_backfill tests passing. +2. `mix quality` is clean. +3. `coverage_check_and_backfill/3` is wired between Phase 1 and Phase 1.5 in `crawl/5`. +4. Import summary carries `coverage_backfill.{gap_detected, range_scanned, imported_full, imported_partial, skipped_deleted, skipped_inactive, errors, unresolved_gap}`. +5. The `unresolved_gap` field surfaces a warning log line when > 0. +6. Re-running the user's Monica import (manual smoke) returns `coverage_backfill.imported_full + coverage_backfill.imported_partial` equal to the original 18-contact gap, with `unresolved_gap: 0`. +7. The PR description references this plan and the spec. From 16596c9387372c8eecefad48b16ddf2e836df279 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 01:00:56 +0300 Subject: [PATCH 53/58] refactor(monica): thread ref_data through crawl_all_contacts return Phase 1.4 (coverage backfill, next commits) needs ref_data so it can call safe_import_api_contact/5 on directly-fetched contacts. crawl_all_contacts/1 was already building ref_data per page but discarding it on return; this commit threads it through to the orchestrator. No behavior change. 
--- lib/kith/imports/sources/monica_api.ex | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index b7566f2..06d0078 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -90,7 +90,8 @@ defmodule Kith.Imports.Sources.MonicaApi do } # Phase 1: Crawl contacts - {acc, deferred} = crawl_all_contacts(ctx) + {acc, deferred, ref_data} = crawl_all_contacts(ctx) + _ = ref_data # Phase 1.5: Auto-merge definite duplicates (optional) merge_result = @@ -182,22 +183,22 @@ defmodule Kith.Imports.Sources.MonicaApi do handle_contacts_page(ctx, state, contacts, meta) {:ok, %{"data" => [], "meta" => _}} -> - {state.acc, state.deferred} + {state.acc, state.deferred, state.ref_data} {:ok, unexpected} -> Logger.error("[MonicaApi] Unexpected contacts response: #{inspect(unexpected)}") acc = add_error(state.acc, "Unexpected API response format from contacts endpoint") - {acc, state.deferred} + {acc, state.deferred, state.ref_data} {:error, :rate_limited} -> acc = add_error(state.acc, "Rate limited by Monica API after retries") - {acc, state.deferred} + {acc, state.deferred, state.ref_data} {:error, reason} -> acc = add_error(state.acc, "Failed to fetch contacts page #{state.page}: #{inspect(reason)}") - {acc, state.deferred} + {acc, state.deferred, state.ref_data} end end @@ -231,7 +232,7 @@ defmodule Kith.Imports.Sources.MonicaApi do crawl_contacts_loop(ctx, next_state) else - {acc, deferred} + {acc, deferred, ref_data} end end From 4be5337fba6d80efc1c5f00110193fbb7a74bf52 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 01:08:33 +0300 Subject: [PATCH 54/58] feat(monica): add backfill helpers (fetch_single_contact, accept_backfill_response) fetch_single_contact/2 wraps api_get/3 to distinguish 404 (Monica soft-delete, expected) from other errors. 
Returns {:ok, contact} | :not_found | {:error, reason}. accept_backfill_response/1 mirrors Monica's listing filter (->real()->active() = is_active=1 AND is_partial=0) on direct-GET responses so the backfill won't import contacts Monica hides from the listing. Partials are still accepted because they anchor relationship targets. Both helpers are unused in this commit; @compile {:nowarn_unused_function, ...} directive suppresses warnings until coverage_check_and_backfill/3 wires them in the next commit. --- lib/kith/imports/sources/monica_api.ex | 41 ++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 06d0078..465ccf8 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -41,6 +41,11 @@ defmodule Kith.Imports.Sources.MonicaApi do @page_limit 100 + # Tasks 2-3 of the coverage-backfill plan add private helpers that aren't + # wired in until Task 4. Suppress warnings until then; this directive is + # removed in the Task 4 commit when the helpers become reachable. 
+ @compile {:nowarn_unused_function, fetch_single_contact: 2, accept_backfill_response: 1} + # ── Behaviour callbacks ─────────────────────────────────────────────── @impl true @@ -1188,6 +1193,42 @@ defmodule Kith.Imports.Sources.MonicaApi do end end + defp fetch_single_contact(credential, monica_id) do + url = "#{credential.url}/api/contacts/#{monica_id}" + + case api_get(credential, url, []) do + {:ok, %{status: 200, body: %{"data" => contact}}} when is_map(contact) -> + {:ok, contact} + + {:ok, %{status: 404}} -> + :not_found + + {:ok, %{status: 429}} -> + {:error, :rate_limited} + + {:ok, %{status: status}} -> + {:error, "Unexpected status: #{status}"} + + {:error, reason} -> + {:error, reason} + end + end + + # Mirror Monica's listing filter on direct-GET responses: + # - Monica's index() chains ->real()->active() which means + # is_active = 1 AND is_partial = 0. + # - Partials still anchor relationship targets, so we accept them + # (relationship resolution depends on importing the partial stubs). + # - Inactive contacts are deliberately hidden by Monica's UI; skip them. 
+ defp accept_backfill_response(%{"is_active" => true, "is_partial" => true}), + do: :import_partial + + defp accept_backfill_response(%{"is_active" => true, "is_partial" => false}), + do: :import_full + + defp accept_backfill_response(%{"is_active" => false}), do: :skip_inactive + defp accept_backfill_response(_other), do: :skip_inactive + # ── Date parsing helpers ───────────────────────────────────────────── defp parse_special_date(nil), do: %{} From 4ce8a3eaae262c999f2a87aa04428eafbf903f6b Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 01:17:51 +0300 Subject: [PATCH 55/58] feat(monica): coverage_check_and_backfill/3 core algorithm Implements Phase 1.4 logic: re-fetch meta.total, compare against import_records, iterate [min_id..max_id+50] for unseen IDs, dispatch each via fetch_single_contact + accept_backfill_response, early-terminate when gap closes, cap iterations at (max_id-min_id)+100 to guarantee termination. Stats accumulator covers gap_detected, range_scanned, imported_full, imported_partial, skipped_deleted, skipped_inactive, errors, unresolved_gap. Removes the @compile {:nowarn_unused_function, ...} directive added in the previous commit (it didn't work in Elixir 1.18.4 anyway, and the helpers are now used by coverage_check_and_backfill/3). Wiring into crawl/5 lands in the next commit. --- lib/kith/imports/sources/monica_api.ex | 212 ++++++++++++++++++++++++- 1 file changed, 207 insertions(+), 5 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 465ccf8..ecba5a4 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -41,11 +41,6 @@ defmodule Kith.Imports.Sources.MonicaApi do @page_limit 100 - # Tasks 2-3 of the coverage-backfill plan add private helpers that aren't - # wired in until Task 4. 
Suppress warnings until then; this directive is - # removed in the Task 4 commit when the helpers become reachable. - @compile {:nowarn_unused_function, fetch_single_contact: 2, accept_backfill_response: 1} - # ── Behaviour callbacks ─────────────────────────────────────────────── @impl true @@ -273,6 +268,213 @@ defmodule Kith.Imports.Sources.MonicaApi do {add_error(acc, msg), deferred} end + # ── Phase 1.4: Coverage check + backfill ────────────────────────────── + # + # Monica's /api/contacts listing endpoint silently drops a subset of + # contacts under LIMIT/OFFSET pagination over its default sort (this is + # a v4 server-side issue we can't fix). We compensate by re-fetching + # meta.total and any IDs in [min_seen, max_seen + safety_margin] that + # weren't returned by the listing. + # + # See docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md + # for full design context. + + @safety_margin 50 + @max_iterations_buffer 100 + + defp coverage_check_and_backfill(ctx, acc, ref_data) do + case fetch_meta_total(ctx.credential) do + {:ok, monica_total} -> + do_backfill(ctx, acc, ref_data, monica_total) + + {:error, _reason} -> + # Can't determine gap; pass through with zeroed coverage stats. + {acc, ref_data, empty_backfill_stats()} + end + end + + defp fetch_meta_total(credential) do + url = "#{credential.url}/api/contacts" + + case api_get_json(credential, url, limit: 1, page: 1) do + {:ok, %{"meta" => %{"total" => total}}} when is_integer(total) -> {:ok, total} + _ -> {:error, :unknown_total} + end + end + + defp do_backfill(ctx, acc, ref_data, monica_total) do + seen_ids = seen_source_ids(ctx.import_job.id) + + if Enum.empty?(seen_ids) do + # Nothing imported by listing; refuse to scan an unbounded range. 
+ {acc, ref_data, empty_backfill_stats(gap: monica_total)} + else + min_id = Enum.min(seen_ids) + max_id = Enum.max(seen_ids) + gap = monica_total - MapSet.size(seen_ids) + + stats = %{ + gap_detected: gap, + range_scanned: 0, + imported_full: 0, + imported_partial: 0, + skipped_deleted: 0, + skipped_inactive: 0, + errors: 0, + unresolved_gap: 0 + } + + if gap <= 0 do + {acc, ref_data, %{stats | unresolved_gap: 0}} + else + scan_gap_range(ctx, acc, ref_data, seen_ids, min_id, max_id, monica_total, stats) + end + end + end + + defp seen_source_ids(import_id) do + from(ir in Imports.ImportRecord, + where: + ir.import_id == ^import_id and + ir.source_entity_type == "contact", + select: ir.source_entity_id + ) + |> Repo.all() + |> Enum.flat_map(fn s -> + case Integer.parse(s) do + {n, ""} -> [n] + _ -> [] + end + end) + |> MapSet.new() + end + + defp scan_gap_range(ctx, acc, ref_data, seen_ids, min_id, max_id, monica_total, stats) do + scan_start = min_id + scan_end = max_id + @safety_margin + max_iterations = max_id - min_id + @max_iterations_buffer + + Logger.info( + "[MonicaApi] Coverage backfill scanning [#{scan_start}..#{scan_end}] " <> + "(seen=#{MapSet.size(seen_ids)}, monica_total=#{monica_total}, gap=#{stats.gap_detected})" + ) + + candidates = + scan_start..scan_end + |> Enum.reject(&MapSet.member?(seen_ids, &1)) + |> Enum.take(max_iterations) + + initial = {acc, ref_data, stats, seen_ids} + + {final_acc, final_ref_data, final_stats, final_seen} = + Enum.reduce_while(candidates, initial, fn id, {acc, ref_data, stats, seen} -> + if MapSet.size(seen) >= monica_total do + {:halt, {acc, ref_data, stats, seen}} + else + case fetch_and_dispatch_backfill(ctx, id, acc, ref_data) do + {:imported_full, new_acc, new_ref_data} -> + {:cont, + {new_acc, new_ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + imported_full: stats.imported_full + 1 + }, MapSet.put(seen, id)}} + + {:imported_partial, new_acc, new_ref_data} -> + {:cont, + {new_acc, 
new_ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + imported_partial: stats.imported_partial + 1 + }, MapSet.put(seen, id)}} + + :skipped_deleted -> + {:cont, + {acc, ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + skipped_deleted: stats.skipped_deleted + 1 + }, seen}} + + :skipped_inactive -> + {:cont, + {acc, ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + skipped_inactive: stats.skipped_inactive + 1 + }, seen}} + + {:error, _reason} -> + {:cont, + {acc, ref_data, + %{stats | range_scanned: stats.range_scanned + 1, errors: stats.errors + 1}, + seen}} + end + end + end) + + unresolved = max(0, monica_total - MapSet.size(final_seen)) + + if unresolved > 0 do + Logger.warning( + "[MonicaApi] Coverage backfill could not close the gap: " <> + "monica_total=#{monica_total}, seen=#{MapSet.size(final_seen)}, unresolved=#{unresolved}" + ) + end + + {final_acc, final_ref_data, %{final_stats | unresolved_gap: unresolved}} + end + + defp fetch_and_dispatch_backfill(ctx, monica_id, acc, ref_data) do + case fetch_single_contact(ctx.credential, monica_id) do + :not_found -> + :skipped_deleted + + {:error, reason} -> + {:error, reason} + + {:ok, api_contact} -> + case accept_backfill_response(api_contact) do + :skip_inactive -> + :skipped_inactive + + verdict when verdict in [:import_full, :import_partial] -> + new_ref_data = + build_or_update_ref_data(ctx.account_id, [api_contact], ref_data) + + {new_acc, _new_deferred} = + safe_import_api_contact(ctx, api_contact, new_ref_data, acc, %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }) + + case verdict do + :import_full -> {:imported_full, new_acc, new_ref_data} + :import_partial -> {:imported_partial, new_acc, new_ref_data} + end + end + end + end + + defp empty_backfill_stats(opts \\ []) do + %{ + gap_detected: Keyword.get(opts, :gap, 0), + range_scanned: 0, + imported_full: 0, + imported_partial: 0, + skipped_deleted: 0, + 
skipped_inactive: 0, + errors: 0, + unresolved_gap: Keyword.get(opts, :gap, 0) + } + end + defp import_api_contact(ctx, api_contact, ref_data, acc, deferred) do source_id = to_string(api_contact["id"]) From 4744984d5f6447eac9fdd33f556f5e2813bceff9 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 01:26:47 +0300 Subject: [PATCH 56/58] feat(monica): wire coverage_check_and_backfill into crawl/5 Phase 1.4 now runs between the listing crawl (Phase 1) and auto-merge (Phase 1.5). Backfilled contacts participate in auto-merge and Phase 2 cross-reference resolution as first-class import-record holders. Import summary now carries coverage_backfill.{gap_detected, range_scanned, imported_full, imported_partial, skipped_deleted, skipped_inactive, errors, unresolved_gap}. The unresolved_gap field is the self-reporting safety net: if it ends up > 0, the operator knows the listing dropped contacts the backfill couldn't recover. The happy-path test exercises the full pipeline end-to-end: listing returns 4 of 5 contacts, meta.total reports 5, backfill issues one direct GET for the missing ID 4, returns 200, and the contact ends up in import_records. Also fix three existing tests to account for the new meta-total call made during coverage backfill phase. --- lib/kith/imports/sources/monica_api.ex | 14 +++- test/kith/imports/sources/monica_api_test.exs | 75 ++++++++++++++++++- 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index ecba5a4..253c07f 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -91,9 +91,13 @@ defmodule Kith.Imports.Sources.MonicaApi do # Phase 1: Crawl contacts {acc, deferred, ref_data} = crawl_all_contacts(ctx) - _ = ref_data - # Phase 1.5: Auto-merge definite duplicates (optional) + # Phase 1.4: Coverage check + backfill any silently-dropped contacts. 
+ # See docs/superpowers/specs/2026-05-17-monica-import-coverage-backfill-design.md + {acc, _ref_data, coverage_stats} = coverage_check_and_backfill(ctx, acc, ref_data) + + # Phase 1.5: Auto-merge definite duplicates (optional). + # Runs AFTER Phase 1.4 so backfilled contacts participate in auto-merge. merge_result = if opts["auto_merge_duplicates"] do auto_merge_duplicates(account_id, import_job) @@ -141,7 +145,8 @@ defmodule Kith.Imports.Sources.MonicaApi do merged: merge_result.merged, error_count: error_count, errors: Enum.take(all_errors, 50), - misc_data_plan: Enum.reverse(deferred.misc_data) + misc_data_plan: Enum.reverse(deferred.misc_data), + coverage_backfill: coverage_stats }} catch :cancelled -> @@ -153,7 +158,8 @@ defmodule Kith.Imports.Sources.MonicaApi do skipped: 0, merged: 0, error_count: 1, - errors: ["Import cancelled"] + errors: ["Import cancelled"], + coverage_backfill: empty_backfill_stats() }} end diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index 2d4e001..58643cb 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -230,6 +230,8 @@ defmodule Kith.Imports.Sources.MonicaApiTest do case page_num do 1 -> Req.Test.json(conn, contacts_page_json(page1, 1, 2, 5)) 2 -> Req.Test.json(conn, contacts_page_json(page2, 2, 2, 5)) + # 3 = fetch_meta_total call during coverage backfill + 3 -> Req.Test.json(conn, contacts_page_json(page1 ++ page2, 1, 1, 5)) end end) @@ -237,8 +239,8 @@ defmodule Kith.Imports.Sources.MonicaApiTest do assert {:ok, summary} = MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) assert summary.contacts == 5 - # Verify both pages were fetched - assert Agent.get(agent, & &1) == 2 + # Verify both pages + meta-total were fetched + assert Agent.get(agent, & &1) == 3 Agent.stop(agent) end @@ -313,6 +315,8 @@ defmodule Kith.Imports.Sources.MonicaApiTest do case page_num do 1 -> Req.Test.json(conn, 
contacts_page_json([alice], 1, 2, 2)) 2 -> Req.Test.json(conn, contacts_page_json([bob], 2, 2, 2)) + # 3 = fetch_meta_total call during coverage backfill + 3 -> Req.Test.json(conn, contacts_page_json([alice, bob], 1, 1, 2)) end end) @@ -579,8 +583,8 @@ defmodule Kith.Imports.Sources.MonicaApiTest do "extra_notes" => true }) - # Only the contacts page should have been fetched - assert Agent.get(agent, & &1) == 1 + # Contacts page + meta-total call during coverage backfill + assert Agent.get(agent, & &1) == 2 Agent.stop(agent) end end @@ -1527,4 +1531,67 @@ defmodule Kith.Imports.Sources.MonicaApiTest do assert length(active) == 1 end end + + describe "coverage_check_and_backfill" do + test "closes a single-ID gap via direct fetch", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + # Listing call — return IDs 1, 2, 3, 5 (ID 4 missing) with meta.total=5 + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 2, 3, 5], fn id -> + %{ + "id" => id, + "first_name" => "Listed#{id}", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + end), + "meta" => %{ + "total" => 5, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/4"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 4, + "first_name" => "Backfilled4", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "auto_merge_duplicates" => false + }) + + assert summary.coverage_backfill.gap_detected == 1 + assert summary.coverage_backfill.imported_full == 1 + assert summary.coverage_backfill.imported_partial == 0 + assert 
summary.coverage_backfill.skipped_deleted == 0 + assert summary.coverage_backfill.skipped_inactive == 0 + assert summary.coverage_backfill.unresolved_gap == 0 + assert summary.imported == 5 + + record = Imports.find_import_record(account_id, "monica_api", "contact", "4") + refute is_nil(record) + end + end end From 3039247c57e05b76864a6c4fdb71e7ae9a476299 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 01:32:01 +0300 Subject: [PATCH 57/58] test(monica): edge-case coverage for coverage_check_and_backfill Adds: mixed 200+404 closure, inactive skip, partial import, no-op when no gap, unresolved-gap log+summary, early termination, auto-merge interaction, safety margin (past max_seen), and hard iteration cap. Together with the happy path from the previous commit, this covers every branch listed in the spec's test matrix. --- test/kith/imports/sources/monica_api_test.exs | 472 ++++++++++++++++++ 1 file changed, 472 insertions(+) diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index 58643cb..30f4365 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -6,6 +6,7 @@ defmodule Kith.Imports.Sources.MonicaApiTest do alias Kith.Contacts alias Kith.Repo + import ExUnit.CaptureLog import Kith.AccountsFixtures import Kith.ContactsFixtures import Kith.ImportsFixtures @@ -1593,5 +1594,476 @@ defmodule Kith.Imports.Sources.MonicaApiTest do record = Imports.find_import_record(account_id, "monica_api", "contact", "4") refute is_nil(record) end + + test "closes a 1-of-2 gap when one direct fetch 404s", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 3, 5], fn id -> + %{ + "id" => id, + 
"first_name" => "L#{id}", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + end), + "meta" => %{ + "total" => 5, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/2"} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + + {"GET", "/api/contacts/4"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 4, + "first_name" => "B4", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.gap_detected == 2 + assert summary.coverage_backfill.imported_full == 1 + # ID 2 is a deleted/404; the safety-margin scan past max_seen also yields 404s + assert summary.coverage_backfill.skipped_deleted >= 1 + assert summary.coverage_backfill.unresolved_gap == 1 + end + + test "skips inactive contact in gap", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "A", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + ], + "meta" => %{ + "total" => 2, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 2, + "first_name" => "Inactive", + "last_name" => "X", + "is_active" => false, + "is_partial" => false, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), 
import_job, %{}) + + assert summary.coverage_backfill.skipped_inactive == 1 + assert summary.coverage_backfill.imported_full == 0 + assert summary.coverage_backfill.unresolved_gap == 1 + + refute Imports.find_import_record(account_id, "monica_api", "contact", "2") + end + + test "imports partial contact in gap (relationships need it)", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "A", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + ], + "meta" => %{ + "total" => 2, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 2, + "first_name" => "Partial", + "last_name" => "Stub", + "is_active" => true, + "is_partial" => true, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.imported_partial == 1 + assert summary.coverage_backfill.imported_full == 0 + assert summary.coverage_backfill.unresolved_gap == 0 + + record = Imports.find_import_record(account_id, "monica_api", "contact", "2") + refute is_nil(record) + end + + test "no-op when meta.total matches distinct imported", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + request_count = :counters.new(1, []) + + Req.Test.stub(@stub_name, fn conn -> + :counters.add(request_count, 1, 1) + + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 2, 3], fn id -> + %{ + "id" => id, + "first_name" => 
"L#{id}", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + end), + "meta" => %{ + "total" => 3, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/" <> _} -> + flunk("unexpected direct-fetch when no gap exists") + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.gap_detected == 0 + assert summary.coverage_backfill.range_scanned == 0 + # 1 listing call + 1 meta.total recheck = 2 API calls; no per-ID GETs. + assert :counters.get(request_count, 1) == 2 + end + + test "logs warning and surfaces unresolved_gap when gap can't be closed", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "A", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + }, + %{ + "id" => 3, + "first_name" => "C", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + ], + "meta" => %{ + "total" => 5, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + log = + capture_log(fn -> + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.unresolved_gap == 3 + end) + + assert log =~ "Coverage backfill could not close the gap" + end + + test "stops scanning once gap closes (early termination)", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> 
+ Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "A", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + }, + %{ + "id" => 100, + "first_name" => "Z", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + ], + "meta" => %{ + "total" => 3, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 2, + "first_name" => "B", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _} -> + flunk("scan should have terminated after closing the gap") + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.unresolved_gap == 0 + assert summary.coverage_backfill.imported_full == 1 + end + + test "backfilled contact gets auto-merged when matching", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + shared_phone = %{ + "contact_field_type" => %{"type" => "phone", "name" => "Mobile", "protocol" => "tel:"}, + "content" => "+15555550100" + } + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "Same", + "last_name" => "Name", + "is_active" => true, + "is_partial" => false, + "contactFields" => [shared_phone] + } + ], + "meta" => %{ + "total" => 2, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/2"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 2, + "first_name" => "Same", + "last_name" => "Name", + "is_active" => true, + "is_partial" => false, + "contactFields" => [shared_phone] + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> 
Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "auto_merge_duplicates" => true + }) + + assert summary.coverage_backfill.imported_full == 1 + assert summary.merged == 1 + end + + test "scans IDs past max_seen up to safety_margin", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => + Enum.map([1, 2, 3, 4, 5], fn id -> + %{ + "id" => id, + "first_name" => "L#{id}", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + end), + "meta" => %{ + "total" => 6, + "last_page" => 1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/6"} -> + Req.Test.json(conn, %{ + "data" => %{ + "id" => 6, + "first_name" => "PastMax", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.imported_full == 1 + assert summary.coverage_backfill.unresolved_gap == 0 + + record = Imports.find_import_record(account_id, "monica_api", "contact", "6") + refute is_nil(record) + end + + test "hard cap on iterations leaves unresolved_gap > 0", + %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case {conn.method, conn.request_path} do + {"GET", "/api/contacts"} -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 1, + "first_name" => "A", + "last_name" => "X", + "is_active" => true, + "is_partial" => false, + "contactFields" => [] + } + ], + "meta" => %{ + "total" => 1000, + "last_page" => 
1, + "current_page" => 1, + "per_page" => 100 + } + }) + + {"GET", "/api/contacts/" <> _} -> + conn |> Plug.Conn.put_status(404) |> Req.Test.json(%{}) + end + end) + + log = + capture_log(fn -> + {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) + + assert summary.coverage_backfill.gap_detected == 999 + assert summary.coverage_backfill.range_scanned <= 100 + assert summary.coverage_backfill.unresolved_gap > 0 + end) + + assert log =~ "Coverage backfill could not close the gap" + end end end From b96f08c1161626e710c43cca76f2dd6f45a22189 Mon Sep 17 00:00:00 2001 From: Bashar Qassis <23612682+bashar-qassis@users.noreply.github.com> Date: Sun, 17 May 2026 01:36:17 +0300 Subject: [PATCH 58/58] refactor(monica): flatten coverage_check_and_backfill helper nesting Credo --strict flagged two 'nested too deep' findings in scan_gap_range/8 and fetch_and_dispatch_backfill/4 (depth 3, max 2). Surgical fix: extract exactly two helpers, step_backfill/4 (the reduce_while body) and dispatch_accepted_contact/4 (the verdict dispatch). Each function now has nesting depth <= 2. No behavior change. 
--- lib/kith/imports/sources/monica_api.ex | 147 +++++++++++++------------ 1 file changed, 74 insertions(+), 73 deletions(-) diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 253c07f..3605616 100644 --- a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -373,54 +373,8 @@ defmodule Kith.Imports.Sources.MonicaApi do initial = {acc, ref_data, stats, seen_ids} {final_acc, final_ref_data, final_stats, final_seen} = - Enum.reduce_while(candidates, initial, fn id, {acc, ref_data, stats, seen} -> - if MapSet.size(seen) >= monica_total do - {:halt, {acc, ref_data, stats, seen}} - else - case fetch_and_dispatch_backfill(ctx, id, acc, ref_data) do - {:imported_full, new_acc, new_ref_data} -> - {:cont, - {new_acc, new_ref_data, - %{ - stats - | range_scanned: stats.range_scanned + 1, - imported_full: stats.imported_full + 1 - }, MapSet.put(seen, id)}} - - {:imported_partial, new_acc, new_ref_data} -> - {:cont, - {new_acc, new_ref_data, - %{ - stats - | range_scanned: stats.range_scanned + 1, - imported_partial: stats.imported_partial + 1 - }, MapSet.put(seen, id)}} - - :skipped_deleted -> - {:cont, - {acc, ref_data, - %{ - stats - | range_scanned: stats.range_scanned + 1, - skipped_deleted: stats.skipped_deleted + 1 - }, seen}} - - :skipped_inactive -> - {:cont, - {acc, ref_data, - %{ - stats - | range_scanned: stats.range_scanned + 1, - skipped_inactive: stats.skipped_inactive + 1 - }, seen}} - - {:error, _reason} -> - {:cont, - {acc, ref_data, - %{stats | range_scanned: stats.range_scanned + 1, errors: stats.errors + 1}, - seen}} - end - end + Enum.reduce_while(candidates, initial, fn id, state -> + step_backfill(ctx, id, state, monica_total) end) unresolved = max(0, monica_total - MapSet.size(final_seen)) @@ -435,35 +389,82 @@ defmodule Kith.Imports.Sources.MonicaApi do {final_acc, final_ref_data, %{final_stats | unresolved_gap: unresolved}} end + defp step_backfill(ctx, id, {acc, 
ref_data, stats, seen} = state, monica_total) do + if MapSet.size(seen) >= monica_total do + {:halt, state} + else + case fetch_and_dispatch_backfill(ctx, id, acc, ref_data) do + {:imported_full, new_acc, new_ref_data} -> + {:cont, + {new_acc, new_ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + imported_full: stats.imported_full + 1 + }, MapSet.put(seen, id)}} + + {:imported_partial, new_acc, new_ref_data} -> + {:cont, + {new_acc, new_ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + imported_partial: stats.imported_partial + 1 + }, MapSet.put(seen, id)}} + + :skipped_deleted -> + {:cont, + {acc, ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + skipped_deleted: stats.skipped_deleted + 1 + }, seen}} + + :skipped_inactive -> + {:cont, + {acc, ref_data, + %{ + stats + | range_scanned: stats.range_scanned + 1, + skipped_inactive: stats.skipped_inactive + 1 + }, seen}} + + {:error, _reason} -> + {:cont, + {acc, ref_data, + %{stats | range_scanned: stats.range_scanned + 1, errors: stats.errors + 1}, seen}} + end + end + end + defp fetch_and_dispatch_backfill(ctx, monica_id, acc, ref_data) do case fetch_single_contact(ctx.credential, monica_id) do - :not_found -> - :skipped_deleted + :not_found -> :skipped_deleted + {:error, reason} -> {:error, reason} + {:ok, api_contact} -> dispatch_accepted_contact(ctx, api_contact, acc, ref_data) + end + end - {:error, reason} -> - {:error, reason} + defp dispatch_accepted_contact(ctx, api_contact, acc, ref_data) do + case accept_backfill_response(api_contact) do + :skip_inactive -> + :skipped_inactive + + verdict when verdict in [:import_full, :import_partial] -> + new_ref_data = build_or_update_ref_data(ctx.account_id, [api_contact], ref_data) + + {new_acc, _new_deferred} = + safe_import_api_contact(ctx, api_contact, new_ref_data, acc, %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }) - {:ok, api_contact} -> - case 
accept_backfill_response(api_contact) do - :skip_inactive -> - :skipped_inactive - - verdict when verdict in [:import_full, :import_partial] -> - new_ref_data = - build_or_update_ref_data(ctx.account_id, [api_contact], ref_data) - - {new_acc, _new_deferred} = - safe_import_api_contact(ctx, api_contact, new_ref_data, acc, %{ - first_met_through: [], - relationships: [], - extra_notes: [], - misc_data: [] - }) - - case verdict do - :import_full -> {:imported_full, new_acc, new_ref_data} - :import_partial -> {:imported_partial, new_acc, new_ref_data} - end + case verdict do + :import_full -> {:imported_full, new_acc, new_ref_data} + :import_partial -> {:imported_partial, new_acc, new_ref_data} end end end