diff --git a/.dialyzer_ignore.exs b/.dialyzer_ignore.exs new file mode 100644 index 0000000..5906f2e --- /dev/null +++ b/.dialyzer_ignore.exs @@ -0,0 +1,10 @@ +# Suppress dialyzer warnings produced by generated code in third-party +# libraries. Re-evaluate this list whenever we upgrade deps. + +[ + # ex_cldr_territories generates type specs that are slightly broader than + # the success typing for these zero-arg accessors on Kith.Cldr.Territory. + # Reported as `:contract_supertype` against lib/kith/cldr.ex (the backend + # module that injects the provider). Not actionable from our code. + {"lib/kith/cldr.ex", :contract_supertype} +] diff --git a/.env.example b/.env.example index 1dffc46..5dd039b 100644 --- a/.env.example +++ b/.env.example @@ -12,6 +12,13 @@ # Core — REQUIRED (no defaults) # ============================================================ SECRET_KEY_BASE=generate-with-mix-phx-gen-secret +# Erlang BEAM distribution cookie. Shared between the app and worker +# containers so they can cluster for cross-container PubSub broadcasts +# (LiveView import progress). 
Generate with one of: +# mix phx.gen.secret 32 +# openssl rand -base64 32 +RELEASE_COOKIE=generate-with-mix-phx-gen-secret + DATABASE_URL=ecto://kith:change_me@postgres:5432/kith_prod AUTH_TOKEN_SALT=generate-with-mix-phx-gen-secret CLOAK_KEY=generate-32-byte-base64-key @@ -30,10 +37,7 @@ KITH_HOSTNAME=localhost POSTGRES_USER=kith POSTGRES_PASSWORD=change_me POSTGRES_DB=kith_prod -# PostgreSQL port — used by Elixir app (dev/test) AND as Docker host port -# Default 5434 avoids conflicts with standard postgres (5432) -# Inside Docker, the app container overrides this to 5432 (internal network) -DB_PORT=5434 + POOL_SIZE=10 # DATABASE_SSL=false @@ -121,3 +125,23 @@ SENTRY_DSN= SENTRY_ENVIRONMENT=production # Required in production for /metrics endpoint access METRICS_TOKEN=generate-a-random-token + +# ============================================================ +# Docker Host Ports +# ============================================================ +# These configure which HOST ports Compose publishes for each service. +# Defaults match docker-compose.{dev,prod}.yml; override only on conflict. +# Internal container ports are NOT configurable (services talk to each other +# on standard ports over the Docker network). 
+# +# Dev stack (docker-compose.dev.yml): +DB_PORT=5434 # postgres -> host (default 5434, avoids local 5432) +MAILPIT_SMTP_PORT=1025 # mailpit SMTP listener +MAILPIT_WEB_PORT=8025 # mailpit web UI -> http://localhost:8025 +MINIO_PORT=9000 # MinIO S3 API +MINIO_CONSOLE_PORT=9001 # MinIO web console +APP_PORT=4000 # Phoenix app (only when running via Compose) +# +# Prod stack (docker-compose.prod.yml): +HTTP_PORT=80 # Caddy HTTP (redirects to HTTPS) +HTTPS_PORT=443 # Caddy HTTPS \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index fe5b8d7..8cb088e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -75,7 +75,7 @@ lib/kith/ # Domain layer (contexts + schemas) storage/ # File storage abstraction (local disk / S3) tasks/ # Personal tasks vcard/ # vCard parser + serializer - workers/ # 16 Oban workers across 9 queues + workers/ # 16 Oban workers across 7 queues lib/kith_web/ # Web layer controllers/api/ # REST API controllers (bearer token auth, cursor pagination) @@ -106,7 +106,7 @@ default queries. 30-day trash before permanent purge via `ContactPurgeWorker`. ### Oban background jobs Workers live in `lib/kith/workers/`. Queues: default, mailers, reminders, exports, -imports, immich, purge, photo_sync, api_supplement. Four cron jobs run nightly/weekly. +imports, immich, purge. Four cron jobs run nightly/weekly. Tests use `Oban.Testing` — Oban is disabled in test env. ### REST API conventions diff --git a/config/config.exs b/config/config.exs index 13f1eee..e84d75c 100644 --- a/config/config.exs +++ b/config/config.exs @@ -10,6 +10,14 @@ import Config # Register .vcf (vCard) MIME type for LiveView uploads config :mime, :types, %{"text/vcard" => ["vcf"], "application/json" => ["json"]} +# Default CLDR backend — required so ex_cldr_territories can resolve +# locale-aware territory data without an explicit per-call backend argument. +config :ex_cldr, default_backend: Kith.Cldr + +# Outbound rate limit for Monica API calls. 
Five below the documented +# default of 60 req/min leaves a five-call safety margin. +config :kith, :monica_rate_limit, 55 + config :kith, :scopes, user: [ default: true, @@ -40,8 +48,7 @@ config :kith, Oban, exports: 2, imports: 2, immich: 3, - purge: 1, - photo_sync: 5 + purge: 1 ], plugins: [ Oban.Plugins.Pruner, @@ -118,7 +125,8 @@ config :logger, :default_formatter, :attempt, :max_attempts, :state, - :source + :source, + :import_id ] # Cloak encryption vault — key set per-environment diff --git a/config/runtime.exs b/config/runtime.exs index 944d31e..ddcf644 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -215,6 +215,54 @@ if config_env() == :prod do backend: {Hammer.Backend.Redis, [expiry_ms: 60_000 * 60, redis_url: redis_url]} end + # Oban — only the worker container processes jobs in production. + # The web container can call `Oban.insert/1` to enqueue jobs, but + # runs no queues or plugins (no cron, no pruner) — so it never claims + # rows from `oban_jobs`. The worker container keeps the full config + # from `config.exs`. + # + # Dev (`config_env() == :dev`) is unaffected: this block only runs in + # `:prod`. Test env is pinned to `testing: :manual` in `config/test.exs`. + case System.get_env("KITH_MODE", "web") do + "worker" -> + :ok + + _web -> + config :kith, Oban, queues: false, plugins: false + end + + # libcluster — connect this BEAM node to its peer(s) so Phoenix.PubSub + # broadcasts span containers (web ↔ worker). Configure via + # `KITH_CLUSTER_HOSTS` env var: comma-separated short node names, e.g. + # `kith@app,kith@worker`. Leave unset to disable clustering (single-node). + # + # Each container must also set `RELEASE_DISTRIBUTION=sname` and + # `RELEASE_NODE=kith@<hostname>` so its actual node name matches the + # name listed in `KITH_CLUSTER_HOSTS`. `RELEASE_COOKIE` must be shared. 
+ cluster_hosts = + case System.get_env("KITH_CLUSTER_HOSTS") do + nil -> + [] + + "" -> + [] + + str -> + str + |> String.split(",", trim: true) + |> Enum.map(&(&1 |> String.trim() |> String.to_atom())) + end + + if cluster_hosts != [] do + config :libcluster, + topologies: [ + kith: [ + strategy: Cluster.Strategy.Epmd, + config: [hosts: cluster_hosts] + ] + ] + end + # Sentry error tracking (optional — only when SENTRY_DSN is set) if sentry_dsn = System.get_env("SENTRY_DSN") do config :sentry, diff --git a/config/test.exs b/config/test.exs index 9732fc4..889cf4d 100644 --- a/config/test.exs +++ b/config/test.exs @@ -26,6 +26,14 @@ config :kith, KithWeb.Endpoint, # Disable Oban in tests (use Oban.Testing) config :kith, Oban, testing: :manual +# Use the production libphonenumber metadata in tests so test-only validation +# rules (NANP "555" prefixes, etc.) don't diverge from real behavior. +config :ex_phone_number, metadata_file: Path.join("resources", "PhoneNumberMetadata.xml") + +# Effectively unthrottled in tests — throttle logic is exercised in +# isolation in rate_limiter_test.exs, not via the full crawl integration. +config :kith, :monica_rate_limit, 1_000_000 + # Disable PromEx in tests (its Ecto poller conflicts with sandbox ownership) config :kith, Kith.PromEx, disabled: true diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 357984e..bdb4c16 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,3 +1,14 @@ +# Kith Development Docker Compose +# +# Host ports are configurable via a `.env` file in the project root (auto-loaded +# by Compose). All variables have defaults — `.env` is only needed to override. +# See `.env.example` ("Docker Host Ports" section) for the full list. 
+# +# Usage: +# docker compose -f docker-compose.dev.yml up -d # infra only +# docker compose -f docker-compose.dev.yml up -d postgres mailpit # subset +# docker compose -f docker-compose.dev.yml --profile app up -d # also run app + services: postgres: image: postgres:15-alpine @@ -16,8 +27,8 @@ services: mailpit: image: axllent/mailpit:latest ports: - - "1025:1025" - - "8025:8025" + - "${MAILPIT_SMTP_PORT:-1025}:1025" + - "${MAILPIT_WEB_PORT:-8025}:8025" minio: image: minio/minio:latest @@ -52,7 +63,7 @@ services: context: . dockerfile: Dockerfile.dev ports: - - "4000:4000" + - "${APP_PORT:-4000}:4000" environment: DB_HOST: postgres DB_PORT: "5432" diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 92ceb98..0185aa9 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -61,6 +61,7 @@ services: app: image: kith:latest command: ["start"] + hostname: app depends_on: migrate: condition: service_completed_successfully @@ -72,6 +73,12 @@ services: tmpfs: - /tmp:size=64M environment: + # ── BEAM distribution / clustering (libcluster Epmd strategy) ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: sname + RELEASE_NODE: kith@app + KITH_CLUSTER_HOSTS: kith@app,kith@worker + # ── existing env vars unchanged ── DATABASE_URL: ${DATABASE_URL} SECRET_KEY_BASE: ${SECRET_KEY_BASE} KITH_HOSTNAME: ${KITH_HOSTNAME:-localhost} @@ -135,6 +142,7 @@ services: worker: image: kith:latest command: ["start"] + hostname: worker security_opt: - no-new-privileges:true cap_drop: @@ -148,6 +156,12 @@ services: migrate: condition: service_completed_successfully environment: + # ── BEAM distribution / clustering (libcluster Epmd strategy) ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: sname + RELEASE_NODE: kith@worker + KITH_CLUSTER_HOSTS: kith@app,kith@worker + # ── existing env vars unchanged ── DATABASE_URL: ${DATABASE_URL} SECRET_KEY_BASE: ${SECRET_KEY_BASE} KITH_HOSTNAME: ${KITH_HOSTNAME:-localhost} @@ -202,8 +216,8 @@ services: 
caddy: image: caddy:2-alpine ports: - - "80:80" - - "443:443" + - "${HTTP_PORT:-80}:80" + - "${HTTPS_PORT:-443}:443" volumes: - ./Caddyfile:/etc/caddy/Caddyfile:ro - caddy_data:/data diff --git a/docs/superpowers/plans/2026-03-22-extensible-import-system.md b/docs/superpowers/plans/2026-03-22-extensible-import-system.md deleted file mode 100644 index e754690..0000000 --- a/docs/superpowers/plans/2026-03-22-extensible-import-system.md +++ /dev/null @@ -1,2859 +0,0 @@ -# Extensible Import System Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Build an extensible import framework supporting multiple data sources (VCF, Monica CRM), with a behaviour-based plugin architecture, per-contact transactions, import tracking via `import_records`, and a wizard UI with real-time progress. - -**Architecture:** Generic `imports`/`import_records` tables track jobs and source-ID-to-local-ID mappings. A `Source` behaviour defines the plugin contract. `ImportSourceWorker` (Oban) orchestrates any source. Monica source processes in 5 phases: reference data → contacts → children → cross-references → async photo/API sync. Separate Oban workers handle photo downloads and API supplements with rate-limit-aware staggering. - -**Tech Stack:** Elixir, Ecto, Oban, Phoenix LiveView, PostgreSQL, Cloak (encryption) - -**Spec:** `docs/superpowers/specs/2026-03-21-extensible-import-system-design.md` - -**Dependency:** `docs/superpowers/plans/2026-03-22-contact-first-met-fields.md` — must be implemented first. 
- ---- - -## File Structure - -| File | Action | Responsibility | -|---|---|---| -| `priv/repo/migrations/TIMESTAMP_create_imports_and_import_records.exs` | Create | Migration: imports + import_records tables, indexes, concurrent guard | -| `lib/kith/imports/source.ex` | Create | Source behaviour definition | -| `lib/kith/imports/import.ex` | Create | Import schema (job tracking) | -| `lib/kith/imports/import_record.ex` | Create | ImportRecord schema (source→local ID mapping) | -| `lib/kith/imports.ex` | Create | Imports context module | -| `lib/kith/imports/sources/vcard.ex` | Create | VCard source (wraps existing parser) | -| `lib/kith/imports/sources/monica.ex` | Create | Monica source implementation | -| `lib/kith/workers/import_source_worker.ex` | Create | Generic import Oban worker | -| `lib/kith/workers/photo_sync_worker.ex` | Create | Photo download Oban worker | -| `lib/kith/workers/api_supplement_worker.ex` | Create | API data supplement Oban worker | -| `lib/kith/workers/import_file_cleanup_worker.ex` | Create | Periodic cleanup (30-day retention) | -| `lib/kith_web/live/import_wizard_live.ex` | Create | Import wizard LiveView (replaces existing) | -| `lib/kith_web/live/components/monica_import_component.ex` | Create | Monica-specific form/validation/summary | -| `lib/kith_web/live/components/vcard_import_component.ex` | Create | VCard import UI (wraps existing) | -| `config/config.exs` | Modify | Add photo_sync + api_supplement Oban queues, cleanup cron | -| `lib/kith/contacts/photo.ex` | Modify | Add `pending_sync?/1` helper | -| `test/support/fixtures/imports_fixtures.ex` | Create | Test fixtures for imports | -| `test/kith/imports_test.exs` | Create | Context module tests | -| `test/kith/imports/sources/vcard_test.exs` | Create | VCard source tests | -| `test/kith/imports/sources/monica_test.exs` | Create | Monica source tests | -| `test/kith/workers/import_source_worker_test.exs` | Create | Worker tests | -| 
`test/kith/workers/photo_sync_worker_test.exs` | Create | Photo sync tests | -| `test/kith/workers/api_supplement_worker_test.exs` | Create | API supplement tests | - ---- - -### Task 1: Migration — Create imports and import_records tables - -**Files:** -- Create: `priv/repo/migrations/TIMESTAMP_create_imports_and_import_records.exs` - -- [ ] **Step 1: Generate the migration file** - -Run: `cd /Users/basharqassis/projects/kith && mix ecto.gen.migration create_imports_and_import_records` - -- [ ] **Step 2: Write the migration** - -```elixir -defmodule Kith.Repo.Migrations.CreateImportsAndImportRecords do - use Ecto.Migration - - def change do - create table(:imports) do - add :account_id, references(:accounts, on_delete: :delete_all), null: false - add :user_id, references(:users, on_delete: :nilify_all), null: false - add :source, :string, null: false - add :status, :string, null: false, default: "pending" - add :file_name, :string - add :file_size, :integer - add :file_storage_key, :string - add :api_url, :string - add :api_key_encrypted, :binary - add :api_options, :map - add :summary, :map - add :started_at, :utc_datetime - add :completed_at, :utc_datetime - timestamps(type: :utc_datetime) - end - - create index(:imports, [:account_id]) - - # Concurrent import guard: only one pending/processing import per account - create unique_index(:imports, [:account_id], - where: "status IN ('pending', 'processing')", - name: :imports_one_active_per_account_idx - ) - - create table(:import_records) do - add :account_id, references(:accounts, on_delete: :delete_all), null: false - add :import_id, references(:imports, on_delete: :delete_all), null: false - add :source, :string, null: false - add :source_entity_type, :string, null: false - add :source_entity_id, :string, null: false - add :local_entity_type, :string, null: false - add :local_entity_id, :bigint, null: false - timestamps(type: :utc_datetime) - end - - create unique_index(:import_records, - [:account_id, :source, 
:source_entity_type, :source_entity_id], - name: :import_records_source_unique_idx - ) - - create index(:import_records, [:import_id]) - create index(:import_records, [:local_entity_type, :local_entity_id]) - end -end -``` - -- [ ] **Step 3: Run the migration** - -Run: `cd /Users/basharqassis/projects/kith && mix ecto.migrate` -Expected: Migration runs successfully. - -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add priv/repo/migrations/*create_imports_and_import_records* -git commit -m "feat: add imports and import_records tables" -``` - ---- - -### Task 2: Source behaviour definition - -**Files:** -- Create: `lib/kith/imports/source.ex` - -- [ ] **Step 1: Create the Source behaviour** - -```elixir -defmodule Kith.Imports.Source do - @moduledoc """ - Behaviour for import source plugins. - - Each source (VCard, Monica, etc.) implements this behaviour to define - how to validate, parse, and import data from that source. - """ - - @type opts :: map() - @type credential :: %{url: String.t(), api_key: String.t()} - @type import_summary :: %{ - contacts: non_neg_integer(), - notes: non_neg_integer(), - skipped: non_neg_integer(), - error_count: non_neg_integer(), - errors: [String.t()] - } - - @callback name() :: String.t() - @callback file_types() :: [String.t()] - @callback validate_file(binary()) :: {:ok, map()} | {:error, String.t()} - @callback parse_summary(binary()) :: {:ok, map()} | {:error, String.t()} - @callback import(account_id :: integer(), user_id :: integer(), data :: binary(), opts()) :: - {:ok, import_summary()} | {:error, term()} - @callback supports_api?() :: boolean() - - @callback test_connection(credential()) :: :ok | {:error, String.t()} - @callback fetch_photo(credential(), resource_id :: String.t()) :: - {:ok, binary()} | {:error, term()} - @callback api_supplement_options() :: [ - %{key: atom(), label: String.t(), description: String.t()} - ] - @callback fetch_supplement(credential(), contact_source_id :: 
String.t(), key :: atom()) :: - {:ok, map()} | {:error, term()} - - @optional_callbacks [test_connection: 1, fetch_photo: 2, api_supplement_options: 0, fetch_supplement: 3] -end -``` - -- [ ] **Step 2: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/source.ex -git commit -m "feat: define Source behaviour for import plugins" -``` - ---- - -### Task 3: Import and ImportRecord schemas - -**Files:** -- Create: `lib/kith/imports/import.ex` -- Create: `lib/kith/imports/import_record.ex` - -- [ ] **Step 1: Write the Import schema** - -```elixir -defmodule Kith.Imports.Import do - use Ecto.Schema - import Ecto.Changeset - - @statuses ~w(pending processing completed failed cancelled) - - schema "imports" do - field :source, :string - field :status, :string, default: "pending" - field :file_name, :string - field :file_size, :integer - field :file_storage_key, :string - field :api_url, :string - field :api_key_encrypted, Kith.Vault.EncryptedBinary - field :api_options, :map - field :summary, :map - field :started_at, :utc_datetime - field :completed_at, :utc_datetime - - belongs_to :account, Kith.Accounts.Account - belongs_to :user, Kith.Accounts.User - - has_many :import_records, Kith.Imports.ImportRecord - - timestamps(type: :utc_datetime) - end - - def statuses, do: @statuses - - def create_changeset(import, attrs) do - import - |> cast(attrs, [ - :source, :file_name, :file_size, :file_storage_key, - :api_url, :api_key_encrypted, :api_options, - :account_id, :user_id - ]) - |> validate_required([:source, :account_id, :user_id]) - |> validate_inclusion(:source, ["monica", "vcard"]) - |> foreign_key_constraint(:account_id) - |> foreign_key_constraint(:user_id) - |> unique_constraint(:account_id, name: :imports_one_active_per_account_idx, - message: "an import is already in progress") - end - - def status_changeset(import, status, attrs \\ %{}) do - import - |> cast(attrs, [:summary, :started_at, :completed_at]) - |> put_change(:status, 
status) - |> validate_inclusion(:status, @statuses) - end -end -``` - -- [ ] **Step 2: Write the ImportRecord schema** - -```elixir -defmodule Kith.Imports.ImportRecord do - use Ecto.Schema - import Ecto.Changeset - - schema "import_records" do - field :source, :string - field :source_entity_type, :string - field :source_entity_id, :string - field :local_entity_type, :string - field :local_entity_id, :integer - - belongs_to :account, Kith.Accounts.Account - belongs_to :import, Kith.Imports.Import - - timestamps(type: :utc_datetime) - end - - def changeset(record, attrs) do - record - |> cast(attrs, [ - :source, :source_entity_type, :source_entity_id, - :local_entity_type, :local_entity_id, - :account_id, :import_id - ]) - |> validate_required([ - :source, :source_entity_type, :source_entity_id, - :local_entity_type, :local_entity_id, - :account_id, :import_id - ]) - |> unique_constraint( - [:account_id, :source, :source_entity_type, :source_entity_id], - name: :import_records_source_unique_idx - ) - end -end -``` - -- [ ] **Step 3: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/import.ex lib/kith/imports/import_record.ex -git commit -m "feat: add Import and ImportRecord Ecto schemas" -``` - ---- - -### Task 4: Imports context module - -**Files:** -- Create: `lib/kith/imports.ex` -- Create: `test/support/fixtures/imports_fixtures.ex` -- Create: `test/kith/imports_test.exs` - -- [ ] **Step 1: Write failing tests for context functions** - -Create `test/kith/imports_test.exs`: - -```elixir -defmodule Kith.ImportsTest do - use Kith.DataCase, async: true - - alias Kith.Imports - alias Kith.Imports.{Import, ImportRecord} - - import Kith.AccountsFixtures - import Kith.ContactsFixtures - - setup do - user = user_fixture() - %{user: user, account_id: user.account_id} - end - - describe "create_import/3" do - test "creates an import with valid attrs", %{account_id: account_id, user: user} do - attrs = %{source: "monica", file_name: 
"export.json", file_size: 1024} - assert {:ok, %Import{} = import} = Imports.create_import(account_id, user.id, attrs) - assert import.source == "monica" - assert import.status == "pending" - assert import.account_id == account_id - end - - test "rejects concurrent imports for same account", %{account_id: account_id, user: user} do - attrs = %{source: "monica", file_name: "export.json", file_size: 1024} - {:ok, _} = Imports.create_import(account_id, user.id, attrs) - assert {:error, :import_in_progress} = Imports.create_import(account_id, user.id, attrs) - end - end - - describe "resolve_source/1" do - test "resolves monica" do - assert Imports.resolve_source("monica") == {:ok, Kith.Imports.Sources.Monica} - end - - test "resolves vcard" do - assert Imports.resolve_source("vcard") == {:ok, Kith.Imports.Sources.VCard} - end - - test "rejects unknown source" do - assert Imports.resolve_source("unknown") == {:error, :unknown_source} - end - end - - describe "record_imported_entity/5" do - test "creates a new import record", %{account_id: account_id, user: user} do - {:ok, import} = Imports.create_import(account_id, user.id, %{source: "monica"}) - contact = contact_fixture(account_id) - - assert {:ok, %ImportRecord{}} = - Imports.record_imported_entity(import, "contact", "uuid-123", "contact", contact.id) - end - - test "upserts on re-import (updates import_id)", %{account_id: account_id, user: user} do - {:ok, import1} = Imports.create_import(account_id, user.id, %{source: "monica"}) - contact = contact_fixture(account_id) - - {:ok, rec1} = Imports.record_imported_entity(import1, "contact", "uuid-123", "contact", contact.id) - - # Complete first import so we can create a second - Imports.update_import_status(import1, "completed", %{completed_at: DateTime.utc_now()}) - - {:ok, import2} = Imports.create_import(account_id, user.id, %{source: "monica"}) - {:ok, rec2} = Imports.record_imported_entity(import2, "contact", "uuid-123", "contact", contact.id) - - assert rec2.id 
== rec1.id - assert rec2.import_id == import2.id - end - end - - describe "find_import_record/4" do - test "finds existing record", %{account_id: account_id, user: user} do - {:ok, import} = Imports.create_import(account_id, user.id, %{source: "monica"}) - contact = contact_fixture(account_id) - Imports.record_imported_entity(import, "contact", "uuid-123", "contact", contact.id) - - assert %ImportRecord{} = Imports.find_import_record(account_id, "monica", "contact", "uuid-123") - end - - test "returns nil for nonexistent", %{account_id: account_id} do - assert is_nil(Imports.find_import_record(account_id, "monica", "contact", "missing")) - end - end - - describe "update_import_status/3" do - test "updates status and optional fields", %{account_id: account_id, user: user} do - {:ok, import} = Imports.create_import(account_id, user.id, %{source: "monica"}) - now = DateTime.utc_now() |> DateTime.truncate(:second) - - {:ok, updated} = Imports.update_import_status(import, "processing", %{started_at: now}) - assert updated.status == "processing" - assert updated.started_at == now - end - end -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports_test.exs -v` -Expected: FAIL — module `Kith.Imports` not found. - -- [ ] **Step 3: Write the Imports context module** - -Create `lib/kith/imports.ex`: - -```elixir -defmodule Kith.Imports do - @moduledoc """ - The Imports context — manages import jobs, source resolution, and import record tracking. 
- """ - - import Ecto.Query, warn: false - alias Kith.Repo - alias Kith.Imports.{Import, ImportRecord} - - @sources %{ - "monica" => Kith.Imports.Sources.Monica, - "vcard" => Kith.Imports.Sources.VCard - } - - ## Import Jobs - - def create_import(account_id, user_id, attrs) do - # Application-level check first (friendlier error) - if has_active_import?(account_id) do - {:error, :import_in_progress} - else - %Import{account_id: account_id, user_id: user_id} - |> Import.create_changeset(attrs) - |> Repo.insert() - |> case do - {:ok, import} -> {:ok, import} - {:error, %{errors: [{:account_id, {_, [constraint: :unique, constraint_name: "imports_one_active_per_account_idx"]}} | _]}} -> - {:error, :import_in_progress} - {:error, changeset} -> {:error, changeset} - end - end - end - - def get_import!(id), do: Repo.get!(Import, id) - - def get_import(id), do: Repo.get(Import, id) - - def update_import_status(%Import{} = import, status, attrs \\ %{}) do - import - |> Import.status_changeset(status, attrs) - |> Repo.update() - end - - def cancel_import(%Import{} = import) do - update_import_status(import, "cancelled") - end - - def get_active_import(account_id) do - Import - |> where([i], i.account_id == ^account_id) - |> where([i], i.status in ["pending", "processing"]) - |> Repo.one() - end - - defp has_active_import?(account_id) do - Import - |> where([i], i.account_id == ^account_id) - |> where([i], i.status in ["pending", "processing"]) - |> Repo.exists?() - end - - ## Source Resolution - - def resolve_source(source) when is_binary(source) do - case Map.get(@sources, source) do - nil -> {:error, :unknown_source} - mod -> {:ok, mod} - end - end - - ## Import Records - - def find_import_record(account_id, source, source_entity_type, source_entity_id) do - ImportRecord - |> where([r], r.account_id == ^account_id) - |> where([r], r.source == ^source) - |> where([r], r.source_entity_type == ^source_entity_type) - |> where([r], r.source_entity_id == ^source_entity_id) - |> 
Repo.one() - end - - def record_imported_entity(%Import{} = import, source_entity_type, source_entity_id, local_entity_type, local_entity_id) do - now = DateTime.utc_now() |> DateTime.truncate(:second) - - %ImportRecord{} - |> ImportRecord.changeset(%{ - account_id: import.account_id, - import_id: import.id, - source: import.source, - source_entity_type: source_entity_type, - source_entity_id: source_entity_id, - local_entity_type: local_entity_type, - local_entity_id: local_entity_id - }) - |> Repo.insert( - on_conflict: [set: [import_id: import.id, updated_at: now]], - conflict_target: {:unsafe_fragment, ~s|("account_id", "source", "source_entity_type", "source_entity_id")|}, - returning: true - ) - end - - def wipe_api_key(%Import{} = import) do - import - |> Ecto.Changeset.change(api_key_encrypted: nil) - |> Repo.update() - end - - def pending_async_jobs_count(import_id) do - Oban.Job - |> where([j], fragment("? ->> 'import_id' = ?", j.args, ^to_string(import_id))) - |> where([j], j.state in ["available", "scheduled", "executing", "retryable"]) - |> Repo.aggregate(:count) - end -end -``` - -- [ ] **Step 4: Create test fixtures** - -Create `test/support/fixtures/imports_fixtures.ex`: - -```elixir -defmodule Kith.ImportsFixtures do - @moduledoc "Test helpers for the Imports context." - - alias Kith.Imports - - def import_fixture(account_id, user_id, attrs \\ %{}) do - attrs = Enum.into(attrs, %{source: "monica", file_name: "export.json", file_size: 1024}) - {:ok, import} = Imports.create_import(account_id, user_id, attrs) - import - end -end -``` - -- [ ] **Step 5: Run tests to verify they pass** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports_test.exs -v` -Expected: All PASS (some tests may fail because Monica/VCard source modules don't exist yet — that's fine, the `resolve_source` tests will be the ones that fail. If so, skip those for now and they'll pass after Task 5/6). 
defmodule Kith.Imports.Sources.VCard do
  @moduledoc """
  VCard import source. Wraps the existing `Kith.VCard.Parser`.

  Contacts are imported one at a time. A failure on a single contact is
  recorded in the summary and does not abort the run. When the import record
  is flagged as cancelled, the run stops early and the summary reflects what
  was actually imported up to that point.
  """

  @behaviour Kith.Imports.Source

  alias Kith.VCard.Parser
  alias Kith.Contacts
  alias Kith.Imports

  require Logger

  # Upper bound on the number of per-contact error messages kept in a summary.
  @max_errors 50

  # The import record is re-read every this many contacts to detect cancellation.
  @cancel_check_interval 10

  @impl true
  def name, do: "vCard"

  @impl true
  def file_types, do: [".vcf"]

  @impl true
  def supports_api?, do: false

  @impl true
  def validate_file(data) do
    if String.contains?(data, "BEGIN:VCARD") do
      {:ok, %{}}
    else
      {:error, "File does not appear to be a valid vCard file"}
    end
  end

  @impl true
  def parse_summary(data) do
    case Parser.parse(data) do
      {:ok, contacts} -> {:ok, %{contacts: length(contacts)}}
      {:error, reason} -> {:error, reason}
    end
  end

  @doc """
  Imports every contact found in `data` into `account_id`.

  `opts[:import]` is the optional import record; when present, each created
  contact is recorded against it and its status is polled for cancellation.

  Returns `{:ok, summary}` with the keys `:contacts`, `:notes`, `:skipped`,
  `:error_count` and `:errors`, or the parser's `{:error, reason}` unchanged.
  On cancellation the summary keeps the counts accumulated so far and
  `"Import cancelled"` is appended to `:errors`.
  """
  @impl true
  def import(account_id, _user_id, data, opts) do
    import_record = opts[:import]

    with {:ok, parsed_contacts} <- Parser.parse(data) do
      total = length(parsed_contacts)
      topic = "import:#{account_id}"
      broadcast_interval = max(1, div(total, 50))

      summary =
        parsed_contacts
        |> Enum.with_index(1)
        |> Enum.reduce_while(empty_summary(), fn {parsed, idx}, acc ->
          if cancelled?(import_record, idx) do
            # Keep the running tally: those contacts really were created.
            {:halt, %{acc | errors: acc.errors ++ ["Import cancelled"]}}
          else
            acc = import_one(acc, account_id, parsed, idx, import_record)

            if rem(idx, broadcast_interval) == 0 || idx == total do
              Phoenix.PubSub.broadcast(
                Kith.PubSub,
                topic,
                {:import_progress, %{current: idx, total: total}}
              )
            end

            {:cont, acc}
          end
        end)

      {:ok, summary}
    end
  end

  defp empty_summary do
    %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}
  end

  # True when the import record exists and has been flagged as cancelled.
  # Only hits the database every @cancel_check_interval contacts.
  defp cancelled?(nil, _idx), do: false

  defp cancelled?(import_record, idx) when rem(idx, @cancel_check_interval) == 0 do
    Imports.get_import!(import_record.id).status == "cancelled"
  end

  defp cancelled?(_import_record, _idx), do: false

  # Imports one parsed contact and returns the updated summary. Exceptions are
  # deliberately converted into a per-contact error so that one malformed card
  # does not abort the whole file.
  defp import_one(acc, account_id, parsed, idx, import_record) do
    case Contacts.import_contact(account_id, parsed) do
      {:ok, contact} ->
        if import_record do
          Imports.record_imported_entity(
            import_record,
            "contact",
            "vcard-#{idx}",
            "contact",
            contact.id
          )
        end

        %{acc | contacts: acc.contacts + 1}

      {:error, reason} ->
        add_error(acc, "Contact #{idx}: #{inspect(reason)}")
    end
  rescue
    e -> add_error(acc, "Contact #{idx}: #{Exception.message(e)}")
  end

  # Counts a skipped contact; the message itself is kept only while fewer
  # than @max_errors messages have been collected.
  defp add_error(acc, msg) do
    errors = if length(acc.errors) < @max_errors, do: acc.errors ++ [msg], else: acc.errors
    %{acc | skipped: acc.skipped + 1, error_count: acc.error_count + 1, errors: errors}
  end
end
defmodule Kith.Workers.ImportSourceWorker do
  @moduledoc """
  Generic Oban worker that orchestrates any import source.

  Loads the import job, resolves the source module, loads the file from
  Storage, and delegates to `source.import/4`. Broadcasts completion via PubSub.

  An import that was cancelled while the source was running is left in its
  cancelled state rather than being overwritten with "completed".
  """

  use Oban.Worker, queue: :imports, max_attempts: 3

  require Logger

  alias Kith.Imports

  # NOTE(review): returning `{:error, _}` makes Oban retry the job (up to
  # `max_attempts`), which re-runs the source from the start — confirm every
  # source is safe to re-run, or lower `max_attempts`.
  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"import_id" => import_id}}) do
    import_job = Imports.get_import!(import_id)

    with {:ok, source_mod} <- Imports.resolve_source(import_job.source),
         {:ok, _} <- Imports.update_import_status(import_job, "processing", %{started_at: now()}),
         {:ok, data} <- load_file(import_job.file_storage_key),
         {:ok, summary} <-
           source_mod.import(import_job.account_id, import_job.user_id, data, %{
             import: import_job
           }),
         :ok <- finalize(import_job, ensure_map(summary)) do
      :ok
    else
      {:error, reason} ->
        Logger.error("Import #{import_id} failed: #{inspect(reason)}")

        Imports.update_import_status(import_job, "failed", %{
          summary: %{error: inspect(reason)},
          completed_at: now()
        })

        {:error, reason}
    end
  end

  # Marks the import as completed and announces it, unless it was cancelled
  # while the source was running. Sources report a cancelled run as
  # `{:ok, summary}`, so the status has to be re-read here.
  defp finalize(import_job, summary_map) do
    case Imports.get_import!(import_job.id) do
      %{status: "cancelled"} ->
        Logger.info("Import #{import_job.id} was cancelled; not marking it completed")
        :ok

      _current ->
        complete(import_job, summary_map)
    end
  end

  defp complete(import_job, summary_map) do
    attrs = %{summary: summary_map, completed_at: now()}

    case Imports.update_import_status(import_job, "completed", attrs) do
      {:ok, _} ->
        topic = "import:#{import_job.account_id}"
        Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map})
        Logger.info("Import #{import_job.id} completed: #{inspect(summary_map)}")
        :ok

      {:error, reason} ->
        {:error, reason}
    end
  end

  # All persisted timestamps use second precision.
  defp now, do: DateTime.utc_now() |> DateTime.truncate(:second)

  defp load_file(nil), do: {:error, "No file storage key"}

  defp load_file(key) do
    case Kith.Storage.read(key) do
      {:ok, data} -> {:ok, data}
      {:error, reason} -> {:error, "Failed to load file: #{inspect(reason)}"}
    end
  end

  # Handle plain maps (already a map) vs structs
  defp ensure_map(%{__struct__: _} = s), do: Map.from_struct(s)
  defp ensure_map(m) when is_map(m), do: m
end
- -- [ ] **Step 4: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/import_source_worker_test.exs -v` -Expected: All PASS (may need to adjust `Storage.read/1` to match actual API). - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/import_source_worker.ex test/kith/workers/import_source_worker_test.exs -git commit -m "feat: add generic ImportSourceWorker for Oban-based imports" -``` - ---- - -### Task 7: Oban config — Add new queues and cron jobs - -**Files:** -- Modify: `config/config.exs:34-53` - -- [ ] **Step 1: Add queues and cron entry** - -In `config/config.exs`, add to the `queues` list: - -```elixir - photo_sync: 5, - api_supplement: 3 -``` - -Add to the `crontab` list: - -```elixir - {"0 5 * * 0", Kith.Workers.ImportFileCleanupWorker} -``` - -- [ ] **Step 2: Register JSON MIME type for uploads** - -Add to the existing `config :mime` line or add new: - -```elixir -config :mime, :types, %{"text/vcard" => ["vcf"], "application/json" => ["json"]} -``` - -- [ ] **Step 3: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add config/config.exs -git commit -m "feat: add photo_sync, api_supplement Oban queues and cleanup cron" -``` - ---- - -### Task 8: Photo.pending_sync? helper - -**Files:** -- Modify: `lib/kith/contacts/photo.ex` - -- [ ] **Step 1: Add pending_sync? helper to Photo** - -In `lib/kith/contacts/photo.ex`, add after the `changeset/2` function: - -```elixir - @doc "Returns true if the photo is awaiting sync from an external source." - def pending_sync?(%__MODULE__{storage_key: "pending_sync:" <> _}), do: true - def pending_sync?(%__MODULE__{}), do: false -``` - -- [ ] **Step 2: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/contacts/photo.ex -git commit -m "feat: add Photo.pending_sync? 
helper for import photo placeholders" -``` - ---- - -### Task 9: Monica source — Skeleton + validate_file + parse_summary - -**Files:** -- Create: `lib/kith/imports/sources/monica.ex` -- Create: `test/kith/imports/sources/monica_test.exs` -- Create: `test/support/fixtures/monica_export.json` (minimal test fixture) - -This is the first of several tasks building out the Monica source. We start with the structural validation and summary parsing — the `import/4` callback is built incrementally in Tasks 10-13. - -- [ ] **Step 1: Create a minimal Monica JSON test fixture** - -Create `test/support/fixtures/monica_export.json` — a minimal but structurally complete Monica export: - -```json -{ - "version": "3.0.0", - "app_version": "4.1.2", - "account": { - "data": { - "uuid": "test-account-uuid" - } - }, - "contacts": { - "data": [ - { - "uuid": "contact-uuid-1", - "first_name": "Jane", - "last_name": "Doe", - "middle_name": "Marie", - "nickname": "JD", - "description": "A friend", - "company": "Acme", - "job": "Engineer", - "is_starred": true, - "is_active": true, - "is_dead": false, - "gender": {"data": {"uuid": "gender-uuid-1", "name": "Female"}}, - "birthdate": { - "data": { - "date": "1990-06-15", - "is_year_unknown": false, - "is_age_based": false - } - }, - "first_met_date": { - "data": { - "date": "2015-09-01", - "is_year_unknown": false, - "is_age_based": false - } - }, - "first_met_through": null, - "tags": {"data": [{"uuid": "tag-uuid-1", "name": "College"}]}, - "contact_fields": { - "data": [ - { - "uuid": "cf-uuid-1", - "value": "jane@example.com", - "contact_field_type": {"data": {"uuid": "cft-uuid-1", "name": "Email"}} - } - ] - }, - "addresses": { - "data": [ - { - "uuid": "addr-uuid-1", - "street": "123 Main St", - "city": "Springfield", - "province": "IL", - "postal_code": "62701", - "country": "US" - } - ] - }, - "notes": { - "data": [ - { - "uuid": "note-uuid-1", - "body": "Met at orientation", - "created_at": "2020-01-15T10:00:00Z" - } - ] - }, - 
"reminders": {"data": []}, - "pets": { - "data": [ - { - "uuid": "pet-uuid-1", - "name": "Buddy", - "pet_category": {"data": {"name": "Dog"}} - } - ] - }, - "photos": { - "data": [ - { - "uuid": "photo-uuid-1", - "file_name": "profile.jpg" - } - ] - }, - "activities": {"data": []} - }, - { - "uuid": "contact-uuid-2", - "first_name": "John", - "last_name": "Smith", - "middle_name": null, - "nickname": null, - "description": null, - "company": null, - "job": null, - "is_starred": false, - "is_active": true, - "is_dead": false, - "gender": null, - "birthdate": {"data": {"date": null, "is_year_unknown": false, "is_age_based": false}}, - "first_met_date": {"data": {"date": null, "is_year_unknown": false, "is_age_based": false}}, - "first_met_through": {"data": {"uuid": "contact-uuid-1"}}, - "tags": {"data": []}, - "contact_fields": {"data": []}, - "addresses": {"data": []}, - "notes": {"data": []}, - "reminders": {"data": []}, - "pets": {"data": []}, - "photos": {"data": []}, - "activities": {"data": []} - } - ] - }, - "relationships": { - "data": [ - { - "uuid": "rel-uuid-1", - "contact_is": {"data": {"uuid": "contact-uuid-1"}}, - "of_contact": {"data": {"uuid": "contact-uuid-2"}}, - "relationship_type": {"data": {"uuid": "rt-uuid-1", "name": "Friend", "reverse_name": "Friend"}} - } - ] - } -} -``` - -- [ ] **Step 2: Write failing tests** - -Create `test/kith/imports/sources/monica_test.exs`: - -```elixir -defmodule Kith.Imports.Sources.MonicaTest do - use Kith.DataCase, async: true - - alias Kith.Imports.Sources.Monica, as: MonicaSource - - @fixture_path "test/support/fixtures/monica_export.json" - - setup do - data = File.read!(@fixture_path) - %{data: data} - end - - describe "name/0" do - test "returns source name" do - assert MonicaSource.name() == "Monica CRM" - end - end - - describe "file_types/0" do - test "returns accepted file types" do - assert MonicaSource.file_types() == [".json"] - end - end - - describe "supports_api?/0" do - test "returns true" do - 
assert MonicaSource.supports_api?() - end - end - - describe "validate_file/1" do - test "validates a proper Monica export", %{data: data} do - assert {:ok, _} = MonicaSource.validate_file(data) - end - - test "rejects invalid JSON" do - assert {:error, _} = MonicaSource.validate_file("not json") - end - - test "rejects JSON missing required keys" do - assert {:error, _} = MonicaSource.validate_file(Jason.encode!(%{foo: "bar"})) - end - end - - describe "parse_summary/1" do - test "returns entity counts", %{data: data} do - assert {:ok, summary} = MonicaSource.parse_summary(data) - assert summary.contacts == 2 - assert summary.relationships == 1 - assert summary.photos == 1 - end - end -end -``` - -- [ ] **Step 3: Run tests to verify they fail** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/monica_test.exs -v` -Expected: FAIL — module not found. - -- [ ] **Step 4: Implement Monica source skeleton** - -Create `lib/kith/imports/sources/monica.ex`: - -```elixir -defmodule Kith.Imports.Sources.Monica do - @moduledoc """ - Monica CRM import source. Parses JSON export files and imports contacts - with all associated data. Supports optional API photo sync. - """ - - @behaviour Kith.Imports.Source - - require Logger - - alias Kith.Imports - - @pet_species_map %{ - "Dog" => "dog", "Cat" => "cat", "Bird" => "bird", "Fish" => "fish", - "Reptile" => "reptile", "Rabbit" => "rabbit", "Hamster" => "hamster" - } - - @impl true - def name, do: "Monica CRM" - - @impl true - def file_types, do: [".json"] - - @impl true - def supports_api?, do: true - - @impl true - def validate_file(data) do - with {:ok, parsed} <- Jason.decode(data), - true <- is_map(parsed), - true <- Map.has_key?(parsed, "contacts"), - true <- Map.has_key?(parsed, "account") do - {:ok, parsed} - else - _ -> {:error, "Invalid Monica CRM export file. 
Expected JSON with 'contacts' and 'account' keys."} - end - end - - @impl true - def parse_summary(data) do - with {:ok, parsed} <- Jason.decode(data) do - contacts = get_in(parsed, ["contacts", "data"]) || [] - relationships = get_in(parsed, ["relationships", "data"]) || [] - - photos = - contacts - |> Enum.flat_map(fn c -> get_in(c, ["photos", "data"]) || [] end) - |> length() - - notes = - contacts - |> Enum.flat_map(fn c -> get_in(c, ["notes", "data"]) || [] end) - |> length() - - {:ok, %{ - contacts: length(contacts), - relationships: length(relationships), - photos: photos, - notes: notes - }} - end - end - - @impl true - def import(account_id, user_id, data, opts) do - import_record = opts[:import] - - with {:ok, parsed} <- Jason.decode(data) do - contacts_data = get_in(parsed, ["contacts", "data"]) || [] - relationships_data = get_in(parsed, ["relationships", "data"]) || [] - total = length(contacts_data) - topic = "import:#{account_id}" - broadcast_interval = max(1, div(total, 50)) - - # Phase 1: Reference data - gender_map = import_reference_genders(account_id, contacts_data) - tag_map = import_reference_tags(account_id, contacts_data) - cft_map = import_reference_contact_field_types(account_id, contacts_data) - atc_map = import_reference_activity_type_categories(account_id, contacts_data) - - # Phase 2 & 3: Contacts + children (including activities with cross-contact dedup) - # processed_activities is a MapSet tracking activity UUIDs already created in this run - {contact_map, summary, _processed_activities} = - contacts_data - |> Enum.with_index(1) - |> Enum.reduce({%{}, init_summary(), MapSet.new()}, fn {contact_data, idx}, {cmap, acc, proc_acts} -> - # Check cancellation - if import_record && rem(idx, 10) == 0 do - refreshed = Imports.get_import!(import_record.id) - if refreshed.status == "cancelled", do: throw(:cancelled) - end - - case import_single_contact(account_id, user_id, contact_data, import_record, %{ - gender_map: gender_map, - tag_map: 
tag_map, - cft_map: cft_map, - atc_map: atc_map, - processed_activities: proc_acts - }) do - {:ok, contact, new_proc_acts} -> - new_cmap = Map.put(cmap, contact_data["uuid"], contact.id) - new_acc = %{acc | contacts: acc.contacts + 1} - - if rem(idx, broadcast_interval) == 0 || idx == total do - Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_progress, %{current: idx, total: total}}) - end - - {new_cmap, new_acc, new_proc_acts} - - {:skip, reason} -> - Logger.info("Skipped contact #{contact_data["uuid"]}: #{reason}") - {cmap, %{acc | skipped: acc.skipped + 1}, proc_acts} - - {:error, reason} -> - Logger.warning("Failed to import contact #{contact_data["uuid"]}: #{inspect(reason)}") - {cmap, add_error(acc, "#{contact_data["first_name"]} #{contact_data["last_name"]}: #{inspect(reason)}"), proc_acts} - end - end) - - # Phase 4: Cross-contact references - import_relationships(account_id, relationships_data, contact_map, import_record) - import_first_met_through_links(account_id, contacts_data, contact_map) - - # Finalize summary — count notes from import_records (more accurate than in-loop counting) - notes_count = if import_record do - import_record.id - |> Imports.count_import_records_by_type("note") - else - 0 - end - - {:ok, %{summary | notes: notes_count}} - end - catch - :cancelled -> {:ok, init_summary()} - end - - # --- API callbacks --- - - @impl true - def test_connection(%{url: url, api_key: api_key}) do - case Req.get("#{url}/api/me", headers: [{"Authorization", "Bearer #{api_key}"}]) do - {:ok, %{status: 200}} -> :ok - {:ok, %{status: status}} -> {:error, "API returned status #{status}"} - {:error, reason} -> {:error, "Connection failed: #{inspect(reason)}"} - end - end - - @impl true - def fetch_photo(%{url: url, api_key: api_key}, photo_uuid) do - case Req.get("#{url}/api/photos/#{photo_uuid}", - headers: [{"Authorization", "Bearer #{api_key}"}]) do - {:ok, %{status: 200, body: body}} -> {:ok, body} - {:ok, %{status: 429}} -> {:error, 
:rate_limited} - {:ok, %{status: status}} -> {:error, "HTTP #{status}"} - {:error, reason} -> {:error, reason} - end - end - - @impl true - def api_supplement_options do - [ - %{key: :photos, label: "Sync photos", description: "Download contact photos via API"}, - %{key: :first_met_details, label: "Fetch \"How we met\" details", - description: "first_met_where and first_met_additional_info (not in JSON export)"} - ] - end - - @impl true - def fetch_supplement(%{url: url, api_key: api_key}, contact_source_id, :first_met_details) do - case Req.get("#{url}/api/contacts/#{contact_source_id}", - headers: [{"Authorization", "Bearer #{api_key}"}]) do - {:ok, %{status: 200, body: body}} -> - data = get_in(body, ["data"]) || body - {:ok, %{ - first_met_where: data["first_met_where"], - first_met_additional_info: data["first_met_additional_information"] - }} - {:ok, %{status: 429}} -> {:error, :rate_limited} - {:ok, %{status: status}} -> {:error, "HTTP #{status}"} - {:error, reason} -> {:error, reason} - end - end - - # --- Private: Phase 1 — Reference Data --- - - defp import_reference_genders(account_id, contacts_data) do - contacts_data - |> Enum.map(&get_in(&1, ["gender", "data"])) - |> Enum.reject(&is_nil/1) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, fn gender_data, acc -> - case find_or_create_gender(account_id, gender_data["name"]) do - {:ok, gender} -> Map.put(acc, gender_data["uuid"], gender.id) - _ -> acc - end - end) - end - - defp find_or_create_gender(account_id, name) do - alias Kith.Contacts.Gender - alias Kith.Repo - import Ecto.Query - - case Repo.one(from g in Gender, where: g.name == ^name and (is_nil(g.account_id) or g.account_id == ^account_id)) do - nil -> Kith.Contacts.create_gender(account_id, %{name: name}) - gender -> {:ok, gender} - end - end - - defp import_reference_tags(account_id, contacts_data) do - contacts_data - |> Enum.flat_map(fn c -> get_in(c, ["tags", "data"]) || [] end) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, 
fn tag_data, acc -> - case find_or_create_tag(account_id, tag_data["name"]) do - {:ok, tag} -> Map.put(acc, tag_data["uuid"], tag.id) - _ -> acc - end - end) - end - - defp find_or_create_tag(account_id, name) do - alias Kith.Contacts.Tag - alias Kith.Repo - import Ecto.Query - - case Repo.one(from t in Tag, where: t.account_id == ^account_id and t.name == ^name) do - nil -> Kith.Contacts.create_tag(account_id, %{name: name}) - tag -> {:ok, tag} - end - end - - defp import_reference_contact_field_types(account_id, contacts_data) do - contacts_data - |> Enum.flat_map(fn c -> get_in(c, ["contact_fields", "data"]) || [] end) - |> Enum.map(&get_in(&1, ["contact_field_type", "data"])) - |> Enum.reject(&is_nil/1) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, fn cft_data, acc -> - case find_or_create_contact_field_type(account_id, cft_data["name"]) do - {:ok, cft} -> Map.put(acc, cft_data["uuid"], cft.id) - _ -> acc - end - end) - end - - defp find_or_create_contact_field_type(account_id, name) do - alias Kith.Contacts.ContactFieldType - alias Kith.Repo - import Ecto.Query - - case Repo.one(from cft in ContactFieldType, where: cft.name == ^name and (is_nil(cft.account_id) or cft.account_id == ^account_id)) do - nil -> Kith.Contacts.create_contact_field_type(account_id, %{name: name}) - cft -> {:ok, cft} - end - end - - defp import_reference_activity_type_categories(account_id, contacts_data) do - contacts_data - |> Enum.flat_map(fn c -> get_in(c, ["activities", "data"]) || [] end) - |> Enum.map(&get_in(&1, ["activity_type_category", "data"])) - |> Enum.reject(&is_nil/1) - |> Enum.uniq_by(& &1["uuid"]) - |> Enum.reduce(%{}, fn atc_data, acc -> - case find_or_create_activity_type_category(account_id, atc_data["name"]) do - {:ok, atc} -> Map.put(acc, atc_data["uuid"], atc.id) - _ -> acc - end - end) - end - - defp find_or_create_activity_type_category(account_id, name) do - alias Kith.Contacts.ActivityTypeCategory - alias Kith.Repo - import Ecto.Query - - case 
Repo.one(from atc in ActivityTypeCategory, where: atc.name == ^name and (is_nil(atc.account_id) or atc.account_id == ^account_id)) do - nil -> Kith.Contacts.create_activity_type_category(account_id, %{name: name}) - atc -> {:ok, atc} - end - end - - # --- Private: Phase 2 — Single Contact Import --- - - # Returns {:ok, contact, updated_processed_activities} | {:skip, reason} | {:error, reason} - defp import_single_contact(account_id, user_id, contact_data, import_record, ref_maps) do - uuid = contact_data["uuid"] - proc_acts = ref_maps.processed_activities - - # Check for existing import record - existing = if import_record, do: Imports.find_import_record(account_id, "monica", "contact", uuid) - - case existing do - %{local_entity_id: local_id} -> - # Re-import: check if soft-deleted - case Kith.Repo.get(Kith.Contacts.Contact, local_id) do - %{deleted_at: deleted_at} when not is_nil(deleted_at) -> - {:skip, "previously deleted in Kith, not restoring"} - nil -> - do_import_contact(account_id, user_id, contact_data, import_record, ref_maps) - _contact -> - do_upsert_contact(account_id, user_id, local_id, contact_data, import_record, ref_maps) - end - nil -> - do_import_contact(account_id, user_id, contact_data, import_record, ref_maps) - end - end - - defp do_import_contact(account_id, user_id, contact_data, import_record, ref_maps) do - attrs = map_contact_attrs(contact_data, ref_maps) - - case Kith.Contacts.create_contact(account_id, attrs) do - {:ok, contact} -> - new_proc_acts = import_contact_children(contact, user_id, contact_data, import_record, ref_maps) - import_contact_tags(contact, contact_data, ref_maps.tag_map) - - if import_record do - Imports.record_imported_entity(import_record, "contact", contact_data["uuid"], "contact", contact.id) - end - - {:ok, contact, new_proc_acts} - - {:error, changeset} -> - {:error, changeset} - end - end - - defp do_upsert_contact(account_id, user_id, local_id, contact_data, import_record, ref_maps) do - contact = 
Kith.Repo.get!(Kith.Contacts.Contact, local_id) - attrs = map_contact_attrs(contact_data, ref_maps) - - case Kith.Contacts.update_contact(contact, attrs) do - {:ok, contact} -> - new_proc_acts = import_contact_children(contact, user_id, contact_data, import_record, ref_maps) - import_contact_tags(contact, contact_data, ref_maps.tag_map) - - if import_record do - Imports.record_imported_entity(import_record, "contact", contact_data["uuid"], "contact", contact.id) - end - - {:ok, contact, new_proc_acts} - - {:error, changeset} -> - {:error, changeset} - end - end - - defp map_contact_attrs(contact_data, ref_maps) do - gender_id = if gender = get_in(contact_data, ["gender", "data"]) do - Map.get(ref_maps.gender_map, gender["uuid"]) - end - - birthdate_info = parse_special_date(get_in(contact_data, ["birthdate", "data"])) - first_met_info = parse_special_date(get_in(contact_data, ["first_met_date", "data"])) - - %{ - first_name: contact_data["first_name"], - last_name: contact_data["last_name"], - middle_name: contact_data["middle_name"], - nickname: contact_data["nickname"], - description: contact_data["description"], - company: contact_data["company"], - occupation: contact_data["job"], - favorite: contact_data["is_starred"] || false, - is_archived: contact_data["is_active"] == false, - deceased: contact_data["is_dead"] || false, - gender_id: gender_id, - birthdate: birthdate_info.date, - birthdate_year_unknown: birthdate_info.year_unknown, - first_met_at: first_met_info.date, - first_met_year_unknown: first_met_info.year_unknown - } - |> Enum.reject(fn {_k, v} -> is_nil(v) end) - |> Map.new() - end - - defp parse_special_date(nil), do: %{date: nil, year_unknown: false} - defp parse_special_date(%{"date" => nil}), do: %{date: nil, year_unknown: false} - defp parse_special_date(%{"date" => date_str, "is_year_unknown" => year_unknown} = data) do - case Date.from_iso8601(date_str) do - {:ok, date} -> - if year_unknown && !data["is_age_based"] do - # Store with sentinel 
year 1, flag as unknown - %{date: %{date | year: 1}, year_unknown: true} - else - %{date: date, year_unknown: false} - end - _ -> - %{date: nil, year_unknown: false} - end - end - defp parse_special_date(_), do: %{date: nil, year_unknown: false} - - # --- Private: Phase 3 — Contact Children --- - - # Returns updated processed_activities MapSet - defp import_contact_children(contact, user_id, contact_data, import_record, ref_maps) do - import_contact_fields(contact, contact_data, ref_maps.cft_map, import_record) - import_addresses(contact, contact_data, import_record) - import_notes(contact, user_id, contact_data, import_record) - import_reminders(contact, user_id, contact_data, import_record) - import_pets(contact, contact_data, import_record) - import_photos(contact, contact_data, import_record) - import_activities(contact, user_id, contact_data, import_record, ref_maps.processed_activities, ref_maps.atc_map) - end - - defp import_contact_fields(contact, contact_data, cft_map, import_record) do - for cf <- get_in(contact_data, ["contact_fields", "data"]) || [] do - cft_uuid = get_in(cf, ["contact_field_type", "data", "uuid"]) - cft_id = Map.get(cft_map, cft_uuid) - - if cft_id do - case Kith.Contacts.create_contact_field(contact, %{ - "value" => cf["value"], - "contact_field_type_id" => cft_id - }) do - {:ok, field} -> - if import_record, do: Imports.record_imported_entity(import_record, "contact_field", cf["uuid"], "contact_field", field.id) - {:error, reason} -> - Logger.warning("Failed to import contact field #{cf["uuid"]}: #{inspect(reason)}") - end - end - end - end - - defp import_addresses(contact, contact_data, import_record) do - for addr <- get_in(contact_data, ["addresses", "data"]) || [] do - case Kith.Contacts.create_address(contact, %{ - "line1" => addr["street"], - "city" => addr["city"], - "province" => addr["province"], - "postal_code" => addr["postal_code"], - "country" => addr["country"] - }) do - {:ok, address} -> - if import_record, do: 
Imports.record_imported_entity(import_record, "address", addr["uuid"], "address", address.id) - {:error, reason} -> - Logger.warning("Failed to import address #{addr["uuid"]}: #{inspect(reason)}") - end - end - end - - defp import_notes(contact, user_id, contact_data, import_record) do - for note <- get_in(contact_data, ["notes", "data"]) || [] do - case Kith.Contacts.create_note(contact, user_id, %{"body" => note["body"]}) do - {:ok, created_note} -> - if import_record, do: Imports.record_imported_entity(import_record, "note", note["uuid"], "note", created_note.id) - {:error, reason} -> - Logger.warning("Failed to import note #{note["uuid"]}: #{inspect(reason)}") - end - end - end - - defp import_reminders(contact, user_id, contact_data, import_record) do - for reminder <- get_in(contact_data, ["reminders", "data"]) || [] do - attrs = %{ - type: "one_time", - title: reminder["title"] || "Imported reminder", - next_reminder_date: parse_date_string(reminder["next_expected_date"]), - contact_id: contact.id - } - - if attrs.next_reminder_date do - case Kith.Reminders.create_reminder(contact.account_id, user_id, attrs) do - {:ok, created} -> - if import_record, do: Imports.record_imported_entity(import_record, "reminder", reminder["uuid"], "reminder", created.id) - {:error, reason} -> - Logger.warning("Failed to import reminder #{reminder["uuid"]}: #{inspect(reason)}") - end - end - end - end - - defp import_pets(contact, contact_data, import_record) do - for pet <- get_in(contact_data, ["pets", "data"]) || [] do - category_name = get_in(pet, ["pet_category", "data", "name"]) || "other" - species = Map.get(@pet_species_map, category_name, "other") - - case Kith.Pets.create_pet(contact.account_id, %{ - name: pet["name"] || "Unnamed", - species: species, - contact_id: contact.id - }) do - {:ok, created_pet} -> - if import_record, do: Imports.record_imported_entity(import_record, "pet", pet["uuid"], "pet", created_pet.id) - {:error, reason} -> - Logger.warning("Failed to 
import pet #{pet["uuid"]}: #{inspect(reason)}") - end - end - end - - defp import_photos(contact, contact_data, import_record) do - for photo <- get_in(contact_data, ["photos", "data"]) || [] do - case Kith.Contacts.create_photo(contact, %{ - "file_name" => photo["file_name"] || "photo.jpg", - "storage_key" => "pending_sync:#{photo["uuid"]}", - "file_size" => 0, - "content_type" => "image/jpeg" - }) do - {:ok, created_photo} -> - if import_record, do: Imports.record_imported_entity(import_record, "photo", photo["uuid"], "photo", created_photo.id) - {:error, reason} -> - Logger.warning("Failed to import photo #{photo["uuid"]}: #{inspect(reason)}") - end - end - end - - # Returns updated processed_activities MapSet. - # Activities can be shared across contacts — deduplicate by UUID. - # On first encounter: create the activity + join table entry. - # On subsequent contacts referencing the same UUID: add only the join table entry. - # On resume after cancellation: check import_records first (MapSet starts empty). 
- defp import_activities(contact, user_id, contact_data, import_record, processed_activities, atc_map) do - activities = get_in(contact_data, ["activities", "data"]) || [] - - Enum.reduce(activities, processed_activities, fn activity_data, proc_acts -> - uuid = activity_data["uuid"] - already_in_run = MapSet.member?(proc_acts, uuid) - - # On resume: check import_records if not in this run's MapSet - already_in_db = if !already_in_run && import_record do - Imports.find_import_record(contact.account_id, "monica", "activity", uuid) != nil - else - false - end - - cond do - already_in_run || already_in_db -> - # Activity already created — just add the join table entry - existing_rec = Imports.find_import_record(contact.account_id, "monica", "activity", uuid) - if existing_rec do - Kith.Repo.insert_all("activity_contacts", - [%{activity_id: existing_rec.local_entity_id, contact_id: contact.id}], - on_conflict: :nothing - ) - end - proc_acts - - true -> - # First encounter — create the activity with type category lookup - atc_uuid = get_in(activity_data, ["activity_type_category", "data", "uuid"]) - atc_id = if atc_uuid, do: Map.get(atc_map, atc_uuid) - - attrs = %{ - "title" => activity_data["title"] || "Imported activity", - "description" => activity_data["description"], - "occurred_at" => parse_datetime(activity_data["occurred_at"]) || DateTime.utc_now(), - "activity_type_category_id" => atc_id - } - - case Kith.Activities.create_activity(contact.account_id, attrs, [contact.id]) do - {:ok, %{activity: activity}} -> - if import_record do - Imports.record_imported_entity(import_record, "activity", uuid, "activity", activity.id) - end - MapSet.put(proc_acts, uuid) - - {:error, _reason} -> - Logger.warning("Failed to import activity #{uuid}") - proc_acts - end - end - end) - end - - defp parse_datetime(nil), do: nil - defp parse_datetime(str) do - case DateTime.from_iso8601(str) do - {:ok, dt, _offset} -> dt - _ -> nil - end - end - - defp import_contact_tags(contact, 
contact_data, tag_map) do - for tag_data <- get_in(contact_data, ["tags", "data"]) || [] do - tag_id = Map.get(tag_map, tag_data["uuid"]) - if tag_id do - Kith.Repo.insert_all("contact_tags", - [%{contact_id: contact.id, tag_id: tag_id}], - on_conflict: :nothing - ) - end - end - end - - # --- Private: Phase 4 — Cross-Contact References --- - - defp import_relationships(account_id, relationships_data, contact_map, import_record) do - for rel <- relationships_data do - contact_uuid = get_in(rel, ["contact_is", "data", "uuid"]) - related_uuid = get_in(rel, ["of_contact", "data", "uuid"]) - contact_id = Map.get(contact_map, contact_uuid) - related_id = Map.get(contact_map, related_uuid) - - if contact_id && related_id do - rt_name = get_in(rel, ["relationship_type", "data", "name"]) || "Friend" - case find_or_create_relationship_type(account_id, rt_name, get_in(rel, ["relationship_type", "data"])) do - {:ok, rt} -> - contact = %Kith.Contacts.Contact{id: contact_id, account_id: account_id} - case Kith.Contacts.create_relationship(contact, %{ - "related_contact_id" => related_id, - "relationship_type_id" => rt.id - }) do - {:ok, relationship} -> - if import_record, do: Imports.record_imported_entity(import_record, "relationship", rel["uuid"], "relationship", relationship.id) - {:error, reason} -> - Logger.warning("Failed to import relationship #{rel["uuid"]}: #{inspect(reason)}") - end - _ -> :ok - end - else - failed = if is_nil(contact_id), do: contact_uuid, else: related_uuid - Logger.warning("Skipping relationship #{rel["uuid"]}: contact #{failed} was not imported") - end - end - end - - defp find_or_create_relationship_type(account_id, name, data) do - alias Kith.Contacts.RelationshipType - alias Kith.Repo - import Ecto.Query - - reverse_name = (data && data["reverse_name"]) || name - - case Repo.one(from rt in RelationshipType, where: rt.name == ^name and (is_nil(rt.account_id) or rt.account_id == ^account_id)) do - nil -> 
Kith.Contacts.create_relationship_type(account_id, %{name: name, reverse_name: reverse_name}) - rt -> {:ok, rt} - end - end - - defp import_first_met_through_links(account_id, contacts_data, contact_map) do - for contact_data <- contacts_data do - through_uuid = get_in(contact_data, ["first_met_through", "data", "uuid"]) - contact_id = Map.get(contact_map, contact_data["uuid"]) - - if through_uuid && contact_id do - through_id = Map.get(contact_map, through_uuid) - if through_id do - contact = Kith.Repo.get!(Kith.Contacts.Contact, contact_id) - Kith.Contacts.update_contact(contact, %{first_met_through_id: through_id}) - else - Logger.warning("first_met_through #{through_uuid} not found for contact #{contact_data["uuid"]}") - end - end - end - end - - # --- Helpers --- - - defp init_summary do - %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []} - end - - defp add_error(acc, msg) do - errors = if length(acc.errors) < 50, do: acc.errors ++ [msg], else: acc.errors - %{acc | skipped: acc.skipped + 1, error_count: acc.error_count + 1, errors: errors} - end - - defp parse_date_string(nil), do: nil - defp parse_date_string(str) do - case Date.from_iso8601(str) do - {:ok, date} -> date - _ -> nil - end - end -end -``` - -- [ ] **Step 5: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/monica_test.exs -v` -Expected: All PASS - -- [ ] **Step 6: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/imports/sources/monica.ex test/kith/imports/sources/monica_test.exs test/support/fixtures/monica_export.json -git commit -m "feat: add Monica CRM import source with full data mapping" -``` - ---- - -### Task 10: Monica source — Integration test (full import) - -**Files:** -- Modify: `test/kith/imports/sources/monica_test.exs` - -- [ ] **Step 1: Write integration test for full import** - -Add to `test/kith/imports/sources/monica_test.exs`: - -```elixir - describe "import/4" do - setup do - 
seed_reference_data!() - user = user_fixture() - %{user: user, account_id: user.account_id} - end - - test "imports contacts with all children", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - - assert {:ok, summary} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - assert summary.contacts == 2 - - # Verify contacts exist - contacts = Kith.Contacts.list_contacts(account_id) - assert length(contacts) == 2 - - jane = Enum.find(contacts, &(&1.first_name == "Jane")) - assert jane.last_name == "Doe" - assert jane.middle_name == "Marie" - assert jane.occupation == "Engineer" - assert jane.favorite == true - end - - test "imports contact children (notes, addresses, pets)", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - - contacts = Kith.Contacts.list_contacts(account_id) - jane = Enum.find(contacts, &(&1.first_name == "Jane")) - - notes = Kith.Contacts.list_notes(jane.id, user.id) - assert length(notes) == 1 - - pets = Kith.Pets.list_pets(account_id, jane.id) - assert length(pets) == 1 - assert hd(pets).species == "dog" - end - - test "creates import_records for deduplication", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - - rec = Kith.Imports.find_import_record(account_id, "monica", "contact", "contact-uuid-1") - assert rec != nil - end - - test "handles re-import (upsert)", %{data: data, account_id: account_id, user: user} do - import_job1 = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job1}) - - # Complete first import so we can create a second - 
Kith.Imports.update_import_status(import_job1, "completed") - - import_job2 = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, summary} = MonicaSource.import(account_id, user.id, data, %{import: import_job2}) - - # Should still have 2 contacts (upserted, not duplicated) - contacts = Kith.Contacts.list_contacts(account_id) - assert length(contacts) == 2 - assert summary.contacts == 2 - end - - test "resolves first_met_through cross-references", %{data: data, account_id: account_id, user: user} do - import_job = import_fixture(account_id, user.id, %{source: "monica"}) - {:ok, _} = MonicaSource.import(account_id, user.id, data, %{import: import_job}) - - contacts = Kith.Contacts.list_contacts(account_id) - john = Enum.find(contacts, &(&1.first_name == "John")) - jane = Enum.find(contacts, &(&1.first_name == "Jane")) - - reloaded = Kith.Repo.get!(Kith.Contacts.Contact, john.id) - assert reloaded.first_met_through_id == jane.id - end - end -``` - -Add required imports at the top: - -```elixir - import Kith.AccountsFixtures - import Kith.ContactsFixtures - import Kith.ImportsFixtures -``` - -- [ ] **Step 2: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/imports/sources/monica_test.exs -v` -Expected: All PASS. Debug any failures — these exercise the full import pipeline. - -- [ ] **Step 3: Run full test suite** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All pass. 
- -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add test/kith/imports/sources/monica_test.exs -git commit -m "test: add Monica source integration tests for full import pipeline" -``` - ---- - -### Task 11: PhotoSyncWorker - -**Files:** -- Create: `lib/kith/workers/photo_sync_worker.ex` -- Create: `test/kith/workers/photo_sync_worker_test.exs` - -- [ ] **Step 1: Write failing test** - -Create `test/kith/workers/photo_sync_worker_test.exs`: - -```elixir -defmodule Kith.Workers.PhotoSyncWorkerTest do - use Kith.DataCase, async: true - use Oban.Testing, repo: Kith.Repo - - alias Kith.Workers.PhotoSyncWorker - - import Kith.AccountsFixtures - import Kith.ContactsFixtures - import Kith.ImportsFixtures - - describe "perform/1" do - test "discards when import not found" do - assert {:discard, _} = perform_job(PhotoSyncWorker, %{ - import_id: 999_999, - photo_id: 1, - source_photo_id: "uuid" - }) - end - end -end -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/photo_sync_worker_test.exs -v` - -- [ ] **Step 3: Implement PhotoSyncWorker** - -Create `lib/kith/workers/photo_sync_worker.ex`: - -```elixir -defmodule Kith.Workers.PhotoSyncWorker do - @moduledoc """ - Oban worker that downloads a single photo from an external source API - and stores it in Kith.Storage. Independent per-photo jobs with staggered scheduling. 
- """ - - use Oban.Worker, queue: :photo_sync, max_attempts: 3 - - require Logger - - alias Kith.Imports - alias Kith.Contacts.Photo - alias Kith.Repo - - @impl Oban.Worker - def perform(%Oban.Job{ - args: %{"import_id" => import_id, "photo_id" => photo_id, "source_photo_id" => source_photo_id}, - attempt: attempt, - max_attempts: max_attempts - }) do - with {:import, %{} = import} <- {:import, Imports.get_import(import_id)}, - {:photo, %Photo{} = photo} <- {:photo, Repo.get(Photo, photo_id)}, - {:source, {:ok, source_mod}} <- {:source, Imports.resolve_source(import.source)} do - - # Check if import was cancelled - if import.status == "cancelled", do: throw(:cancelled) - - # Check storage limit - case Kith.Storage.check_storage_limit(import.account_id, 0) do - :ok -> :ok - {:error, _} -> - Logger.warning("Storage limit reached for account #{import.account_id}, discarding photo #{photo_id}") - Repo.delete(photo) - throw(:discard) - end - - credential = %{url: import.api_url, api_key: import.api_key_encrypted} - - case source_mod.fetch_photo(credential, source_photo_id) do - {:ok, binary} -> - storage_key = Kith.Storage.generate_key(import.account_id, "photos", photo.file_name) - {:ok, _} = Kith.Storage.upload_binary(binary, storage_key) - - photo - |> Ecto.Changeset.change(%{ - storage_key: storage_key, - file_size: byte_size(binary) - }) - |> Repo.update!() - - maybe_cleanup_api_key(import) - :ok - - {:error, :rate_limited} -> - {:snooze, 60} - - {:error, reason} -> - Logger.warning("Photo sync failed for #{source_photo_id}: #{inspect(reason)}") - - # On final attempt: delete the Photo record so the contact doesn't have - # a permanently broken pending_sync: reference - if attempt >= max_attempts do - Repo.delete(photo) - Logger.warning("Deleted photo #{photo_id} after #{max_attempts} failed attempts") - end - - {:error, reason} - end - else - {:import, nil} -> {:discard, "Import not found"} - {:photo, nil} -> {:discard, "Photo not found"} - {:source, {:error, _}} 
-> {:discard, "Unknown source"} - end - catch - :cancelled -> {:discard, "Import cancelled"} - :discard -> {:discard, "Storage limit reached"} - end - - @impl Oban.Worker - def timeout(_job), do: :timer.minutes(5) - - defp maybe_cleanup_api_key(import) do - if Imports.pending_async_jobs_count(import.id) <= 1 do - Imports.wipe_api_key(import) - end - end -end -``` - -- [ ] **Step 4: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/photo_sync_worker_test.exs -v` -Expected: PASS - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/photo_sync_worker.ex test/kith/workers/photo_sync_worker_test.exs -git commit -m "feat: add PhotoSyncWorker for async photo downloads" -``` - ---- - -### Task 12: ApiSupplementWorker - -**Files:** -- Create: `lib/kith/workers/api_supplement_worker.ex` -- Create: `test/kith/workers/api_supplement_worker_test.exs` - -- [ ] **Step 1: Write failing test** - -Create `test/kith/workers/api_supplement_worker_test.exs`: - -```elixir -defmodule Kith.Workers.ApiSupplementWorkerTest do - use Kith.DataCase, async: true - use Oban.Testing, repo: Kith.Repo - - alias Kith.Workers.ApiSupplementWorker - - describe "perform/1" do - test "discards when import not found" do - assert {:discard, _} = perform_job(ApiSupplementWorker, %{ - import_id: 999_999, - contact_id: 1, - source_contact_id: "uuid", - key: "first_met_details" - }) - end - end -end -``` - -- [ ] **Step 2: Implement ApiSupplementWorker** - -Create `lib/kith/workers/api_supplement_worker.ex`: - -```elixir -defmodule Kith.Workers.ApiSupplementWorker do - @moduledoc """ - Oban worker that fetches supplementary data from a source API. - Currently handles first_met_details (first_met_where, first_met_additional_info). 
- """ - - use Oban.Worker, queue: :api_supplement, max_attempts: 3 - - require Logger - - alias Kith.Imports - alias Kith.Contacts.Contact - alias Kith.Repo - - @impl Oban.Worker - def perform(%Oban.Job{args: %{ - "import_id" => import_id, - "contact_id" => contact_id, - "source_contact_id" => source_contact_id, - "key" => key - }}) do - key_atom = String.to_existing_atom(key) - - with {:import, %{} = import} <- {:import, Imports.get_import(import_id)}, - {:contact, %Contact{} = contact} <- {:contact, Repo.get(Contact, contact_id)}, - {:source, {:ok, source_mod}} <- {:source, Imports.resolve_source(import.source)} do - - if import.status == "cancelled", do: throw(:cancelled) - - credential = %{url: import.api_url, api_key: import.api_key_encrypted} - - case source_mod.fetch_supplement(credential, source_contact_id, key_atom) do - {:ok, data} -> - attrs = Map.take(data, [:first_met_where, :first_met_additional_info]) - Kith.Contacts.update_contact(contact, attrs) - maybe_cleanup_api_key(import) - :ok - - {:error, :rate_limited} -> - {:snooze, 60} - - {:error, reason} -> - Logger.warning("API supplement failed for contact #{source_contact_id}: #{inspect(reason)}") - {:error, reason} - end - else - {:import, nil} -> {:discard, "Import not found"} - {:contact, nil} -> {:discard, "Contact not found"} - {:source, {:error, _}} -> {:discard, "Unknown source"} - end - catch - :cancelled -> {:discard, "Import cancelled"} - end - - defp maybe_cleanup_api_key(import) do - if Imports.pending_async_jobs_count(import.id) <= 1 do - Imports.wipe_api_key(import) - end - end -end -``` - -- [ ] **Step 3: Run tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test test/kith/workers/api_supplement_worker_test.exs -v` -Expected: PASS - -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/api_supplement_worker.ex test/kith/workers/api_supplement_worker_test.exs -git commit -m "feat: add ApiSupplementWorker for fetching first-met 
details" -``` - ---- - -### Task 13: ImportFileCleanupWorker - -**Files:** -- Create: `lib/kith/workers/import_file_cleanup_worker.ex` - -- [ ] **Step 1: Implement the cleanup worker** - -Create `lib/kith/workers/import_file_cleanup_worker.ex`: - -```elixir -defmodule Kith.Workers.ImportFileCleanupWorker do - @moduledoc """ - Periodic Oban cron job that deletes import files older than 30 days. - Runs weekly (Sunday 5 AM). - """ - - use Oban.Worker, queue: :default, max_attempts: 1 - - require Logger - - import Ecto.Query - alias Kith.Repo - alias Kith.Imports.Import - - @retention_days 30 - - @impl Oban.Worker - def perform(_job) do - cutoff = DateTime.utc_now() |> DateTime.add(-@retention_days * 86_400, :second) - - imports = - Import - |> where([i], i.status in ["completed", "failed", "cancelled"]) - |> where([i], not is_nil(i.file_storage_key)) - |> where([i], i.completed_at < ^cutoff or (is_nil(i.completed_at) and i.updated_at < ^cutoff)) - |> Repo.all() - - Enum.each(imports, fn import -> - case Kith.Storage.delete(import.file_storage_key) do - :ok -> - import - |> Ecto.Changeset.change(file_storage_key: nil) - |> Repo.update!() - Logger.info("Cleaned up import file for import #{import.id}") - - {:error, reason} -> - Logger.warning("Failed to delete import file #{import.file_storage_key}: #{inspect(reason)}") - end - end) - - :ok - end -end -``` - -- [ ] **Step 2: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/import_file_cleanup_worker.ex -git commit -m "feat: add ImportFileCleanupWorker for 30-day file retention" -``` - ---- - -### Task 14: ImportWizardLive — LiveView with source selection - -**Files:** -- Create: `lib/kith_web/live/import_wizard_live.ex` -- Modify: `lib/kith_web/router.ex` (update route to point to new LiveView) - -- [ ] **Step 1: Create the ImportWizardLive** - -Create `lib/kith_web/live/import_wizard_live.ex`: - -```elixir -defmodule KithWeb.ImportWizardLive do - use KithWeb, :live_view - - alias 
Kith.Policy - alias Kith.Imports - - import KithWeb.SettingsLive.SettingsLayout - - @max_file_size 50 * 1024 * 1024 - - @impl true - def mount(_params, _session, socket) do - {:ok, - socket - |> assign(:page_title, "Import Contacts") - |> assign(:step, :source_selection) - |> assign(:source, nil) - |> assign(:importing, false) - |> assign(:progress, nil) - |> assign(:results, nil) - |> assign(:summary, nil) - |> assign(:import_job, nil) - |> assign(:api_connected, false) - |> assign(:api_options, %{}) - |> allow_upload(:import_file, - accept: ~w(.vcf .json), - max_file_size: @max_file_size, - max_entries: 1 - )} - end - - @impl true - def handle_params(_params, _uri, socket) do - scope = socket.assigns.current_scope - - unless Policy.can?(scope.user, :create, :import) do - {:noreply, - socket - |> put_flash(:error, "You do not have permission to import contacts.") - |> push_navigate(to: ~p"/")} - else - if connected?(socket) do - Phoenix.PubSub.subscribe(Kith.PubSub, "import:#{scope.account.id}") - end - - # Check for active import - case Imports.get_active_import(scope.account.id) do - %{} = import_job -> - {:noreply, socket |> assign(:step, :progress) |> assign(:import_job, import_job) |> assign(:importing, true)} - nil -> - {:noreply, socket} - end - end - end - - @impl true - def handle_event("select_source", %{"source" => source}, socket) do - {:noreply, assign(socket, :source, source)} - end - - def handle_event("validate", _params, socket) do - {:noreply, socket} - end - - def handle_event("upload_and_validate", _params, socket) do - scope = socket.assigns.current_scope - source = socket.assigns.source - - results = - consume_uploaded_entries(socket, :import_file, fn %{path: path}, entry -> - data = File.read!(path) - - with {:ok, source_mod} <- Imports.resolve_source(source), - {:ok, _} <- source_mod.validate_file(data), - {:ok, summary} <- source_mod.parse_summary(data) do - # Store file - storage_key = "imports/pending/#{entry.client_name}" - {:ok, _} = 
Kith.Storage.upload_binary(data, storage_key) - {:ok, {summary, storage_key, entry.client_name, byte_size(data)}} - else - {:error, reason} -> {:ok, {:error, reason}} - end - end) - - case List.first(results) do - {summary, storage_key, file_name, file_size} -> - {:noreply, - socket - |> assign(:step, :confirmation) - |> assign(:summary, summary) - |> assign(:file_storage_key, storage_key) - |> assign(:file_name, file_name) - |> assign(:file_size, file_size)} - - {:error, reason} -> - {:noreply, put_flash(socket, :error, reason)} - - nil -> - {:noreply, put_flash(socket, :error, "No file uploaded.")} - end - end - - def handle_event("start_import", _params, socket) do - scope = socket.assigns.current_scope - - attrs = %{ - source: socket.assigns.source, - file_name: socket.assigns.file_name, - file_size: socket.assigns.file_size, - file_storage_key: socket.assigns.file_storage_key, - api_url: socket.assigns[:api_url], - api_key_encrypted: socket.assigns[:api_key], - api_options: socket.assigns.api_options - } - - case Imports.create_import(scope.account.id, scope.user.id, attrs) do - {:ok, import_job} -> - %{import_id: import_job.id} - |> Kith.Workers.ImportSourceWorker.new() - |> Oban.insert() - - {:noreply, - socket - |> assign(:step, :progress) - |> assign(:import_job, import_job) - |> assign(:importing, true)} - - {:error, :import_in_progress} -> - {:noreply, put_flash(socket, :error, "An import is already in progress.")} - - {:error, changeset} -> - {:noreply, put_flash(socket, :error, "Failed to start import: #{inspect(changeset.errors)}")} - end - end - - def handle_event("cancel_import", _params, socket) do - if socket.assigns.import_job do - Imports.cancel_import(socket.assigns.import_job) - end - {:noreply, socket} - end - - def handle_event("test_api_connection", %{"url" => url, "api_key" => api_key}, socket) do - with {:ok, source_mod} <- Imports.resolve_source(socket.assigns.source), - :ok <- source_mod.test_connection(%{url: url, api_key: api_key}) do 
- options = if function_exported?(source_mod, :api_supplement_options, 0) do - source_mod.api_supplement_options() - else - [] - end - - {:noreply, - socket - |> assign(:api_connected, true) - |> assign(:api_url, url) - |> assign(:api_key, api_key) - |> assign(:supplement_options, options)} - else - {:error, reason} -> - {:noreply, - socket - |> assign(:api_connected, false) - |> put_flash(:error, "Connection failed: #{reason}")} - end - end - - def handle_event("toggle_api_option", %{"key" => key}, socket) do - opts = socket.assigns.api_options - key_atom = String.to_existing_atom(key) - new_opts = Map.update(opts, key_atom, true, &(!&1)) - {:noreply, assign(socket, :api_options, new_opts)} - end - - def handle_event("reset", _params, socket) do - {:noreply, - socket - |> assign(:step, :source_selection) - |> assign(:source, nil) - |> assign(:results, nil) - |> assign(:summary, nil) - |> assign(:importing, false) - |> assign(:progress, nil)} - end - - @impl true - def handle_info({:import_progress, progress}, socket) do - {:noreply, assign(socket, :progress, progress)} - end - - def handle_info({:import_complete, results}, socket) do - {:noreply, - socket - |> assign(:importing, false) - |> assign(:step, :complete) - |> assign(:results, results)} - end - - @impl true - def render(assigns) do - ~H""" - - <.settings_shell current_path={@current_path} current_scope={@current_scope}> - - Import Contacts - <:subtitle>Import contacts from vCard or Monica CRM - - - <%!-- Step 1: Source Selection --%> -
-
- - - -
- -
-
-
- <.live_file_input upload={@uploads.import_file} class="hidden" /> -

- Drag and drop a {if @source == "vcard", do: ".vcf", else: ".json"} file here, or - -

-
- -
- {entry.client_name} - {Float.round(entry.client_size / 1024, 1)} KB -
- -

- {upload_error_message(err)} -

- -
- - Validate & Continue - -
-
-
-
- - <%!-- Step 2: Confirmation --%> -
-
-

Import Summary

-
-
-
Contacts
-
{@summary.contacts}
-
-
-
Notes
-
{@summary.notes}
-
-
-
Relationships
-
{@summary.relationships}
-
-
-
Photos
-
{@summary.photos}
-
-
-
- - <%!-- Monica API section --%> -
-
- Connect to Monica API (optional) -
-
-
- - -
-
- - -
- Test Connection -
- -
-

Connected successfully

-
- - -
-
-
-
-
- -
- Start Import - Back -
-
- - <%!-- Step 3: Progress --%> -
-
-
-

- Processing contact {@progress.current} / {@progress.total}... -

-
-
0, do: round(@progress.current / @progress.total * 100), else: 0}%"} - /> -
-
-

Starting import...

- -
- Cancel Import -
-
-
- - <%!-- Step 4: Complete --%> -
-
-

Import Complete

-
-

{@results["contacts"] || @results[:contacts] || 0} contacts imported

-

0} class="text-[var(--color-warning)]"> - {@results["skipped"] || @results[:skipped]} skipped -

-

0} class="text-[var(--color-error)]"> - {@results["error_count"] || @results[:error_count]} errors -

-
-
- <.link navigate={~p"/contacts"} class="text-[var(--color-accent)] hover:underline text-sm">View contacts - -
-
-
- - - """ - end - - defp upload_error_message(:too_large), do: "File is too large (max 50 MB)" - defp upload_error_message(:not_accepted), do: "Only .vcf and .json files are accepted" - defp upload_error_message(:too_many_files), do: "Only one file at a time" - defp upload_error_message(other), do: "Upload error: #{inspect(other)}" -end -``` - -- [ ] **Step 2: Update the router** - -In `lib/kith_web/router.ex`, find the existing import route (likely `live "/settings/import", SettingsLive.Import`) and replace with: - -```elixir -live "/settings/import", ImportWizardLive -``` - -- [ ] **Step 3: Verify it compiles** - -Run: `cd /Users/basharqassis/projects/kith && mix compile --warnings-as-errors` -Expected: Compiles without errors. - -- [ ] **Step 4: Run existing tests** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All pass. Some existing import tests may need updating if they reference `SettingsLive.Import` — update them to use `ImportWizardLive`. - -- [ ] **Step 5: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith_web/live/import_wizard_live.ex lib/kith_web/router.ex -git commit -m "feat: add ImportWizardLive with multi-source import wizard" -``` - ---- - -### Task 15: Enqueue async jobs after Monica import - -**Files:** -- Modify: `lib/kith/workers/import_source_worker.ex` - -After the Monica source completes `import/4`, the `ImportSourceWorker` needs to enqueue `PhotoSyncWorker` and `ApiSupplementWorker` jobs based on `import.api_options`. 
- -- [ ] **Step 1: Add post-import job scheduling to ImportSourceWorker** - -In `lib/kith/workers/import_source_worker.ex`, after the `source_mod.import/4` call succeeds, add: - -```elixir - # Enqueue async jobs for photo sync and API supplements - if import.api_options do - enqueue_async_jobs(import) - end -``` - -Add private function: - -```elixir - defp enqueue_async_jobs(%{api_url: nil}), do: :ok - defp enqueue_async_jobs(%{api_key_encrypted: nil}), do: :ok - defp enqueue_async_jobs(import) do - import_records = Kith.Imports.list_import_records(import.id) - - # Photo sync jobs - if import.api_options["photos"] || import.api_options[:photos] do - photo_records = Enum.filter(import_records, &(&1.source_entity_type == "photo")) - - photo_records - |> Enum.with_index() - |> Enum.each(fn {rec, idx} -> - batch = div(idx, 50) - delay = batch * 60 - - %{import_id: import.id, photo_id: rec.local_entity_id, source_photo_id: rec.source_entity_id} - |> Kith.Workers.PhotoSyncWorker.new(scheduled_at: DateTime.add(DateTime.utc_now(), delay, :second)) - |> Oban.insert() - end) - end - - # API supplement jobs — only for contacts that had first_met_date in the export. - # Re-read the file to determine which contacts need supplement data. - # This avoids storing per-contact flags and keeps import_records generic. 
- if import.api_options["first_met_details"] || import.api_options[:first_met_details] do - contacts_with_first_met = case Kith.Storage.read(import.file_storage_key) do - {:ok, data} -> - case Jason.decode(data) do - {:ok, parsed} -> - (get_in(parsed, ["contacts", "data"]) || []) - |> Enum.filter(fn c -> - date = get_in(c, ["first_met_date", "data", "date"]) - date != nil - end) - |> Enum.map(& &1["uuid"]) - |> MapSet.new() - _ -> MapSet.new() - end - _ -> MapSet.new() - end - - contact_records = - import_records - |> Enum.filter(&(&1.source_entity_type == "contact")) - |> Enum.filter(&MapSet.member?(contacts_with_first_met, &1.source_entity_id)) - - contact_records - |> Enum.with_index() - |> Enum.each(fn {rec, idx} -> - batch = div(idx, 50) - delay = batch * 60 - - %{ - import_id: import.id, - contact_id: rec.local_entity_id, - source_contact_id: rec.source_entity_id, - key: "first_met_details" - } - |> Kith.Workers.ApiSupplementWorker.new(scheduled_at: DateTime.add(DateTime.utc_now(), delay, :second)) - |> Oban.insert() - end) - end - end -``` - -- [ ] **Step 2: Add list_import_records to Imports context** - -In `lib/kith/imports.ex`, add: - -```elixir - def list_import_records(import_id) do - ImportRecord - |> where([r], r.import_id == ^import_id) - |> Repo.all() - end - - def count_import_records_by_type(import_id, entity_type) do - ImportRecord - |> where([r], r.import_id == ^import_id) - |> where([r], r.source_entity_type == ^entity_type) - |> Repo.aggregate(:count) - end -``` - -- [ ] **Step 3: Run full test suite** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All pass. 
- -- [ ] **Step 4: Commit** - -```bash -cd /Users/basharqassis/projects/kith -git add lib/kith/workers/import_source_worker.ex lib/kith/imports.ex -git commit -m "feat: enqueue photo sync and API supplement jobs after import" -``` - ---- - -### Task 16: Final verification - -- [ ] **Step 1: Run full test suite** - -Run: `cd /Users/basharqassis/projects/kith && mix test` -Expected: All tests pass. - -- [ ] **Step 2: Verify compilation with no warnings** - -Run: `cd /Users/basharqassis/projects/kith && mix compile --warnings-as-errors` -Expected: Clean compilation. - -- [ ] **Step 3: Verify migrations** - -Run: `cd /Users/basharqassis/projects/kith && mix ecto.rollback -n 2 && mix ecto.migrate` -Expected: Both migrations are reversible. - -- [ ] **Step 4: Manual smoke test** - -Start the server: `cd /Users/basharqassis/projects/kith && mix phx.server` -Navigate to `/settings/import`. Verify: -- Source selection (vCard/Monica tabs) renders -- File upload works for both types -- Validation shows summary -- VCard import runs end-to-end - -- [ ] **Step 5: Final commit if needed** - -```bash -cd /Users/basharqassis/projects/kith -git status -# Review and commit any remaining changes -``` diff --git a/docs/superpowers/plans/2026-05-15-account-reset-completeness.md b/docs/superpowers/plans/2026-05-15-account-reset-completeness.md new file mode 100644 index 0000000..bdf127d --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-account-reset-completeness.md @@ -0,0 +1,1964 @@ +# Account Reset Completeness Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Make `Kith.Workers.AccountResetWorker` fully wipe a single account's data (imports, conversations, journal, tasks, reminders, plus existing contacts/tags/activities/audit), cancelling its in-flight Oban jobs first and leaving every other account untouched. + +**Architecture:** The worker becomes a thin orchestrator that iterates over an ordered list of per-domain `Cleanup` modules. Each cleanup module exposes `wipe_for_account(account_id) :: :ok` and lives next to its domain (`Kith.Imports.Cleanup`, `Kith.Conversations.Cleanup`, etc.). Account scoping is enforced inside each cleanup with a `where: x.account_id == ^account_id` clause. In-flight Oban job cancellation queries `Oban.Job` directly with account-scoped filters (`import_id IN account's imports` / `account_id == this_account`). + +**Tech Stack:** Elixir, Phoenix, Ecto, Oban, PostgreSQL. Test framework: ExUnit + Oban.Testing. + +**Spec:** `docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md` + +**Worktree:** Work happens in the existing branch `fix/duplicate-detection` at `/Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection`. Each task is one commit; push at the end. 
+ +--- + +## File structure (locked-in decomposition) + +**New files:** + +| Path | Responsibility | +|---|---| +| `lib/kith/imports/cleanup.ex` | Wipe `imports` + `import_records` for one account | +| `lib/kith/imports/job_cancellation.ex` | Cancel Oban jobs whose `args.import_id ∈ account's imports` (+ DuplicateDetection by `account_id`) | +| `lib/kith/conversations/cleanup.ex` | Wipe `conversations` (CASCADE → `messages`) | +| `lib/kith/journal/cleanup.ex` | Wipe `journal_entries` | +| `lib/kith/tasks/cleanup.ex` | Wipe `tasks` | +| `lib/kith/reminders/cleanup.ex` | Cancel reminder Oban jobs + wipe `reminders` (CASCADE → `reminder_rules` + `reminder_instances`) | +| `lib/kith/storage/account_cleanup.ex` | Delete photo + document + import-upload files | +| `lib/kith/contacts/cleanup.ex` | Hard-delete `contacts` (CASCADE) + wipe `tags` | +| `lib/kith/activities/cleanup.ex` | Wipe `activities` | +| `lib/kith/audit_logs/cleanup.ex` | Wipe `audit_logs` | + +**Refactored:** + +| Path | Change | +|---|---| +| `lib/kith/workers/account_reset_worker.ex` | Replace per-domain private helpers with an ordered `@cleaners` list and `Enum.each` orchestration | + +**New tests:** one per cleanup module, plus regression + isolation tests on the worker. + +--- + +## Task ordering rationale + +Each task delivers a new cleanup module + tests in one commit. Tasks 1–10 do NOT modify `AccountResetWorker` — they just create the new modules. Task 11 wires the worker to use them, in one commit, with the old private helpers removed. Task 12 adds the user-reported regression test plus the cross-account isolation test on the worker. + +This ordering means each task is independently reviewable, the worker change is one atomic commit, and the bug isn't half-fixed at any commit boundary. 
+ +--- + +## Task 1: `Kith.Imports.Cleanup` + +**Files:** +- Create: `lib/kith/imports/cleanup.ex` +- Create: `test/kith/imports/cleanup_test.exs` + +This is the most bug-critical module — the user's photo sync failure traces directly to orphaned `import_records`. Do it first so end-to-end testing on dev can validate the fix early. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/imports/cleanup_test.exs`: + +```elixir +defmodule Kith.Imports.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Imports + alias Kith.Imports.{Cleanup, Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes imports + import_records for target account; leaves other account untouched", ctx do + target_import = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(ctx.other_account, ctx.other_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + {:ok, _} = Imports.record_imported_entity(target_import, "contact", "1", "contact", 999) + {:ok, _} = Imports.record_imported_entity(other_import, "contact", "1", "contact", 999) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Import, ctx.target_account) == 0 + assert count_for(ImportRecord, ctx.target_account) == 0 + + # Control account untouched + assert count_for(Import, ctx.other_account) == 1 + assert count_for(ImportRecord, ctx.other_account) == 1 + end + + test "is idempotent on an account with no import data", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = 
Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/imports/cleanup_test.exs +``` + +Expected: compile error — `Kith.Imports.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/imports/cleanup.ex`: + +```elixir +defmodule Kith.Imports.Cleanup do + @moduledoc """ + Wipes all import history for a single account. + + Deletes `import_records` first then `imports`. Both tables are scoped by + `account_id` directly. Called by `Kith.Workers.AccountResetWorker`. + """ + + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {records, _} = + Repo.delete_all(from(r in ImportRecord, where: r.account_id == ^account_id)) + + {imports, _} = + Repo.delete_all(from(i in Import, where: i.account_id == ^account_id)) + + Logger.info( + "[Imports.Cleanup] wiped #{records} record(s) + #{imports} import(s) for account #{account_id}" + ) + + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/imports/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/imports/cleanup.ex test/kith/imports/cleanup_test.exs +git commit -m "feat: add Kith.Imports.Cleanup for account-scoped import wipe" +``` + +--- + +## Task 2: `Kith.Imports.JobCancellation` + +**Files:** +- Create: `lib/kith/imports/job_cancellation.ex` +- Create: `test/kith/imports/job_cancellation_test.exs` + +Cancels available/scheduled/retryable/executing Oban jobs for this account's imports. 
Matches by `args.import_id IN (account's imports)` for the four import-worker classes, plus `args.account_id == this_account` for `DuplicateDetectionWorker`. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/imports/job_cancellation_test.exs`: + +```elixir +defmodule Kith.Imports.JobCancellationTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Imports.JobCancellation + alias Kith.Repo + alias Kith.Workers.{DuplicateDetectionWorker, MonicaPhotoSyncWorker} + + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + target_import = + import_fixture(target.account_id, target.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(other.account_id, other.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + %{ + target_account: target.account_id, + target_import: target_import, + other_account: other.account_id, + other_import: other_import + } + end + + test "cancels target account's import jobs; leaves other account's jobs alone", ctx do + {:ok, target_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + {:ok, other_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.other_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_photo_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_photo_job.id).state == "available" + end + + test "cancels DuplicateDetectionWorker jobs by account_id", ctx do + {:ok, target_dup_job} = + Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.target_account})) + + {:ok, other_dup_job} = + 
Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.other_account})) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_dup_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_dup_job.id).state == "available" + end + + test "is a no-op when account has no jobs", ctx do + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + end + + test "ignores jobs already in 'completed' state", ctx do + {:ok, completed_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + # Manually mark as completed + completed_job + |> Ecto.Changeset.change(state: "completed", completed_at: DateTime.utc_now()) + |> Repo.update!() + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + # Completed jobs are NOT touched + assert Repo.get!(Oban.Job, completed_job.id).state == "completed" + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/imports/job_cancellation_test.exs +``` + +Expected: compile error — `Kith.Imports.JobCancellation` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/imports/job_cancellation.ex`: + +```elixir +defmodule Kith.Imports.JobCancellation do + @moduledoc """ + Cancels all pending/scheduled/retryable/executing Oban jobs that belong to a + single account's imports. + + Scoping rule: only jobs whose args reference this account (directly via + `account_id` or transitively via `import_id` belonging to one of this + account's imports) are touched. No other account's jobs are affected. 
+ """ + + alias Kith.Imports.Import + alias Kith.Repo + + import Ecto.Query + require Logger + + @import_workers ~w[ + Elixir.Kith.Workers.MonicaApiCrawlWorker + Elixir.Kith.Workers.MonicaPhotoSyncWorker + Elixir.Kith.Workers.MonicaDocumentImportWorker + Elixir.Kith.Workers.ImportSourceWorker + ] + + @cancellable_states ~w[available scheduled retryable executing] + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + import_ids = account_import_ids(account_id) + import_cancelled = cancel_jobs_by_import_id(import_ids) + account_cancelled = cancel_jobs_by_account_id(account_id) + + Logger.info( + "[Imports.JobCancellation] cancelled #{import_cancelled} import job(s) + " <> + "#{account_cancelled} account-scoped job(s) for account #{account_id}" + ) + + :ok + end + + defp account_import_ids(account_id) do + Repo.all(from(i in Import, where: i.account_id == ^account_id, select: i.id)) + end + + defp cancel_jobs_by_import_id([]), do: 0 + + defp cancel_jobs_by_import_id(import_ids) do + jobs = + Repo.all( + from(j in Oban.Job, + where: j.worker in ^@import_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'import_id')::int", j.args) in ^import_ids + ) + ) + + Enum.each(jobs, &Oban.cancel_job/1) + length(jobs) + end + + defp cancel_jobs_by_account_id(account_id) do + jobs = + Repo.all( + from(j in Oban.Job, + where: j.worker == "Elixir.Kith.Workers.DuplicateDetectionWorker", + where: j.state in ^@cancellable_states, + where: fragment("(?->>'account_id')::int", j.args) == ^account_id + ) + ) + + Enum.each(jobs, &Oban.cancel_job/1) + length(jobs) + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/imports/job_cancellation_test.exs +``` + +Expected: 4 tests, 0 failures. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/imports/job_cancellation.ex test/kith/imports/job_cancellation_test.exs +git commit -m "feat: add Kith.Imports.JobCancellation for account-scoped Oban cancel" +``` + +--- + +## Task 3: `Kith.Storage.AccountCleanup` + +**Files:** +- Create: `lib/kith/storage/account_cleanup.ex` +- Create: `test/kith/storage/account_cleanup_test.exs` + +Iterates storage keys for the account's photos, documents, and import uploads, calls `Kith.Storage.delete/1` on each. Logs warnings on failure but never raises (storage failures must not abort the reset). + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/storage/account_cleanup_test.exs`: + +```elixir +defmodule Kith.Storage.AccountCleanupTest do + use Kith.DataCase, async: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Storage + alias Kith.Storage.AccountCleanup + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "deletes target account's photo + import-upload files; leaves other account's files alone", + ctx do + {target_photo_key, _} = upload_and_attach_photo!(ctx.target_account) + {other_photo_key, _} = upload_and_attach_photo!(ctx.other_account) + + target_upload_key = upload_import_file!(ctx.target_account, ctx.target_user) + other_upload_key = upload_import_file!(ctx.other_account, ctx.other_user) + + assert {:ok, _} = Storage.read(target_photo_key) + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(target_upload_key) + assert {:ok, _} = Storage.read(other_upload_key) + + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + + assert {:error, _} = Storage.read(target_photo_key) + assert {:error, _} = Storage.read(target_upload_key) + + # Control 
account untouched + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(other_upload_key) + end + + test "is a no-op when account has no files", ctx do + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + end + + defp upload_and_attach_photo!(account_id) do + contact = contact_fixture(account_id) + binary = <<0xFF, 0xD8, 0xFF, 0xE0>> + key = Storage.generate_key(account_id, "photos", "test.jpg") + {:ok, _} = Storage.upload_binary(binary, key) + + {:ok, photo} = + Contacts.create_photo(contact, %{ + "file_name" => "test.jpg", + "storage_key" => key, + "file_size" => byte_size(binary), + "content_type" => "image/jpeg" + }) + + {key, photo} + end + + defp upload_import_file!(account_id, user_id) do + key = Storage.generate_key(account_id, "imports", "export.vcf") + {:ok, _} = Storage.upload_binary("BEGIN:VCARD\nEND:VCARD\n", key) + + {:ok, _} = + Imports.create_import(account_id, user_id, %{ + source: "vcard", + file_name: "export.vcf", + file_size: 22, + file_storage_key: key + }) + + key + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/storage/account_cleanup_test.exs +``` + +Expected: compile error — `Kith.Storage.AccountCleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/storage/account_cleanup.ex`: + +```elixir +defmodule Kith.Storage.AccountCleanup do + @moduledoc """ + Deletes physical storage objects (photos, documents, import upload files) + for a single account. + + Storage failures (S3 already-deleted, network blip) are logged at `:warning` + but never raise — they must not abort the surrounding account reset. + Storage objects are recoverable separately (S3 lifecycle, manual sweep) + and don't affect data integrity. + + Must run BEFORE `Kith.Contacts.Cleanup` and `Kith.Imports.Cleanup` — once contacts are hard-deleted, + the `photos` and `documents` rows are CASCADE-deleted and we can no longer + iterate their `storage_key` values; likewise, import upload keys are read from `imports` rows. 
+ """ + + alias Kith.Contacts.{Contact, Document, Photo} + alias Kith.Imports.Import + alias Kith.{Repo, Storage} + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + photo_count = delete_keys(photo_keys(account_id)) + document_count = delete_keys(document_keys(account_id)) + upload_count = delete_keys(import_upload_keys(account_id)) + + Logger.info( + "[Storage.AccountCleanup] deleted #{photo_count} photo file(s) + " <> + "#{document_count} document file(s) + #{upload_count} import upload(s) " <> + "for account #{account_id}" + ) + + :ok + end + + defp photo_keys(account_id) do + Repo.all( + from(p in Photo, + join: c in Contact, + on: p.contact_id == c.id, + where: c.account_id == ^account_id, + select: p.storage_key + ) + ) + end + + defp document_keys(account_id) do + Repo.all( + from(d in Document, + join: c in Contact, + on: d.contact_id == c.id, + where: c.account_id == ^account_id, + select: d.storage_key + ) + ) + end + + defp import_upload_keys(account_id) do + Repo.all( + from(i in Import, + where: i.account_id == ^account_id, + where: not is_nil(i.file_storage_key), + select: i.file_storage_key + ) + ) + end + + defp delete_keys(keys) do + Enum.each(keys, &safe_delete/1) + length(keys) + end + + defp safe_delete(nil), do: :ok + + defp safe_delete(key) do + case Storage.delete(key) do + :ok -> + :ok + + {:error, reason} -> + Logger.warning("[Storage.AccountCleanup] failed to delete #{key}: #{inspect(reason)}") + :ok + end + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/storage/account_cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/storage/account_cleanup.ex test/kith/storage/account_cleanup_test.exs +git commit -m "feat: add Kith.Storage.AccountCleanup for account-scoped file wipe" +``` + +--- + +## Task 4: `Kith.Contacts.Cleanup` + +**Files:** +- Create: `lib/kith/contacts/cleanup.ex` +- Create: `test/kith/contacts/cleanup_test.exs` + +Hard-deletes contacts (FK CASCADE handles addresses, contact_fields, photos rows, documents rows, notes, debts, gifts, pets, emotions, relationships, calls, life_events, duplicate_candidates, immich_candidates). Also wipes `tags` (account-scoped, no contact FK). Tags share the contacts axis-of-change so they're colocated. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/contacts/cleanup_test.exs`: + +```elixir +defmodule Kith.Contacts.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Contacts.{Cleanup, Contact, Tag} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "hard-deletes contacts + tags for target account; leaves other account untouched", ctx do + contact_fixture(ctx.target_account) + contact_fixture(ctx.target_account) + contact_fixture(ctx.other_account) + + Repo.insert!(%Tag{account_id: ctx.target_account, name: "target-tag"}) + Repo.insert!(%Tag{account_id: ctx.other_account, name: "other-tag"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Contact, ctx.target_account) == 0 + assert count_for(Tag, ctx.target_account) == 0 + + assert count_for(Contact, ctx.other_account) == 1 + assert count_for(Tag, ctx.other_account) == 1 + end + + test "ignores soft-deleted vs not — hard-deletes both", ctx do + active = contact_fixture(ctx.target_account) + soft = contact_fixture(ctx.target_account) + + soft + |> 
Ecto.Changeset.change(deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)) + |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + refute Repo.get(Contact, active.id) + refute Repo.get(Contact, soft.id) + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/contacts/cleanup_test.exs +``` + +Expected: compile error — `Kith.Contacts.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/contacts/cleanup.ex`: + +```elixir +defmodule Kith.Contacts.Cleanup do + @moduledoc """ + Hard-deletes all contacts (and CASCADE sub-entities) and account-scoped + tags for a single account. + + Sub-entities cleared via FK CASCADE: addresses, contact_fields, photos + (rows), documents (rows), notes, debts, gifts, pets, emotions, + relationships, calls, life_events, duplicate_candidates, immich_candidates. + + Note: `Kith.Storage.AccountCleanup` MUST run before this module so that + photo/document storage_keys can be enumerated before their rows are wiped. + + Tags are wiped here (not in a separate module) because they share the + contacts axis-of-change and have no other purpose. 
+ """ + + alias Kith.Contacts.{Contact, Tag} + alias Kith.Repo + + import Ecto.Query + require Logger + + @batch_size 200 + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + contacts_deleted = delete_contacts_in_batches(account_id, 0) + + {tags_deleted, _} = + Repo.delete_all(from(t in Tag, where: t.account_id == ^account_id)) + + Logger.info( + "[Contacts.Cleanup] hard-deleted #{contacts_deleted} contact(s) + " <> + "#{tags_deleted} tag(s) for account #{account_id}" + ) + + :ok + end + + defp delete_contacts_in_batches(account_id, acc) do + ids = + Repo.all( + from(c in Contact, + where: c.account_id == ^account_id, + select: c.id, + limit: @batch_size + ) + ) + + case ids do + [] -> + acc + + _ -> + {deleted, _} = Repo.delete_all(from(c in Contact, where: c.id in ^ids)) + delete_contacts_in_batches(account_id, acc + deleted) + end + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/contacts/cleanup_test.exs +``` + +Expected: 3 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/contacts/cleanup.ex test/kith/contacts/cleanup_test.exs +git commit -m "feat: add Kith.Contacts.Cleanup for account-scoped contacts+tags wipe" +``` + +--- + +## Task 5: `Kith.Conversations.Cleanup` + +**Files:** +- Create: `lib/kith/conversations/cleanup.ex` +- Create: `test/kith/conversations/cleanup_test.exs` + +Wipes `conversations` rows; CASCADE removes `messages`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/conversations/cleanup_test.exs`: + +```elixir +defmodule Kith.Conversations.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Conversations.{Cleanup, Conversation, Message} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes conversations (CASCADE messages) for target; leaves other untouched", ctx do + target_conv = insert_conversation!(ctx.target_account, ctx.target_user, ctx.target_contact.id) + other_conv = insert_conversation!(ctx.other_account, ctx.other_user, ctx.other_contact.id) + + insert_message!(target_conv.id, ctx.target_account) + insert_message!(other_conv.id, ctx.other_account) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Conversation, ctx.target_account) == 0 + assert count_for(Message, ctx.target_account) == 0 + + assert count_for(Conversation, ctx.other_account) == 1 + assert count_for(Message, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp insert_conversation!(account_id, user_id, contact_id) do + Repo.insert!(%Conversation{ + account_id: account_id, + creator_id: user_id, + contact_id: contact_id, + subject: "test", + platform: "other", + status: "active", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + end + + defp insert_message!(conversation_id, account_id) do + Repo.insert!(%Message{ + account_id: account_id, 
+ conversation_id: conversation_id, + body: "hi", + direction: "outgoing", + sent_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: If the `Conversation` or `Message` schema fields shown above don't match the actual schema (check `lib/kith/conversations/conversation.ex` and `lib/kith/conversations/message.ex`), adjust the test inserts to satisfy the schema. Required fields per the conversation schema reading are `account_id`, `creator_id`, `contact_id`, `subject`, `occurred_at`. Required for messages: `conversation_id`, `body`, `sent_at`. Read the schemas if any insert fails. + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/conversations/cleanup_test.exs +``` + +Expected: compile error — `Kith.Conversations.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/conversations/cleanup.ex`: + +```elixir +defmodule Kith.Conversations.Cleanup do + @moduledoc """ + Wipes all conversations for a single account. FK CASCADE removes the + associated `messages` rows. + """ + + alias Kith.Conversations.Conversation + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(c in Conversation, where: c.account_id == ^account_id)) + + Logger.info("[Conversations.Cleanup] wiped #{count} conversation(s) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/conversations/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +If the insert step fails because of schema mismatch, read `lib/kith/conversations/conversation.ex` and `lib/kith/conversations/message.ex`, fix the test setup, and re-run. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/conversations/cleanup.ex test/kith/conversations/cleanup_test.exs +git commit -m "feat: add Kith.Conversations.Cleanup for account-scoped conversation wipe" +``` + +--- + +## Task 6: `Kith.Journal.Cleanup` + +**Files:** +- Create: `lib/kith/journal/cleanup.ex` +- Create: `test/kith/journal/cleanup_test.exs` + +Wipes `journal_entries`. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/journal/cleanup_test.exs`: + +```elixir +defmodule Kith.Journal.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Journal + alias Kith.Journal.{Cleanup, Entry} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes journal entries for target account only", ctx do + {:ok, _} = + Journal.create_entry(ctx.target_account, ctx.target_user, %{ + "content" => "target", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + {:ok, _} = + Journal.create_entry(ctx.other_account, ctx.other_user, %{ + "content" => "other", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Entry, ctx.target_account) == 0 + assert count_for(Entry, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: `Journal.create_entry/3` may accept atom or string-keyed attrs. If the test fails on map shape, read `lib/kith/journal.ex:47` for the signature and adjust. 
+ +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/journal/cleanup_test.exs +``` + +Expected: compile error — `Kith.Journal.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/journal/cleanup.ex`: + +```elixir +defmodule Kith.Journal.Cleanup do + @moduledoc """ + Wipes all journal entries for a single account. + """ + + alias Kith.Journal.Entry + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(e in Entry, where: e.account_id == ^account_id)) + + Logger.info("[Journal.Cleanup] wiped #{count} journal entr(ies) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/journal/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/journal/cleanup.ex test/kith/journal/cleanup_test.exs +git commit -m "feat: add Kith.Journal.Cleanup for account-scoped journal wipe" +``` + +--- + +## Task 7: `Kith.Tasks.Cleanup` + +**Files:** +- Create: `lib/kith/tasks/cleanup.ex` +- Create: `test/kith/tasks/cleanup_test.exs` + +Wipes `tasks`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/tasks/cleanup_test.exs`: + +```elixir +defmodule Kith.Tasks.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Repo + alias Kith.Tasks + alias Kith.Tasks.{Cleanup, Task} + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes tasks for target account only", ctx do + {:ok, _} = Tasks.create_task(ctx.target_account, ctx.target_user, %{"title" => "target task"}) + {:ok, _} = Tasks.create_task(ctx.other_account, ctx.other_user, %{"title" => "other task"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Task, ctx.target_account) == 0 + assert count_for(Task, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/tasks/cleanup_test.exs +``` + +Expected: compile error — `Kith.Tasks.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/tasks/cleanup.ex`: + +```elixir +defmodule Kith.Tasks.Cleanup do + @moduledoc """ + Wipes all tasks for a single account. 
+ """ + + alias Kith.Repo + alias Kith.Tasks.Task + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(t in Task, where: t.account_id == ^account_id)) + + Logger.info("[Tasks.Cleanup] wiped #{count} task(s) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/tasks/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/tasks/cleanup.ex test/kith/tasks/cleanup_test.exs +git commit -m "feat: add Kith.Tasks.Cleanup for account-scoped task wipe" +``` + +--- + +## Task 8: `Kith.Reminders.Cleanup` + +**Files:** +- Create: `lib/kith/reminders/cleanup.ex` +- Create: `test/kith/reminders/cleanup_test.exs` + +Cancels Oban jobs tracked in `reminders.enqueued_oban_job_ids` (matching the existing `cancel_reminder_jobs/1` pattern from the current worker), then deletes reminders; CASCADE removes `reminder_rules` and `reminder_instances`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/reminders/cleanup_test.exs`: + +```elixir +defmodule Kith.Reminders.CleanupTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Reminders.{Cleanup, Reminder, ReminderInstance, ReminderRule} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes reminders + CASCADE rules/instances for target only", ctx do + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + other_reminder = reminder_fixture(ctx.other_account, ctx.other_contact.id, ctx.other_user) + + _target_instance = reminder_instance_fixture(target_reminder) + _other_instance = reminder_instance_fixture(other_reminder) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Reminder, ctx.target_account) == 0 + # rules + instances reference reminder_id, so we count them via the join: + assert count_orphans(ReminderRule, [target_reminder.id]) == 0 + assert count_orphans(ReminderInstance, [target_reminder.id]) == 0 + + assert count_for(Reminder, ctx.other_account) == 1 + end + + test "cancels Oban jobs tracked on the target's reminders", ctx do + # Insert a real Oban job and attach its id to a reminder + {:ok, job} = + Oban.insert(Kith.Workers.ReminderNotificationWorker.new(%{"reminder_instance_id" => 0})) + + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + + target_reminder + |> Ecto.Changeset.change(enqueued_oban_job_ids: [job.id]) 
+ |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, job.id).state == "cancelled" + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end + + defp count_orphans(schema, reminder_ids) do + Repo.aggregate(from(s in schema, where: s.reminder_id in ^reminder_ids), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/reminders/cleanup_test.exs +``` + +Expected: compile error — `Kith.Reminders.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/reminders/cleanup.ex`: + +```elixir +defmodule Kith.Reminders.Cleanup do + @moduledoc """ + Cancels all Oban jobs tracked on the account's reminders, then deletes + the reminders. FK CASCADE removes `reminder_rules` and `reminder_instances`. + """ + + alias Kith.Reminders.Reminder + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + cancel_oban_jobs_for_account(account_id) + + {count, _} = + Repo.delete_all(from(r in Reminder, where: r.account_id == ^account_id)) + + Logger.info("[Reminders.Cleanup] wiped #{count} reminder(s) for account #{account_id}") + :ok + end + + defp cancel_oban_jobs_for_account(account_id) do + job_ids = + Repo.all( + from(r in Reminder, + where: r.account_id == ^account_id, + select: r.enqueued_oban_job_ids + ) + ) + |> List.flatten() + |> Enum.reject(&is_nil/1) + + Enum.each(job_ids, &Oban.cancel_job/1) + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/reminders/cleanup_test.exs +``` + +Expected: 3 tests, 0 failures. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/reminders/cleanup.ex test/kith/reminders/cleanup_test.exs +git commit -m "feat: add Kith.Reminders.Cleanup for account-scoped reminder wipe" +``` + +--- + +## Task 9: `Kith.Activities.Cleanup` + +**Files:** +- Create: `lib/kith/activities/cleanup.ex` +- Create: `test/kith/activities/cleanup_test.exs` + +Wipes `activities` (account-scoped). No contact FK, so this isn't cleared by `Contacts.Cleanup`. + +- [ ] **Step 1: Write the failing test** + +Create `test/kith/activities/cleanup_test.exs`: + +```elixir +defmodule Kith.Activities.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Activities.{Activity, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes activities for target account only", ctx do + Repo.insert!(%Activity{ + account_id: ctx.target_account, + summary: "target activity", + happened_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Activity{ + account_id: ctx.other_account, + summary: "other activity", + happened_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Activity, ctx.target_account) == 0 + assert count_for(Activity, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: If the `Activity` schema requires different fields (read `lib/kith/activities/activity.ex` if the insert fails), adjust the test setup. 
+ +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/activities/cleanup_test.exs +``` + +Expected: compile error — `Kith.Activities.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/activities/cleanup.ex`: + +```elixir +defmodule Kith.Activities.Cleanup do + @moduledoc """ + Wipes all account-scoped activities for a single account. Activities have + no contact FK so they are not cleared by `Kith.Contacts.Cleanup`'s CASCADE. + """ + + alias Kith.Activities.Activity + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in Activity, where: a.account_id == ^account_id)) + + Logger.info("[Activities.Cleanup] wiped #{count} activit(ies) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/activities/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/activities/cleanup.ex test/kith/activities/cleanup_test.exs +git commit -m "feat: add Kith.Activities.Cleanup for account-scoped activity wipe" +``` + +--- + +## Task 10: `Kith.AuditLogs.Cleanup` + +**Files:** +- Create: `lib/kith/audit_logs/cleanup.ex` +- Create: `test/kith/audit_logs/cleanup_test.exs` + +Wipes `audit_logs`. Runs LAST in the worker pipeline so the "account_data_reset" audit log written at start lives until cleanup is done. 
+ +- [ ] **Step 1: Write the failing test** + +Create `test/kith/audit_logs/cleanup_test.exs`: + +```elixir +defmodule Kith.AuditLogs.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.AuditLogs + alias Kith.AuditLogs.{AuditLog, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes audit logs for target account only", ctx do + {:ok, _} = + AuditLogs.create_audit_log(ctx.target_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + {:ok, _} = + AuditLogs.create_audit_log(ctx.other_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(AuditLog, ctx.target_account) == 0 + assert count_for(AuditLog, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +mix test test/kith/audit_logs/cleanup_test.exs +``` + +Expected: compile error — `Kith.AuditLogs.Cleanup` does not exist. + +- [ ] **Step 3: Implement the module** + +Create `lib/kith/audit_logs/cleanup.ex`: + +```elixir +defmodule Kith.AuditLogs.Cleanup do + @moduledoc """ + Wipes all audit logs for a single account. Runs LAST in the reset pipeline + so the "account_data_reset" log written at the start of the worker lives + until the rest of cleanup completes. 
+ """ + + alias Kith.AuditLogs.AuditLog + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in AuditLog, where: a.account_id == ^account_id)) + + Logger.info("[AuditLogs.Cleanup] wiped #{count} audit log(s) for account #{account_id}") + :ok + end +end +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +mix test test/kith/audit_logs/cleanup_test.exs +``` + +Expected: 2 tests, 0 failures. + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/audit_logs/cleanup.ex test/kith/audit_logs/cleanup_test.exs +git commit -m "feat: add Kith.AuditLogs.Cleanup for account-scoped audit-log wipe" +``` + +--- + +## Task 11: Refactor `AccountResetWorker` to orchestrator + +**Files:** +- Modify: `lib/kith/workers/account_reset_worker.ex` (full rewrite of the worker body) + +Replace all per-domain private helpers with the ordered `@cleaners` list. Worker becomes ~40 LoC. + +- [ ] **Step 1: Replace the entire worker file content** + +Open `lib/kith/workers/account_reset_worker.ex` and replace the full content with: + +```elixir +defmodule Kith.Workers.AccountResetWorker do + @moduledoc """ + Resets a single account's data by orchestrating per-domain cleanup modules. + + Wipes everything the account owns except reference data (genders, + relationship_types, contact_field_types, etc.) and account_invitations. + Operations are scoped to the target account; no other account is affected. + + Each `@cleaners` module exposes `wipe_for_account(account_id) :: :ok`. + Order is load-bearing — see `docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md`. 
+ """ + + use Oban.Worker, + queue: :default, + max_attempts: 3, + unique: [period: 300, fields: [:args], keys: [:account_id]] + + alias Kith.{Activities, AuditLogs, Contacts, Conversations, Imports, Journal, + Reminders, Storage, Tasks} + + require Logger + + @cleaners [ + Imports.JobCancellation, + Storage.AccountCleanup, + Contacts.Cleanup, + Imports.Cleanup, + Conversations.Cleanup, + Reminders.Cleanup, + Tasks.Cleanup, + Journal.Cleanup, + Activities.Cleanup, + AuditLogs.Cleanup + ] + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"account_id" => account_id}}) do + Logger.metadata(account_id: account_id, worker: "AccountReset") + Logger.info("[AccountReset] starting reset for account #{account_id}") + write_initiated_audit_log(account_id) + + Enum.each(@cleaners, fn cleaner -> + Logger.info("[AccountReset] running #{inspect(cleaner)}") + :ok = cleaner.wipe_for_account(account_id) + end) + + Logger.info("[AccountReset] completed reset for account #{account_id}") + :ok + end + + defp write_initiated_audit_log(account_id) do + AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{reason: "Account data reset initiated"} + }) + end +end +``` + +- [ ] **Step 2: Run the existing worker test to ensure no regression** + +```bash +mix test test/kith_web/live/settings_live/account_live_test.exs +``` + +The existing test only asserts that the job is enqueued (no behavior assertions on cleanup). Expected: 0 failures. + +- [ ] **Step 3: Run the full test suite to catch incidental breakage** + +```bash +mix test +``` + +Expected: all tests pass. The 9 new cleanup modules + the worker are now exercised together. 
+ +- [ ] **Step 4: Run `mix format` to normalize the new file** + +```bash +mix format +``` + +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/workers/account_reset_worker.ex +git commit -m "refactor: AccountResetWorker becomes orchestrator over per-domain Cleanup modules" +``` + +--- + +## Task 12: Regression + cross-account isolation tests on the worker + +**Files:** +- Modify: `test/kith/workers/account_reset_worker_test.exs` (create if doesn't exist; the existing coverage is in `account_live_test.exs`) + +Adds the user-reported scenario (re-import-after-reset succeeds) and a snapshot-based cross-account isolation test. + +- [ ] **Step 1: Check whether the test file already exists** + +```bash +ls test/kith/workers/account_reset_worker_test.exs 2>/dev/null && echo "exists" || echo "missing" +``` + +If "missing", create it from scratch with the content below. If "exists", open the file and add the two new tests inside the existing `describe "perform/1"` block, preserving any existing tests. 
+ +- [ ] **Step 2: Write the file (or append the tests)** + +Full file content (use this if creating, or merge the tests if appending): + +```elixir +defmodule Kith.Workers.AccountResetWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Activities.Activity + alias Kith.AuditLogs.AuditLog + alias Kith.Contacts.{Contact, Tag} + alias Kith.Conversations.Conversation + alias Kith.Imports + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Journal.Entry + alias Kith.Reminders.Reminder + alias Kith.Repo + alias Kith.Tasks.Task, as: TaskSchema + alias Kith.Workers.AccountResetWorker + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + describe "perform/1 — regression: re-import after reset" do + test "re-import for same Monica contact id resolves to new local contact (no stale import_records)", + ctx do + # Initial import: contact + import_record for Monica id 964 + import_a = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + contact_a = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_a, "contact", "964", "contact", contact_a.id) + + # Run reset + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target account fully wiped + assert count(Contact, ctx.target_account) == 0 + assert count(Import, ctx.target_account) == 0 + assert count(ImportRecord, ctx.target_account) == 0 + + # Re-import: new contact + new import_record for the same Monica id + import_b = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: 
"https://monica.test", + api_key_encrypted: "k" + }) + + contact_b = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_b, "contact", "964", "contact", contact_b.id) + + # The photo-sync lookup that previously found stale data now resolves correctly + assert %{local_entity_id: local_id} = + Imports.find_import_record(ctx.target_account, "monica_api", "contact", "964") + + assert local_id == contact_b.id + end + end + + describe "perform/1 — cross-account isolation" do + test "resetting account A does not touch any data in account B", ctx do + target_contact = populate_data!(ctx.target_account, ctx.target_user) + _other_contact = populate_data!(ctx.other_account, ctx.other_user) + + before_other = snapshot(ctx.other_account) + + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target wiped across every domain + assert empty?(ctx.target_account) + + # Other account is bit-identical to before + assert snapshot(ctx.other_account) == before_other + + # Sanity: target_contact is gone, other account still has its contact + refute Repo.get(Contact, target_contact.id) + end + end + + defp populate_data!(account_id, user_id) do + contact = contact_fixture(account_id) + + {:ok, _} = + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + |> then(&Imports.record_imported_entity(&1, "contact", "1", "contact", contact.id)) + + Repo.insert!(%Tag{account_id: account_id, name: "t"}) + + Repo.insert!(%Activity{ + account_id: account_id, + summary: "a", + happened_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%TaskSchema{ + account_id: account_id, + creator_id: user_id, + title: "x" + }) + + Repo.insert!(%Entry{ + account_id: account_id, + author_id: user_id, + content: "c", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Conversation{ + account_id: account_id, + 
creator_id: user_id, + contact_id: contact.id, + subject: "s", + platform: "other", + status: "active", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + _reminder = reminder_fixture(account_id, contact.id, user_id) + + {:ok, _} = + Kith.AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "test", + event: "account_data_reset", + metadata: %{} + }) + + contact + end + + defp snapshot(account_id) do + %{ + contacts: count(Contact, account_id), + imports: count(Import, account_id), + import_records: count(ImportRecord, account_id), + conversations: count(Conversation, account_id), + tasks: count(TaskSchema, account_id), + journal_entries: count(Entry, account_id), + reminders: count(Reminder, account_id), + tags: count(Tag, account_id), + activities: count(Activity, account_id), + audit_logs: count(AuditLog, account_id) + } + end + + defp empty?(account_id) do + snapshot(account_id) == + %{ + contacts: 0, + imports: 0, + import_records: 0, + conversations: 0, + tasks: 0, + journal_entries: 0, + reminders: 0, + tags: 0, + activities: 0, + audit_logs: 0 + } + end + + defp count(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end +``` + +NOTE: If `populate_data!` fails to insert any record due to schema mismatch (e.g. an `Activity` requires `kind` or `actor_id`), read the schema file (`lib/kith/activities/activity.ex` etc.) and add the missing required fields. The shape above is based on the moduledoc reading — adjust as needed. + +- [ ] **Step 3: Run the new tests** + +```bash +mix test test/kith/workers/account_reset_worker_test.exs +``` + +Expected: 2 tests, 0 failures. If a schema insert fails, fix the populate_data! helper and re-run. + +- [ ] **Step 4: Run the FULL test suite** + +```bash +mix test +``` + +Expected: all tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add test/kith/workers/account_reset_worker_test.exs +git commit -m "test: add regression + cross-account isolation tests for AccountResetWorker" +``` + +--- + +## Task 13: Final verification + push + +- [ ] **Step 1: Verify the full quality pipeline** + +```bash +mix quality +``` + +Expected: compile + format + credo + sobelow + dialyzer all clean. The pre-commit hook will have caught most issues already, but run once explicitly. + +- [ ] **Step 2: Confirm no stale references to deleted private helpers in the worker** + +```bash +grep -n "delete_contacts_in_batches\|delete_tags\|delete_activities\|delete_audit_logs\|delete_stored_files\|cancel_reminder_jobs" lib/kith/workers/account_reset_worker.ex +``` + +Expected: no matches (all moved into cleanup modules). + +- [ ] **Step 3: Push the branch** + +```bash +git push +``` + +- [ ] **Step 4: Manual verification on dev (operator step)** + +The implementing engineer should report this step to the operator: + +> On the dev environment: +> 1. Run a Monica API import with the "Import photos" option checked. +> 2. Trigger account reset via Settings → Account. +> 3. Re-run the same Monica API import with photos. +> 4. Confirm `MonicaPhotoSyncWorker` completes successfully (no "contact is deleted" errors). +> 5. Run `tail -f log/dev.log | grep -E '\[AccountReset|Cleanup|JobCancellation\]'` — should show the structured per-step progress. + +If the manual test surfaces an issue, file it as a follow-up — the spec's automated tests (regression + isolation) should have caught any structural breakage.
+ +--- + +## Spec coverage check (skill-required self-review) + +Each spec requirement → corresponding task: + +| Spec requirement | Tasks | +|---|---| +| Wipe `imports` + `import_records` | Task 1 | +| Cancel in-flight Oban jobs (import_id + account_id scoped) | Task 2 | +| Wipe stored files (photos, documents, import uploads) | Task 3 | +| Wipe contacts (CASCADE) + tags | Task 4 | +| Wipe conversations (CASCADE → messages) | Task 5 | +| Wipe journal_entries | Task 6 | +| Wipe tasks | Task 7 | +| Wipe reminders + cancel their Oban jobs (CASCADE → rules, instances) | Task 8 | +| Wipe activities | Task 9 | +| Wipe audit_logs (last) | Task 10 | +| Worker becomes orchestrator; old helpers removed | Task 11 | +| Regression test for user-reported bug | Task 12 | +| Cross-account isolation test on worker | Task 12 | +| Every cleanup module has a "control account untouched" assertion | Tasks 1–10 | +| Idempotency assertion in every cleanup module | Tasks 1–10 | +| Order: jobs → files → contacts → imports → conversations → reminders → tasks → journal → activities → audit | Task 11 (`@cleaners` list) | +| `safe_delete_file/1` warn-and-continue (no raise on storage errors) | Task 3 | + +All requirements covered. diff --git a/docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md b/docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md new file mode 100644 index 0000000..a6f9c82 --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-monica-import-deployment-fixes.md @@ -0,0 +1,543 @@ +# Monica Import Deployment Fixes Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Stop Monica imports from crashing on the worker container, route Oban jobs exclusively to the worker, and cluster the two BEAM nodes so LiveView progress broadcasts cross containers. 
+ +**Architecture:** Move `Phoenix.PubSub` + `DNSCluster` from `mode_children/0` to `base_children/0` so worker mode also starts them. In `runtime.exs` (`:prod` only), gate Oban to insert-only when `KITH_MODE=web`. In `docker-compose.prod.yml`, give each container a unique hostname plus a shared network alias (`kith-cluster`), share `RELEASE_COOKIE`, and set `DNS_CLUSTER_QUERY=kith-cluster`. Phoenix.PubSub's default PG2 adapter then fans broadcasts across both nodes automatically. + +**Tech Stack:** Elixir 1.18, Phoenix LiveView, Phoenix.PubSub (PG2), DNSCluster 0.2+, Oban 2.18, Docker Compose v2. + +**Reference spec:** `docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md` + +--- + +## Task 1: Move PubSub + DNSCluster to base_children + +**Files:** +- Modify: `lib/kith/application.ex` + +- [ ] **Step 1: Inspect the current supervisor tree** + +Run: `grep -n "Phoenix.PubSub\|DNSCluster\|mode_children\|base_children" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/application.ex` + +Expected: matches at the function heads of `base_children/0` and `mode_children/0`, plus the existing PubSub + DNSCluster child specs inside `mode_children/0`'s `_web` branch. 
+ +- [ ] **Step 2: Edit `base_children/0` and `mode_children/0`** + +In `lib/kith/application.ex`, find the current block: + +```elixir + defp base_children do + # Install fuse circuit breakers before starting supervised children + Kith.Geocoding.install_fuse() + Kith.Weather.install_fuse() + # Attach Sentry telemetry handler for Oban job failures + Kith.SentryEventHandler.attach() + # Capture crashes via Erlang logger handler (Sentry v10+, replaces PlugCapture) + :logger.add_handler(:sentry_handler, Sentry.LoggerHandler, %{}) + + [ + Kith.Vault, + Kith.Repo, + {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, + {Oban, Application.fetch_env!(:kith, Oban)}, + {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, + {Task.Supervisor, name: Kith.TaskSupervisor} + ] + end + + defp mode_children do + case System.get_env("KITH_MODE", "web") do + "worker" -> + [] + + _web -> + [ + Kith.PromEx, + KithWeb.Telemetry, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore}, + {Phoenix.PubSub, name: Kith.PubSub}, + KithWeb.Endpoint + ] + end + end +``` + +Replace with: + +```elixir + defp base_children do + # Install fuse circuit breakers before starting supervised children + Kith.Geocoding.install_fuse() + Kith.Weather.install_fuse() + # Attach Sentry telemetry handler for Oban job failures + Kith.SentryEventHandler.attach() + # Capture crashes via Erlang logger handler (Sentry v10+, replaces PlugCapture) + :logger.add_handler(:sentry_handler, Sentry.LoggerHandler, %{}) + + [ + Kith.Vault, + Kith.Repo, + {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, + {Oban, Application.fetch_env!(:kith, Oban)}, + {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, + {Task.Supervisor, name: Kith.TaskSupervisor}, + # PubSub + DNSCluster live here (not in mode_children) so worker mode + # also starts them. 
Required for cross-container progress broadcasts + # in the split-deployment topology (`docker-compose.prod.yml`). + {Phoenix.PubSub, name: Kith.PubSub}, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore} + ] + end + + defp mode_children do + case System.get_env("KITH_MODE", "web") do + "worker" -> + [] + + _web -> + [ + Kith.PromEx, + KithWeb.Telemetry, + KithWeb.Endpoint + ] + end + end +``` + +Notes: +- PubSub appears before `KithWeb.Endpoint` in startup order, because base_children precedes mode_children in `start/2`. `KithWeb.Endpoint` reads `pubsub_server: Kith.PubSub` from config — the registry is ready before it's needed. +- `DNSCluster` is harmless when its query is `:ignore` (the current default when no `DNS_CLUSTER_QUERY` env var is set). + +- [ ] **Step 3: Compile and run the suite** + +Run: `mix compile --warnings-as-errors && mix test` + +Expected: PASS. 1138 tests, 0 failures (current baseline). `Kith.PubSub` is now running in test env too, which is invisible to test code (no test subscribes/broadcasts; the existing ones use it transparently via LiveView mounts). + +- [ ] **Step 4: Manual smoke check — worker-mode startup** + +Run: `KITH_MODE=worker iex -S mix` + +Expected: app starts, no crash. Inside IEx, verify PubSub is running: + +```elixir +Process.whereis(Kith.PubSub) +# Expected: a PID, not nil +``` + +Exit IEx by pressing Ctrl-C twice (or by running `System.halt()`).
+ +- [ ] **Step 5: Commit** + +```bash +cd /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection +git add lib/kith/application.ex +git commit -m "fix: start PubSub + DNSCluster in base_children for worker mode" +``` + +--- + +## Task 2: Gate Oban queues by KITH_MODE in :prod + +**Files:** +- Modify: `config/runtime.exs` + +- [ ] **Step 1: Find the rate-limiting block (anchor for the new block)** + +Run: `grep -n "Rate limiting" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/config/runtime.exs` + +Expected: a match around line 208 (`# Rate limiting — optional Redis backend`). + +- [ ] **Step 2: Add the Oban gating block** + +In `config/runtime.exs`, find the existing rate-limiting block ending around the existing `if System.get_env("RATE_LIMIT_BACKEND") == "redis" do ... end` block. Immediately AFTER that `end`, but still inside the outer `if config_env() == :prod do` block, add: + +```elixir + # Oban — only the worker container processes jobs in production. + # The web container can call `Oban.insert/1` to enqueue jobs, but + # runs no queues or plugins (no cron, no pruner) — so it never claims + # rows from `oban_jobs`. The worker container keeps the full config + # from `config.exs`. + # + # Dev (`config_env() == :dev`) is unaffected: this block only runs in + # `:prod`. Test env is pinned to `testing: :manual` in `config/test.exs`. + case System.get_env("KITH_MODE", "web") do + "worker" -> + :ok + + _web -> + config :kith, Oban, queues: false, plugins: false + end +``` + +Make sure indentation matches the surrounding `:prod` block (two spaces). + +- [ ] **Step 3: Verify placement is inside the `:prod` guard** + +Run: `grep -n "config_env\|config :kith, Oban" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/config/runtime.exs` + +Expected: the new `config :kith, Oban` line appears between the `if config_env() == :prod do` line and its closing `end`. 
The `case KITH_MODE` should NOT be at the top level of the file. + +- [ ] **Step 4: Run the test suite** + +Run: `mix test` + +Expected: PASS, 1138 tests, 0 failures. Test env is `:test` (not `:prod`), so the new block is unreached. + +- [ ] **Step 5: Smoke-check the prod compilation path** + +Run: `MIX_ENV=prod mix compile 2>&1 | tail -20` + +Expected: clean compile (the runtime.exs file is read but not evaluated at compile time, so any logical mistakes won't surface here — Step 6's IEx test is the real check). + +- [ ] **Step 6: Manual IEx check (simulate prod KITH_MODE=web)** + +Run: `MIX_ENV=prod KITH_MODE=web iex -S mix` + +Expected: app starts, then in IEx: + +```elixir +Application.get_env(:kith, Oban) |> Keyword.get(:queues) +# Expected: false +Application.get_env(:kith, Oban) |> Keyword.get(:plugins) +# Expected: false +``` + +Note: `MIX_ENV=prod iex -S mix` may fail if you don't have a prod DB / SECRET_KEY_BASE set. If it raises on startup before reaching IEx, switch to: + +```bash +MIX_ENV=prod KITH_MODE=web mix run -e 'IO.inspect(Application.fetch_env!(:kith, Oban))' +``` + +(also expected to raise on missing prod env vars, but the `config :kith, Oban, ...` mutation runs before that and you'll see `queues: false, plugins: false` in the inspected value if you can get it to surface. If you can't get prod env happily booting, skip this step and rely on Step 7's separate IEx-based KITH_MODE=worker check.) + +- [ ] **Step 7: Manual IEx check (simulate prod KITH_MODE=worker)** + +If prod-env IEx works: + +```bash +MIX_ENV=prod KITH_MODE=worker iex -S mix +``` + +Then: + +```elixir +Application.get_env(:kith, Oban) |> Keyword.get(:queues) +# Expected: a keyword list with default: 10, mailers: 10, ... (full config) +``` + +If prod IEx is not bootable in your local environment, accept that Step 4's test pass + the inline code review (Step 3) suffice; the real verification will happen in the docker-compose smoke test at Task 5. 
+ +- [ ] **Step 8: Commit** + +```bash +git add config/runtime.exs +git commit -m "fix: gate Oban queues by KITH_MODE in :prod (web=insert-only)" +``` + +--- + +## Task 3: Add clustering env to `docker-compose.prod.yml` + +**Files:** +- Modify: `docker-compose.prod.yml` + +- [ ] **Step 1: Locate the `app` service definition** + +Run: `grep -n "^ app:\|^ worker:" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/docker-compose.prod.yml` + +Expected: `app:` around line 61, `worker:` around line 135. + +- [ ] **Step 2: Add hostname + env vars + network alias to the `app` service** + +In `docker-compose.prod.yml`, find the `app:` block. Insert a `hostname:` field right after `command:` (or another visible top-level field) and add the three new env vars in its `environment:` block. Then add a `networks:` block at the same level as `environment:`. + +The `app` service block should look like: + +```yaml + app: + image: kith:latest + command: ["start"] + hostname: kith-app + depends_on: + migrate: + condition: service_completed_successfully + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + read_only: true + tmpfs: + - /tmp:size=64M + environment: + # ── BEAM distribution / clustering ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster + # ── existing env vars unchanged ── + DATABASE_URL: ${DATABASE_URL} + SECRET_KEY_BASE: ${SECRET_KEY_BASE} + # ... (leave the rest of the env block unchanged) + networks: + default: + aliases: + - kith-cluster + volumes: + - uploads:/app/uploads + # ... (rest unchanged) +``` + +Important: the existing block does not declare a `networks:` section because Compose creates a default network automatically. The new `networks:` section attaches this service to that same default network, with the `kith-cluster` alias added. 
Compose accepts this without explicit network definition; if Compose complains about missing top-level `networks:` declaration, add this block at the bottom of the file (outside any service): + +```yaml +networks: + default: + name: kith_default +``` + +(only add the top-level block if Compose errors without it — start with just the per-service alias block and only add the top-level if needed.) + +- [ ] **Step 3: Add the same three env vars + alias + hostname to the `worker` service** + +In the `worker:` block, mirror the changes from Step 2 but use `kith-worker` as the hostname: + +```yaml + worker: + image: kith:latest + command: ["start"] + hostname: kith-worker + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + read_only: true + tmpfs: + - /tmp:size=64M + depends_on: + postgres: + condition: service_healthy + migrate: + condition: service_completed_successfully + environment: + # ── BEAM distribution / clustering ── + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster + # ── existing env vars unchanged ── + DATABASE_URL: ${DATABASE_URL} + SECRET_KEY_BASE: ${SECRET_KEY_BASE} + # ... (leave the rest unchanged) + networks: + default: + aliases: + - kith-cluster + # ... (rest unchanged) +``` + +- [ ] **Step 4: Validate the compose file** + +Run: + +```bash +docker compose -f /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/docker-compose.prod.yml config 2>&1 | head -40 +``` + +Expected: a parsed render of the compose file with no error. The output should include: +- `hostname: kith-app` and `hostname: kith-worker` lines +- `RELEASE_COOKIE`, `RELEASE_DISTRIBUTION: name`, `DNS_CLUSTER_QUERY: kith-cluster` env keys on both services +- `aliases: [kith-cluster]` under both `app.networks.default` and `worker.networks.default` + +If `config` errors out about an undefined `RELEASE_COOKIE` env var, that's expected unless you've already added it to `.env`. 
Re-run with `RELEASE_COOKIE=$(openssl rand -base64 32) docker compose ... config`. The validation is about structure, not values. + +- [ ] **Step 5: Commit** + +```bash +git add docker-compose.prod.yml +git commit -m "infra: cluster app + worker containers via shared cookie + DNS alias" +``` + +--- + +## Task 4: Document `RELEASE_COOKIE` in `.env.example` + +**Files:** +- Modify: `.env.example` + +- [ ] **Step 1: Find the section anchor** + +Run: `grep -n "SECRET_KEY_BASE" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/.env.example` + +Expected: a line introducing the SECRET_KEY_BASE entry. Use this as the anchor. + +- [ ] **Step 2: Add the `RELEASE_COOKIE` entry** + +In `.env.example`, find the `SECRET_KEY_BASE` block and immediately AFTER it (after any comments and the SECRET_KEY_BASE= line itself), add: + +```bash +# Erlang BEAM distribution cookie. Shared between the app and worker +# containers so they can cluster for cross-container PubSub broadcasts +# (LiveView import progress). Generate with one of: +# mix phx.gen.secret 32 +# openssl rand -base64 32 +RELEASE_COOKIE= +``` + +The trailing empty value is intentional — `.env.example` uses empty placeholders elsewhere as a "fill this in" signal. Match the file's style; if the neighbouring secrets use a non-empty placeholder instead (e.g. `SECRET_KEY_BASE=generate-with-mix-phx-gen-secret`), mirror that. + +- [ ] **Step 3: Verify the example file** + +Run: `grep -B 5 "RELEASE_COOKIE" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/.env.example` + +Expected: see the comment block followed by the `RELEASE_COOKIE=` assignment (`-B` because the comment lines sit above the assignment). 
+ +- [ ] **Step 4: Commit** + +```bash +git add .env.example +git commit -m "docs: document RELEASE_COOKIE in .env.example" +``` + +--- + +## Task 5: Manual smoke verification (docker-compose.prod) + +**Files:** *(no code changes — verification only)* + +- [ ] **Step 1: Generate a cookie and put it in `.env`** + +```bash +cd /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection +echo "RELEASE_COOKIE=$(openssl rand -base64 32)" >> .env +chmod 600 .env +``` + +(skip if your `.env` already has `RELEASE_COOKIE` set.) + +- [ ] **Step 2: Build the prod image with the new code** + +```bash +docker build -t kith:latest . +``` + +Expected: a successful build. + +- [ ] **Step 3: Bring up the prod stack** + +```bash +docker compose -f docker-compose.prod.yml up -d +``` + +Wait ~30 seconds for migrate + app + worker to come up. Check status: + +```bash +docker compose -f docker-compose.prod.yml ps +``` + +Expected: `migrate` exited 0, `postgres` running healthy, `app` and `worker` both running. + +- [ ] **Step 4: Verify clustering** + +```bash +docker compose -f docker-compose.prod.yml exec app /app/bin/kith eval 'IO.inspect(Node.list())' +``` + +Expected: `[:"kith@kith-worker"]` + +```bash +docker compose -f docker-compose.prod.yml exec worker /app/bin/kith eval 'IO.inspect(Node.list())' +``` + +Expected: `[:"kith@kith-app"]` + +If either returns `[]`, wait 10 more seconds (DNSCluster polls periodically) and retry. If still empty, check the symptom matrix in the spec's "Failure modes to watch for" section. 
+ +- [ ] **Step 5: Verify Oban gating** + +```bash +docker compose -f docker-compose.prod.yml exec app /app/bin/kith eval \ + 'IO.inspect(Application.fetch_env!(:kith, Oban) |> Keyword.get(:queues))' +``` + +Expected: `false` + +```bash +docker compose -f docker-compose.prod.yml exec worker /app/bin/kith eval \ + 'IO.inspect(Application.fetch_env!(:kith, Oban) |> Keyword.get(:queues))' +``` + +Expected: `[default: 10, mailers: 10, reminders: 5, exports: 2, imports: 2, immich: 3, purge: 1]` + +- [ ] **Step 6: Trigger an import via the wizard** + +In a browser, open the app (URL per your local Caddy config, usually `http://localhost`), log in, go to **Settings → Import**, choose **Monica CRM (API)**, enter test credentials for your Monica instance, start the import. + +Observe (using `docker compose -f docker-compose.prod.yml logs -f`): + +- Worker container log shows `MonicaApiCrawlWorker` starting +- App container log does NOT show `Oban` executor logs +- Browser shows a progress bar updating in real time (PubSub crossed containers) +- On completion, the wizard shows the "import complete" UI + +- [ ] **Step 7: Verify the misc worker also runs on worker** + +While the import is running (or shortly after main crawl completes), check Oban's job table: + +```bash +docker compose -f docker-compose.prod.yml exec postgres \ + psql -U kith -d kith_prod -c \ + "SELECT id, worker, queue, state FROM oban_jobs ORDER BY id DESC LIMIT 10;" +``` + +Expected: see rows for `Kith.Workers.MonicaApiCrawlWorker` and (after main crawl completes) `Kith.Workers.MonicaMiscDataWorker`, all with `state = 'completed'` (or `executing` while in flight). + +- [ ] **Step 8: Verify no PubSub crash on worker** + +```bash +docker compose -f docker-compose.prod.yml logs worker | grep -i 'unknown registry\|Kith.PubSub' +``` + +Expected: empty output (no crashes referencing Kith.PubSub). 
+ +- [ ] **Step 9: Tear down** + +```bash +docker compose -f docker-compose.prod.yml down +``` + +(or leave running if you want to keep iterating.) + +- [ ] **Step 10: No commit for this task** (verification-only). + +--- + +## Self-review checklist + +Before handing off: + +1. **Spec coverage:** + - Bug A (PubSub crash in worker mode) → Task 1 ✓ + - Bug B (Oban race) → Task 2 ✓ + - Bug C (cross-container PubSub) → Task 1 + Task 3 ✓ + - `.env.example` documentation → Task 4 ✓ + - Verification → Task 5 ✓ + +2. **Placeholders:** Every step has concrete code/commands. No "TBD", "implement later", "add error handling". + +3. **Type consistency:** + - `KITH_MODE` env var spelled consistently (matches application.ex case statement) + - `RELEASE_COOKIE`, `RELEASE_DISTRIBUTION`, `DNS_CLUSTER_QUERY` consistent across compose + spec + - Network alias `kith-cluster` consistent on both services + `DNS_CLUSTER_QUERY` value + - Hostnames `kith-app` / `kith-worker` consistent with Node.list() expectations in Task 5 + +4. **Order safety:** + - Task 1 is safe in isolation (PubSub starts in worker mode, no behavior change in web mode) + - Task 2 builds on Task 1 (without Task 1, gating queues to worker means jobs run there and crash on PubSub broadcast) + - Task 3 builds on Task 2 (without clustering, gating means no LiveView progress) + - Task 4 is metadata-only + - Task 5 verifies the cumulative effect + + If anything stops working mid-implementation, intermediate state after Task 1 alone is strictly better than current state (crash is fixed; race remains). + +5. **Backout:** Each task is a single commit. Running `git revert` on a task's commit hash cleanly undoes that one task without affecting the others (Task 2 depends on Task 1 for correctness but not for compile; the inverse holds for Task 3 + Task 2). 
diff --git a/docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md b/docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md new file mode 100644 index 0000000..ed3c8fa --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-monica-import-perf-fix.md @@ -0,0 +1,1997 @@ +# Monica Import Performance Fix Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Restore the Monica importer to a reasonable runtime by extracting Phase 4 (per-contact extra data) into a dedicated throttled background worker, collapsing the double retry layer, and paying back the perf debt introduced in commit `6af91bf`. + +**Architecture:** Phase 4 moves out of `MonicaApi.crawl/5` into a new `MonicaMiscDataWorker` Oban job enqueued by the existing `MonicaApiCrawlWorker` on success. The new worker consumes a plan built during main crawl that pre-filters contacts by Monica's `statistics.number_of_*` fields. A single `Hammer`-backed `RateLimiter` paces every outbound Monica call (~55 req/min) so 429s become rare; the hand-rolled retry wrapper is deleted and `Req`'s built-in `:safe_transient` retry is the sole retry source. Two cleanups: phone-cft lookup moves from a `:persistent_term`-cached boolean into a `MapSet` on `ref_data`, and `Contacts.create_contact_field` accepts an explicit `normalize: false` option so the Monica path skips the redundant second normalization. + +**Tech Stack:** Elixir 1.18, Phoenix LiveView, Oban 2.18 (queue `:imports`), Req 0.5, Hammer 6.2, ex_phone_number 0.4. 
+ +**Reference spec:** `docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md` + +--- + +## Task 1: Add the RateLimiter module + unit tests + +**Files:** +- Create: `lib/kith/imports/sources/monica_api/rate_limiter.ex` +- Create: `test/kith/imports/sources/monica_api/rate_limiter_test.exs` + +- [ ] **Step 1: Inspect the Hammer setup in the project so the new module uses the same backend** + +Run: `grep -rn "Hammer\|hammer:" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/config /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib --include='*.exs' --include='*.ex' | head -20` + +Expected: see `:hammer` config and an existing usage (e.g. `KithWeb.Plugs.RateLimiter`) calling `Hammer.check_rate/3`. Note the backend module (likely `Hammer.Backend.ETS`) so test setup mirrors it. + +- [ ] **Step 2: Write the failing tests** + +Create `test/kith/imports/sources/monica_api/rate_limiter_test.exs`: + +```elixir +defmodule Kith.Imports.Sources.MonicaApi.RateLimiterTest do + use ExUnit.Case, async: false + + alias Kith.Imports.Sources.MonicaApi.RateLimiter + + # Tests run with the real Hammer backend; we use a unique host per test + # so buckets do not collide between tests. + + setup do + # Force a low limit for predictable timing. 
+ prev = Application.get_env(:kith, :monica_rate_limit) + Application.put_env(:kith, :monica_rate_limit, 3) + on_exit(fn -> Application.put_env(:kith, :monica_rate_limit, prev) end) + :ok + end + + defp unique_host, do: "test-#{System.unique_integer([:positive])}.example" + + describe "wait!/1" do + test "returns :ok immediately while under the per-minute budget" do + host = unique_host() + + {us, _} = + :timer.tc(fn -> + for _ <- 1..3, do: assert :ok = RateLimiter.wait!("https://#{host}") + end) + + assert us < 50_000, "expected sub-50ms for 3 calls under the budget, got #{us}us" + end + + test "sleeps once the budget is exhausted" do + host = unique_host() + for _ <- 1..3, do: RateLimiter.wait!("https://#{host}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host}") end) + + # One inter-call sleep (≈1100ms) is enough to clear back into the window + # for the test's tiny limit. Allow generous slack. + assert us >= 1_000_000, "expected ≥1s wait when over budget, got #{us}us" + end + + test "per-host buckets do not share quota" do + host_a = unique_host() + host_b = unique_host() + + for _ <- 1..3, do: RateLimiter.wait!("https://#{host_a}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host_b}") end) + assert us < 50_000, "host_b should be in its own bucket" + end + + test "extracts the host portion of a URL for the bucket key" do + url1 = "https://example.test/api/contacts" + url2 = "https://example.test/api/me" + + # Same host → same bucket → exhausting via url1 should impact url2. + for _ <- 1..3, do: RateLimiter.wait!(url1) + + {us, _} = :timer.tc(fn -> RateLimiter.wait!(url2) end) + assert us >= 1_000_000 + end + end +end +``` + +- [ ] **Step 3: Run the tests to verify they fail** + +Run: `mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs` + +Expected: FAIL with `(UndefinedFunctionError) function Kith.Imports.Sources.MonicaApi.RateLimiter.wait!/1 is undefined`. 
+ +- [ ] **Step 4: Write the module** + +Create `lib/kith/imports/sources/monica_api/rate_limiter.ex`: + +```elixir +defmodule Kith.Imports.Sources.MonicaApi.RateLimiter do + @moduledoc """ + Per-host token bucket for outbound Monica API calls. + + Configured five requests below Monica's documented default of 60 requests + per minute, leaving a small safety margin so a minor clock skew or + burst on Monica's side does not push us into the 429 window. + + Configurable via: + + config :kith, :monica_rate_limit, 55 + + Per-test overrides go through `Application.put_env/3`. + + Hammer (already a dep) supplies the underlying token bucket; we use a + bucket key per Monica host so independent Monica instances do not share + a quota. Calls block the caller process via `Process.sleep/1` until a + token is available, then return `:ok`. + """ + + @scale_ms 60_000 + @default_limit 55 + @retry_sleep_ms 1_100 + + @doc """ + Block until a request token is available for the given Monica host. + + `url_or_host` may be a full URL (the host is extracted) or a bare host + string. Returns `:ok` once a token has been claimed. + """ + @spec wait!(String.t()) :: :ok + def wait!(url_or_host) when is_binary(url_or_host) do + bucket = bucket_key(url_or_host) + limit = Application.get_env(:kith, :monica_rate_limit, @default_limit) + + case Hammer.check_rate(bucket, @scale_ms, limit) do + {:allow, _count} -> + :ok + + {:deny, _retry_after_ms} -> + Process.sleep(@retry_sleep_ms) + wait!(url_or_host) + end + end + + defp bucket_key(url_or_host) do + host = URI.parse(url_or_host).host || url_or_host + "monica_api:#{host}" + end +end +``` + +- [ ] **Step 5: Run the tests to verify they pass** + +Run: `mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs` + +Expected: PASS, 4 tests. + +- [ ] **Step 6: Verify the rest of the suite still passes** + +Run: `mix test` + +Expected: PASS, no new failures. 
(If `mix test` triggers Hammer initialization that wasn't set up, surface it now rather than later.) + +- [ ] **Step 7: Commit** + +```bash +cd /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection +git add lib/kith/imports/sources/monica_api/rate_limiter.ex test/kith/imports/sources/monica_api/rate_limiter_test.exs +git commit -m "feat: add Monica API per-host rate limiter (55/min)" +``` + +--- + +## Task 2: Config knobs for the rate limit + +**Files:** +- Modify: `config/config.exs` +- Modify: `config/test.exs` + +- [ ] **Step 1: Add the production default** + +Open `config/config.exs`. After the existing `config :ex_cldr, default_backend: Kith.Cldr` line (added in commit `6af91bf`), add: + +```elixir +# Outbound rate limit for Monica API calls. Five below the documented +# default of 60 req/min leaves a small safety margin. +config :kith, :monica_rate_limit, 55 +``` + +- [ ] **Step 2: Add a high-ceiling override for tests** + +Open `config/test.exs`. After the existing `config :ex_phone_number, metadata_file: ...` line (added in commit `6af91bf`), add: + +```elixir +# Effectively unthrottled in tests — throttle logic is exercised in +# isolation in rate_limiter_test.exs, not via the full crawl integration. +config :kith, :monica_rate_limit, 1_000_000 +``` + +- [ ] **Step 3: Verify both configs compile and tests still pass** + +Run: `mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs && mix test` + +Expected: PASS. The rate_limiter test brackets its own override, so the high test default doesn't break it. The rest of the suite shouldn't notice. 
+ +- [ ] **Step 4: Commit** + +```bash +git add config/config.exs config/test.exs +git commit -m "chore: configure Monica API rate limit (55/min prod, unlimited test)" +``` + +--- + +## Task 3: Wire RateLimiter into `api_get` and collapse the double retry + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`@max_rate_limit_retries`, `@rate_limit_sleep_ms`, `api_get`, `api_get_json`, `api_get_json_with_retry`) + +- [ ] **Step 1: Locate the existing functions** + +Run: `grep -n "@max_rate_limit_retries\|@rate_limit_sleep_ms\|defp api_get\|defp api_get_json" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: matches around lines 37-38 (module attrs), 1101 (`api_get`), 1109 (`api_get_json`), 1113-1118 (`api_get_json_with_retry`). + +- [ ] **Step 2: Add the alias** + +In `lib/kith/imports/sources/monica_api.ex`, find the `alias` block near the top (currently includes `Kith.Contacts.PhoneFormatter`). Add right after the existing aliases: + +```elixir + alias Kith.Imports.Sources.MonicaApi.RateLimiter +``` + +- [ ] **Step 3: Delete the two module attributes** + +In the same file, find and delete the lines: + +```elixir + @max_rate_limit_retries 3 + @rate_limit_sleep_ms :timer.seconds(65) +``` + +- [ ] **Step 4: Replace `api_get/3` with the throttled version** + +Find the existing `api_get/3`: + +```elixir + defp api_get(credential, url, params \\ []) do + headers = [{"Authorization", "Bearer #{credential.api_key}"}, {"Accept", "application/json"}] + req_options = Map.get(credential, :req_options, []) + options = [headers: headers, params: params] ++ req_options + + Req.get(url, options) + end +``` + +Replace with: + +```elixir + defp api_get(credential, url, params \\ []) do + RateLimiter.wait!(credential.url) + + headers = [{"Authorization", "Bearer #{credential.api_key}"}, {"Accept", "application/json"}] + req_options = Map.get(credential, :req_options, []) + + options = + [ + 
headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ req_options + + Req.get(url, options) + end +``` + +`max_retries: 5` overrides Req's default of 3 so a sustained 429 window doesn't terminate the call. `retry_log_level: :warn` keeps the existing log visibility. + +- [ ] **Step 5: Replace `api_get_json/3` and delete `api_get_json_with_retry/4`** + +Find: + +```elixir + defp api_get_json(credential, url, params) do + api_get_json_with_retry(credential, url, params, 0) + end + + defp api_get_json_with_retry(_credential, _url, _params, retries) + when retries >= @max_rate_limit_retries do + {:error, :rate_limited} + end + + defp api_get_json_with_retry(credential, url, params, retries) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> + {:ok, body} + + {:ok, %{status: 429}} -> + Logger.info( + "[MonicaApi] Rate limited, sleeping #{@rate_limit_sleep_ms}ms (retry #{retries + 1})" + ) + + Process.sleep(@rate_limit_sleep_ms) + api_get_json_with_retry(credential, url, params, retries + 1) + + {:ok, %{status: status}} -> + {:error, "Unexpected status: #{status}"} + + {:error, reason} -> + {:error, reason} + end + end +``` + +Replace the entire block with: + +```elixir + defp api_get_json(credential, url, params) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} + end + end +``` + +The `{:error, :rate_limited}` shape is preserved — it's matched by callers (e.g. line 183 of `crawl_contacts_loop`, line 949 of `fetch_extra_notes_for_contact`). After 5 internal Req retries we surface rate-limited rather than silently looping. 
+ +- [ ] **Step 6: Run the existing Monica tests to verify behavior is preserved** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs test/kith/workers/monica_api_crawl_worker_test.exs` + +Expected: PASS. The contract callers depend on (`{:ok, body}` / `{:error, :rate_limited}` / `{:error, other}`) is unchanged. + +- [ ] **Step 7: Spot-check no dangling references to deleted attrs** + +Run: `grep -n "max_rate_limit_retries\|rate_limit_sleep_ms\|api_get_json_with_retry" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: no matches. If anything remains, delete it. + +- [ ] **Step 8: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex +git commit -m "refactor: collapse Monica double-retry to Req's built-in + RateLimiter" +``` + +--- + +## Task 4: Add `normalize: false` opt to `Contacts.create_contact_field` + +**Files:** +- Modify: `lib/kith/contacts.ex` (line 390) +- Modify: `test/kith/contacts_sub_entities_test.exs` + +- [ ] **Step 1: Inspect the existing test file to follow its setup pattern** + +Run: `grep -n "describe\|create_contact_field" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/test/kith/contacts_sub_entities_test.exs | head -20` + +Expected: see the existing describe blocks for `create_contact_field/2`. Note the setup helpers used (likely `setup_account()`, `seed_reference_data!()`). 
+ +- [ ] **Step 2: Add a failing test** + +In `test/kith/contacts_sub_entities_test.exs`, find the `describe "create_contact_field/2"` block (or the location of existing contact_field tests) and add inside it: + +```elixir + test "create_contact_field/3 with normalize: false skips phone normalization", + %{account: account, phone_field_type: phone_type} do + contact = insert(:contact, account: account) + + # Value that PhoneFormatter.normalize/1 would change (no region, but + # +-prefixed numbers get parsed and re-emitted as canonical E.164). + # We assert the value is stored unchanged when normalization is skipped. + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = + Kith.Contacts.create_contact_field(contact, attrs, normalize: false) + + assert field.value == "+1 (202) 555-0100" + end + + test "create_contact_field/3 with normalize: true (default) normalizes phone", + %{account: account, phone_field_type: phone_type} do + contact = insert(:contact, account: account) + + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = Kith.Contacts.create_contact_field(contact, attrs) + assert field.value == "+12025550100" + end +``` + +If the existing tests don't already provide `phone_field_type` in the setup context, add a setup helper at the top of the describe block: + +```elixir + setup %{account: account} do + phone_type = + Kith.Repo.one!( + from t in "contact_field_types", + where: t.protocol == "tel:", + select: %{id: t.id}, + limit: 1 + ) + + {:ok, phone_field_type: phone_type} + end +``` + +Adapt this to whatever shape the file already uses — if the file's setup already returns the account, ensure the new helper merges with it rather than replacing it. 
+ +- [ ] **Step 3: Run the new tests, expect the first to fail** + +Run: `mix test test/kith/contacts_sub_entities_test.exs` (`mix test` has no pytest-style `-k` name filter; run the whole file, or target one test with `path/to/file.exs:LINE`) + +Expected: exactly one failure — the new 3-arity test — with `(UndefinedFunctionError) function Kith.Contacts.create_contact_field/3 is undefined`. The new 2-arity test and the pre-existing tests in the file should already pass. + +- [ ] **Step 4: Implement the 3-arity version** + +Open `lib/kith/contacts.ex` and find `create_contact_field/2` (around line 390): + +```elixir + def create_contact_field(%Contact{} = contact, attrs) do + attrs = maybe_normalize_phone(attrs) + + %ContactField{contact_id: contact.id, account_id: contact.account_id} + |> ContactField.changeset(attrs) + |> Repo.insert() + end +``` + +Replace with: + +```elixir + def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do + attrs = + if Keyword.get(opts, :normalize, true) do + maybe_normalize_phone(attrs) + else + attrs + end + + %ContactField{contact_id: contact.id, account_id: contact.account_id} + |> ContactField.changeset(attrs) + |> Repo.insert() + end +``` + +The default-arg `opts \\ []` keeps every existing 2-arity caller working without changes. Only callers that explicitly want to bypass normalization need to pass `normalize: false`. + +- [ ] **Step 5: Run the tests to verify both pass** + +Run: `mix test test/kith/contacts_sub_entities_test.exs` + +Expected: PASS, including both new `normalize` tests. + +- [ ] **Step 6: Run the full Contacts test files to verify no regressions** + +Run: `mix test test/kith/contacts_sub_entities_test.exs test/kith/contacts/contact_test.exs` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add lib/kith/contacts.ex test/kith/contacts_sub_entities_test.exs +git commit -m "feat: Contacts.create_contact_field/3 supports normalize: false opt" +``` + +--- + +## Task 5: Monica importer passes `normalize: false` to `create_contact_field` + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`create_contact_field/5` helper at ~line 452) +- Modify: `test/kith/imports/sources/monica_api_test.exs` + +- [ ] **Step 1: Locate the inner helper** + +Run: `grep -n "defp create_contact_field" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: one match near line 452. + +- [ ] **Step 2: Update the inner helper** + +Find: + +```elixir + defp create_contact_field(contact, field, cft_id, value, import_job) do + attrs = %{"value" => value, "contact_field_type_id" => cft_id} + + case Contacts.create_contact_field(contact, attrs) do +``` + +Replace the `Contacts.create_contact_field(contact, attrs)` call with: + +```elixir + case Contacts.create_contact_field(contact, attrs, normalize: false) do +``` + +The Monica path already normalizes phone values upfront in `normalize_field_value/3` using the user-chosen region. The downstream `Contacts.maybe_normalize_phone/1` would re-parse the same E.164 value and do a redundant `Repo.get(ContactFieldType, ...)` per write. Skipping it saves ~2000 libphonenumber parses and ~5000 DB round trips per 1000-contact import. + +- [ ] **Step 3: Write a test asserting Monica import doesn't double-normalize** + +Tricky to assert directly without instrumenting. Instead, add a behavioral test in `test/kith/imports/sources/monica_api_test.exs` that imports a phone field and verifies the stored value matches what `PhoneFormatter.normalize/2` would produce (i.e. 
the import path's own normalization is the single source of truth): + +In `test/kith/imports/sources/monica_api_test.exs`, find the existing test `"normalizes phone fields to E.164 when phone_default_region is set"` (added in commit `6af91bf`). Right after it, add: + +```elixir + test "phone normalization happens exactly once during import", + %{user: user, account_id: account_id} do + # Regression: Contacts.create_contact_field used to re-run + # maybe_normalize_phone on the already-E.164 value, costing one extra + # libphonenumber parse and one extra Repo.get per phone field. The + # behavioral assertion here is "value stored matches MonicaApi's own + # normalization output exactly, with no later mutation." + contacts = [ + contact_json( + id: 99, + first_name: "OnceOnly", + contact_fields: [ + contact_field_json(content: "(202) 555-0100", type_name: "Phone") + ] + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, _} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "phone_default_region" => "US" + }) + + rec = Imports.find_import_record(account_id, "monica_api", "contact", "99") + + values = + Repo.all(from cf in Contacts.ContactField, where: cf.contact_id == ^rec.local_entity_id) + |> Enum.map(& &1.value) + + assert "+12025550100" in values + end +``` + +This test passes both before and after Task 5; its purpose is to lock in the behavior so a future regression that re-introduces double-normalization (e.g. accidentally calling `normalize/1` with `nil` region on an already-canonical value) doesn't change the stored value. + +- [ ] **Step 4: Run the test and existing Monica tests** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS, all tests. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git commit -m "perf: skip redundant normalization in Monica contact_field writes" +``` + +--- + +## Task 6: Replace `:persistent_term` phone-cft cache with `ref_data` MapSet + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`crawl/5`, `build_or_update_ref_data/3`, `normalize_field_value/3`, delete `phone_field_type?/1`, delete `phone_field_type?(nil)`) + +- [ ] **Step 1: Locate the cache and the ref_data builders** + +Run: + +```bash +grep -n "phone_field_type?\|build_or_update_ref_data\|defp build_ref_data\|ref_data: ref_data\|ref_data ->" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex +``` + +Expected: matches at the cache (~line 432-450), `build_or_update_ref_data` (~line 864-870), `find_or_create_contact_field_types` (~line 956-966), and various ref_data references in the contact loop. + +- [ ] **Step 2: Read the existing `build_or_update_ref_data` and `find_or_create_contact_field_types`** + +Open `lib/kith/imports/sources/monica_api.ex`. Read both functions (~lines 860-970). Confirm the shape: `ref_data` is a map with keys including `contact_field_types: %{name => id}`. `build_or_update_ref_data` is called per page to merge in newly-discovered cft types. + +- [ ] **Step 3: Add a helper to compute phone-cft IDs from a set of cft IDs** + +In `lib/kith/imports/sources/monica_api.ex`, add a new private helper near the other ref_data helpers (place it just before `find_or_create_contact_field_types/2` so related code clusters together): + +```elixir + # Returns the subset of `cft_ids` whose protocol begins with "tel" (phone). + # Called when ref_data is built or refreshed; the resulting MapSet replaces + # the per-cft `:persistent_term` cache that triggered global GCs on cold + # imports. 
+ defp phone_cft_ids(account_id, cft_ids) when is_list(cft_ids) do + Repo.all( + from t in Contacts.ContactFieldType, + where: t.id in ^cft_ids, + where: is_nil(t.account_id) or t.account_id == ^account_id, + where: fragment("? LIKE 'tel%'", t.protocol), + select: t.id + ) + |> MapSet.new() + end +``` + +Account scope mirrors the existing pattern in `find_or_create_contact_field_types/2`. The `is_nil(t.account_id)` clause handles system-wide cft types seeded in test/dev. + +- [ ] **Step 4: Extend `ref_data` to carry `phone_cft_ids`** + +Find `build_or_update_ref_data/3` (the initial build path, ~line 864): + +```elixir + defp build_or_update_ref_data(account_id, contacts, nil) do + cfts = collect_api_contact_field_types(contacts) + + %{ + contact_field_types: find_or_create_contact_field_types(account_id, cfts) + } + end +``` + +Replace with: + +```elixir + defp build_or_update_ref_data(account_id, contacts, nil) do + cfts = collect_api_contact_field_types(contacts) + cft_map = find_or_create_contact_field_types(account_id, cfts) + + %{ + contact_field_types: cft_map, + phone_cft_ids: phone_cft_ids(account_id, Map.values(cft_map)) + } + end +``` + +Find the update path (the function head matching when `ref_data` is non-nil, ~line 886): + +```elixir + defp build_or_update_ref_data(account_id, contacts, ref_data) do + new_cfts = + contacts + |> collect_api_contact_field_types() + |> Enum.reject(&Map.has_key?(ref_data.contact_field_types, &1)) + + %{ + ref_data | + contact_field_types: + Map.merge( + ref_data.contact_field_types, + find_or_create_contact_field_types(account_id, new_cfts) + ) + } + end +``` + +Replace with: + +```elixir + defp build_or_update_ref_data(account_id, contacts, ref_data) do + new_cfts = + contacts + |> collect_api_contact_field_types() + |> Enum.reject(&Map.has_key?(ref_data.contact_field_types, &1)) + + if new_cfts == [] do + ref_data + else + added = find_or_create_contact_field_types(account_id, new_cfts) + merged_types = 
Map.merge(ref_data.contact_field_types, added) + + %{ + ref_data + | contact_field_types: merged_types, + phone_cft_ids: + MapSet.union( + ref_data.phone_cft_ids, + phone_cft_ids(account_id, Map.values(added)) + ) + } + end + end +``` + +The short-circuit when `new_cfts == []` avoids running the phone-cft query on every page when no new cft types appear (the common case). + +- [ ] **Step 5: Update `normalize_field_value/3` to take `ctx`** + +Find `normalize_field_value` (~line 419): + +```elixir + defp normalize_field_value(nil, _cft_id, _opts), do: nil + + defp normalize_field_value(value, cft_id, opts) when is_binary(value) do + if phone_field_type?(cft_id) do + region = opts["phone_default_region"] + region = if region in [nil, ""], do: nil, else: region + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end + end +``` + +Replace with: + +```elixir + defp normalize_field_value(nil, _cft_id, _ctx), do: nil + + defp normalize_field_value(value, cft_id, ctx) when is_binary(value) do + if MapSet.member?(ctx.ref_data.phone_cft_ids, cft_id) do + region = parse_phone_region(ctx.opts["phone_default_region"]) + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end + end + + defp parse_phone_region(region) when region in [nil, ""], do: nil + defp parse_phone_region(region) when is_binary(region), do: region +``` + +- [ ] **Step 6: Update the call site in `import_single_contact_field/4`** + +Find (~line 406): + +```elixir + defp import_single_contact_field(contact, field, ref_data, ctx) do + cft_name = get_in(field, ["contact_field_type", "name"]) + cft_id = if cft_name, do: Map.get(ref_data.contact_field_types, cft_name) + raw_value = field["content"] + value = normalize_field_value(raw_value, cft_id, ctx.opts) +``` + +Change the last line to pass `ctx`: + +```elixir + value = normalize_field_value(raw_value, cft_id, ctx) +``` + +- [ ] **Step 7: Delete 
`phone_field_type?/1`** + +Delete both clauses (~lines 432-450): + +```elixir + defp phone_field_type?(nil), do: false + + defp phone_field_type?(cft_id) do + case :persistent_term.get({__MODULE__, :phone_cft, cft_id}, :miss) do + :miss -> + result = + Repo.exists?( + from(t in Contacts.ContactFieldType, + where: t.id == ^cft_id and fragment("? LIKE 'tel%'", t.protocol) + ) + ) + + :persistent_term.put({__MODULE__, :phone_cft, cft_id}, result) + result + + result -> + result + end + end +``` + +- [ ] **Step 8: Run the full Monica test suite** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs test/kith/workers/monica_api_crawl_worker_test.exs` + +Expected: PASS. The behavior is unchanged externally — phones still normalize correctly when a region is supplied — only the internal mechanism shifts from `:persistent_term`+lazy-DB-query to `MapSet`-on-`ref_data`. + +- [ ] **Step 9: Verify no `:persistent_term` reads remain in the file** + +Run: `grep -n ":persistent_term" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: no matches. 
+ +- [ ] **Step 10: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex +git commit -m "perf: replace :persistent_term phone-cft cache with ref_data MapSet" +``` + +--- + +## Task 7: Add `collect_misc_data/5` and extend the deferred state + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (`crawl_all_contacts/1` initial state, contact loop wiring, new `@misc_endpoints` attribute, `collect_misc_data/5`) +- Modify: `test/kith/imports/sources/monica_api_test.exs` + +- [ ] **Step 1: Find the deferred state initialization** + +Run: `grep -n "deferred:\|extra_notes: \[\]\|first_met_through: \[\]" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex | head -10` + +Expected: a match in `crawl_all_contacts/1` (~line 156-163) where `deferred` is initialized as `%{first_met_through: [], relationships: [], extra_notes: []}`. + +- [ ] **Step 2: Add `misc_data: []` to the deferred initial state** + +Open `lib/kith/imports/sources/monica_api.ex`. Find the initialization (~line 156): + +```elixir + defp crawl_all_contacts(ctx) do + initial_state = %{ + page: 1, + total: nil, + acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, + deferred: %{first_met_through: [], relationships: [], extra_notes: []}, + ref_data: nil, + global_idx: 0 + } + + crawl_contacts_loop(ctx, initial_state) + end +``` + +Change to: + +```elixir + defp crawl_all_contacts(ctx) do + initial_state = %{ + page: 1, + total: nil, + acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, + deferred: %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }, + ref_data: nil, + global_idx: 0 + } + + crawl_contacts_loop(ctx, initial_state) + end +``` + +- [ ] **Step 3: Add the `@misc_endpoints` module attribute and helper** + +Find the location just below the existing `defp collect_extra_notes` (~line 583-599). 
After it, add: + +```elixir + @misc_endpoints [ + {:calls, "number_of_calls"}, + {:activities, "number_of_activities"}, + {:gifts, "number_of_gifts"}, + {:debts, "number_of_debts"}, + {:tasks, "number_of_tasks"}, + {:reminders, "number_of_reminders"}, + {:conversations, "number_of_conversations"} + ] + + # Build a plan entry for a contact's per-contact extra-data endpoints. + # An endpoint is included only if (a) the wizard opt for that data type is + # not explicitly false AND (b) Monica's `statistics.number_of_X` reports + # > 0 (or the stat field is missing — safer to fetch than to silently + # skip when Monica's payload shape is unfamiliar). + # + # `:pets` has no statistics field in Monica's contact payload, so it is + # included whenever the wizard opt is on. The redundant fetch for pet-free + # contacts is the documented cost. + defp collect_misc_data(deferred, api_contact, source_id, local_id, opts) do + stats = api_contact["statistics"] || %{} + + endpoints = + @misc_endpoints + |> Enum.filter(fn {key, stat_field} -> + opts[Atom.to_string(key)] != false and (stats[stat_field] || 1) > 0 + end) + |> Enum.map(&elem(&1, 0)) + + endpoints = if opts["pets"] != false, do: [:pets | endpoints], else: endpoints + + if endpoints == [] do + deferred + else + entry = %{ + source_id: to_string(source_id), + local_id: local_id, + endpoints: Enum.map(endpoints, &Atom.to_string/1) + } + + %{deferred | misc_data: [entry | deferred.misc_data]} + end + end +``` + +Note: `endpoints` are stringified before storing in the plan because the plan will eventually be serialized into Oban job args (JSON-encoded), where atoms don't round-trip cleanly. + +Note on the `(stats[stat_field] || 1) > 0` line: `|| 1` is the safe-default behavior — when the stat field is missing or nil from Monica's payload, we treat it as "≥ 1" so the endpoint fires. We do not want to silently skip data. 
+ +- [ ] **Step 4: Wire `collect_misc_data` into the contact processing loop** + +Find `collect_deferred_data/3` (the function that gathers deferred entries during the contact loop, ~line 569-580). It currently calls `collect_extra_notes`. Locate its callers (`import_api_contact_children/7` at ~line 377 or similar). + +Find the call site that invokes `collect_deferred_data` — the function signature is something like: + +```elixir + defp collect_deferred_data(api_contact, source_id, deferred) do + deferred + |> add_first_met_through_entry(api_contact, source_id) + |> add_relationship_entries(api_contact, source_id) + |> collect_extra_notes(api_contact, source_id) + end +``` + +The actual function name/shape may differ slightly — adapt. Add `collect_misc_data` as a step, threading through the `contact` (for its local id) and `opts`. Since `collect_deferred_data` currently only takes `(api_contact, source_id, deferred)`, the cleanest path is to **extend its signature** to take `(api_contact, source_id, local_id, deferred, opts)` and update the single caller in `import_api_contact_children/7`. + +In `import_api_contact_children/7` (~line 377), find the line: + +```elixir + deferred = collect_deferred_data(api_contact, source_id, deferred) +``` + +Change to: + +```elixir + deferred = collect_deferred_data(api_contact, source_id, contact.id, deferred, ctx.opts) +``` + +(`ctx.opts` was added to `ctx` in commit `6af91bf` — it's already in scope here.) 
+ +Then update `collect_deferred_data` itself to accept the new args and call `collect_misc_data`: + +```elixir + defp collect_deferred_data(api_contact, source_id, local_id, deferred, opts) do + deferred + |> add_first_met_through_entry(api_contact, source_id) + |> add_relationship_entries(api_contact, source_id) + |> collect_extra_notes(api_contact, source_id) + |> collect_misc_data(api_contact, source_id, local_id, opts) + end +``` + +Adapt to the exact existing function body — the principle is: thread `local_id` and `opts` in, append the `|> collect_misc_data(...)` step. + +- [ ] **Step 5: Add `misc_data_plan` to the `crawl/5` return summary** + +Find the `{:ok, %{...}}` map at the end of `crawl/5` (~line 129-138): + +```elixir + {:ok, + %{ + imported: acc.contacts, + contacts: acc.contacts, + notes: acc.notes, + skipped: acc.skipped, + merged: merge_result.merged, + error_count: error_count, + errors: Enum.take(all_errors, 50) + }} +``` + +Change to: + +```elixir + {:ok, + %{ + imported: acc.contacts, + contacts: acc.contacts, + notes: acc.notes, + skipped: acc.skipped, + merged: merge_result.merged, + error_count: error_count, + errors: Enum.take(all_errors, 50), + misc_data_plan: Enum.reverse(deferred.misc_data) + }} +``` + +The plan is reversed so contacts are listed in import order rather than the reverse-insertion order that `[entry | acc]` produces. `MonicaApiCrawlWorker` (Task 9) will read this key, use it for the misc-worker enqueue, then strip it before persisting the summary to the DB. + +Find where `deferred` is in scope at this return — it's the second element of the tuple returned by `crawl_all_contacts(ctx)` (~line 88). The code is expected to bind `{acc, deferred}` from that call already, but it does not yet use `deferred` at the return. Locate the bind: + +```elixir + {acc, deferred} = crawl_all_contacts(ctx) +``` + +Confirm `deferred` is in scope for the return tuple. If it isn't (you may see `{acc, _deferred}` ignoring it, or the variable may be shadowed), un-ignore it.
+ +- [ ] **Step 6: Write a unit test for `collect_misc_data` shape** + +In `test/kith/imports/sources/monica_api_test.exs`, find an existing describe block for `crawl/5` (or add a new one near the end). Add tests: + +```elixir + describe "crawl/5 — misc-data plan" do + test "includes a contact when statistics.number_of_calls > 0", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 1, + first_name: "Has", + last_name: "Calls", + statistics: %{"number_of_calls" => 3} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false + }) + + assert [%{source_id: "1", endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + + test "excludes a contact when all opts are off", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 2, + first_name: "AllOff", + statistics: %{"number_of_calls" => 5, "number_of_gifts" => 5} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => false, + "gifts" => false, + "pets" => false, + "activities" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert summary.misc_data_plan == [] + end + + test "includes :pets unconditionally when opt is on (no stat field)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 3, + first_name: "PetsOnly", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = 
api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "pets" => true, + "calls" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: ["pets"]}] = summary.misc_data_plan + end + + test "missing statistic field is treated as ≥1 (safe default)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 4, + first_name: "NoStats", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + end +``` + +Verify the test helpers `contact_json/1` and `contacts_page_json/4` accept a `statistics:` keyword. Check the existing test file for examples — if the helper doesn't currently take `statistics`, extend it to merge a `statistics:` key into the contact JSON. If you need to update the helper, do it in the same commit. + +- [ ] **Step 7: Run the new tests** + +Run: `mix test test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS, all tests (existing + 4 new). 
+ +- [ ] **Step 8: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex test/kith/imports/sources/monica_api_test.exs +git commit -m "feat: collect misc-data plan during Monica crawl" +``` + +--- + +## Task 8: Create `MonicaMiscDataWorker` with relocated per-contact helpers + +**Files:** +- Create: `lib/kith/workers/monica_misc_data_worker.ex` +- Create: `test/kith/workers/monica_misc_data_worker_test.exs` + +This task creates the worker as a self-contained module. The per-contact endpoint helpers (`import_contact_pets`, `_calls`, `_activities`, `_gifts`, `_debts`, `_tasks`, `_reminders`, `_conversations`) are **copied** from `MonicaApi` into the worker. The duplication is temporary — Task 9 removes them from `MonicaApi` once the worker is wired up. This staging preserves a "main suite still green" checkpoint between Tasks 8 and 9. + +- [ ] **Step 1: Inspect `MonicaPhotoSyncWorker` for the canonical worker pattern** + +Run: `cat /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/workers/monica_photo_sync_worker.ex | head -90` + +Note: queue, `use Oban.Worker` options, perform/1 args shape, status check, credential rebuild from args, summary update at end, broadcast pattern. Mirror these. + +- [ ] **Step 2: List the per-contact helper boundaries in `MonicaApi`** + +Run: `grep -n "^ defp import_contact_\|^ defp import_single_pet\|^ defp import_single_call\|^ defp import_single_activit\|^ defp import_single_gift\|^ defp import_single_debt\|^ defp import_single_task\|^ defp import_single_reminder\|^ defp import_single_conversat" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: a list of all the per-endpoint functions plus their per-item siblings. Note line ranges for copying. 
+ +- [ ] **Step 3: Write the failing test file first** + +Create `test/kith/workers/monica_misc_data_worker_test.exs`: + +```elixir +defmodule Kith.Workers.MonicaMiscDataWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + + alias Kith.Imports + alias Kith.Workers.MonicaMiscDataWorker + + @stub_name MonicaMiscDataReqStub + + setup do + user = user_fixture() + seed_reference_data!() + + Req.Test.set_req_test_from_context(self()) + + %{user: user, account_id: user.account_id} + end + + defp build_args(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key", + "plan" => plan, + "req_options" => [plug: {Req.Test, @stub_name}] + } + end + + defp api_import(account_id, user_id, api_options \\ %{}) do + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: api_options, + status: "completed" + }) + end + + describe "perform/1" do + test "fires only the endpoints listed in the plan", + %{user: user, account_id: account_id} do + contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + # Record all endpoint paths the worker calls. 
+ pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [ + %{ + "source_id" => "42", + "local_id" => contact.id, + "endpoints" => ["calls", "gifts"] + } + ] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + paths = collect_requests([]) + assert "/api/contacts/42/calls" in paths + assert "/api/contacts/42/gifts" in paths + refute "/api/contacts/42/pets" in paths + refute "/api/contacts/42/activities" in paths + end + + test "exits early when the import is cancelled", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + {:ok, _} = Imports.update_import_status(import_job, "cancelled", %{}) + + contact = contact_fixture(account_id) + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "skips contacts whose local row has been soft-deleted", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + contact = contact_fixture(account_id) + + Kith.Repo.update_all( + from(c in Kith.Contacts.Contact, where: c.id == ^contact.id), + set: [deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)] + ) + + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "writes per-endpoint counts to import_job.summary['misc']", + %{user: user, account_id: account_id} do + 
contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case conn.request_path do + "/api/contacts/1/calls" -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "called_at" => "2025-01-01", "contact_called" => true}, + %{"id" => 2, "called_at" => "2025-01-02", "contact_called" => false} + ] + }) + + _ -> + Req.Test.json(conn, %{"data" => []}) + end + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + updated = Imports.get_import!(import_job.id) + assert is_map(updated.summary["misc"]) + assert updated.summary["misc"]["calls"] >= 0 + end + end + + defp collect_requests(acc) do + receive do + {:request, path} -> collect_requests([path | acc]) + after + 0 -> Enum.reverse(acc) + end + end +end +``` + +The stub-via-Req.Test pattern matches what `monica_api_test.exs` already uses; copy whichever helper that file relies on if there's a shared fixture (e.g. `contact_field_json/1`). + +The `req_options` arg shape in `build_args/2` mirrors how the existing photo sync worker test injects `Req.Test` stubs into the worker; if the codebase uses a different injection point (e.g. via `Application.put_env`), adapt to that. + +- [ ] **Step 4: Run the test file, expect compilation failure** + +Run: `mix test test/kith/workers/monica_misc_data_worker_test.exs` + +Expected: FAIL with `(UndefinedFunctionError) function Kith.Workers.MonicaMiscDataWorker.__info__/1 is undefined`. + +- [ ] **Step 5: Implement the worker skeleton + relocated helpers** + +Create `lib/kith/workers/monica_misc_data_worker.ex`: + +```elixir +defmodule Kith.Workers.MonicaMiscDataWorker do + @moduledoc """ + Oban worker that imports the per-contact "miscellaneous" data types + (pets, calls, activities, gifts, debts, tasks, reminders, conversations) + for an already-completed Monica API crawl. 
+ + Enqueued by `Kith.Workers.MonicaApiCrawlWorker` on successful completion, + carrying: + + * `"import_id"` — the Import row this job belongs to. + * `"credential_url"`, `"credential_api_key"` — the credential needed to + keep calling Monica after the main crawl wipes `api_key_encrypted`. + Same pattern as `MonicaPhotoSyncWorker`. + * `"plan"` — list of `%{"source_id", "local_id", "endpoints"}` maps + pre-filtered during the main crawl using Monica's `statistics.*` + fields, so we only fire the endpoints with data. + + Throttled through `Kith.Imports.Sources.MonicaApi.RateLimiter` (same + per-host bucket as the main crawler). + + Exits early if the import has been cancelled or has failed. Contacts that + were soft-deleted between main-crawl completion and this job's dispatch + are silently skipped. + """ + + use Oban.Worker, queue: :imports, max_attempts: 3 + + require Logger + + import Ecto.Query, warn: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Imports.Sources.MonicaApi.RateLimiter + + @impl Oban.Worker + def timeout(_job), do: :timer.minutes(30) + + @impl Oban.Worker + def perform(%Oban.Job{args: args}) do + import_job = Imports.get_import!(args["import_id"]) + + if import_job.status in ["cancelled", "failed"] do + :ok + else + credential = build_credential(args) + plan = args["plan"] || [] + + counts = process_plan(plan, credential, import_job) + + summary = Map.put(import_job.summary || %{}, "misc", counts) + + Imports.update_import_status(import_job, import_job.status, %{summary: summary}) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_misc_complete, counts}) + + :ok + end + end + + defp build_credential(args) do + %{ + url: args["credential_url"], + api_key: args["credential_api_key"], + req_options: args["req_options"] || [] + } + end + + defp process_plan(plan, credential, import_job) do + initial = %{ + "pets" => 0, + "calls" => 0, + "activities" => 0, + "gifts" => 0, + "debts" => 0, +
"tasks" => 0, + "reminders" => 0, + "conversations" => 0 + } + + Enum.reduce(plan, initial, fn entry, counts -> + process_entry(entry, credential, import_job, counts) + end) + end + + defp process_entry(entry, credential, import_job, counts) do + contact = Contacts.get_contact_for_misc(entry["local_id"]) + + if contact == nil or not is_nil(contact.deleted_at) do + counts + else + Enum.reduce(entry["endpoints"] || [], counts, fn endpoint, counts -> + n = fire_endpoint(endpoint, credential, contact, entry["source_id"], import_job) + Map.update(counts, endpoint, n, &(&1 + n)) + end) + end + end + + defp fire_endpoint("pets", c, contact, src, ij), do: import_contact_pets(c, contact, src, ij) + + defp fire_endpoint("calls", c, contact, src, ij), + do: import_contact_calls(c, contact, src, ij) + + defp fire_endpoint("activities", c, contact, src, ij), + do: import_contact_activities(c, contact, src, ij) + + defp fire_endpoint("gifts", c, contact, src, ij), + do: import_contact_gifts(c, contact, src, ij) + + defp fire_endpoint("debts", c, contact, src, ij), + do: import_contact_debts(c, contact, src, ij) + + defp fire_endpoint("tasks", c, contact, src, ij), + do: import_contact_tasks(c, contact, src, ij) + + defp fire_endpoint("reminders", c, contact, src, ij), + do: import_contact_reminders(c, contact, src, ij) + + defp fire_endpoint("conversations", c, contact, src, ij), + do: import_contact_conversations(c, contact, src, ij) + + defp fire_endpoint(other, _, _, _, _) do + Logger.warning("[MonicaMiscData] unknown endpoint #{inspect(other)}; skipping") + 0 + end + + # ── Relocated per-contact helpers ──────────────────────────────────── + # + # Each helper makes one GET against Monica and inserts the returned items. + # Bodies are copied verbatim from MonicaApi; Task 9 removes the originals. + # Helpers return an integer count of successfully imported items so the + # worker can aggregate it into `summary["misc"]`. 
+ + # PASTE THE BODIES OF THE FOLLOWING FUNCTIONS FROM monica_api.ex HERE, + # ADAPTED TO THE NEW (credential, contact, source_id, import_job) SHAPE + # AND RETURNING AN INTEGER COUNT: + # + # import_contact_pets/6 -> import_contact_pets/4 + # import_contact_calls/7 -> import_contact_calls/4 + # import_contact_activities/7 -> import_contact_activities/4 + # import_contact_gifts/6 -> import_contact_gifts/4 + # import_contact_debts/6 -> import_contact_debts/4 + # import_contact_tasks/6 -> import_contact_tasks/4 + # import_contact_reminders/6 -> import_contact_reminders/4 + # import_contact_conversations/7 -> import_contact_conversations/4 + # + # Together with their per-item siblings (import_single_pet, etc.). + # + # base_url is now derived from `credential.url` inside each helper. + # account_id is now derived from `contact.account_id`. + # user_id is no longer needed (calls/activities/conversations are not + # user-scoped; if any helper currently uses user_id only for audit-log + # author, fall back to `import_job.user_id`). + # + # IMPORTANT: every helper that today calls api_get_json must keep using + # the same rate-limited request path as + # `Kith.Imports.Sources.MonicaApi.api_get_json/3` (or the equivalent + # unified helper).
To avoid coupling, copy `api_get_json` + # into this module as a small private wrapper that goes through Req + + # RateLimiter the same way: + + defp api_get_json(credential, url, params) do + RateLimiter.wait!(credential.url) + + headers = [ + {"Authorization", "Bearer #{credential.api_key}"}, + {"Accept", "application/json"} + ] + + options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ Map.get(credential, :req_options, []) + + case Req.get(url, options) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} + end + end + + defp maybe_record_entity(_import_job, _, nil, _, _), do: :ok + + defp maybe_record_entity(import_job, source_type, source_id, local_type, local_id) do + Imports.record_imported_entity( + import_job, + source_type, + to_string(source_id), + local_type, + local_id + ) + end +end +``` + +Now copy the actual bodies of `import_contact_pets/6`, `import_single_pet/4`, `import_contact_calls/7`, `import_single_call/5`, `import_contact_activities/7`, `import_single_activity/5`, `import_contact_gifts/6`, `import_single_gift/4`, `import_contact_debts/6`, `import_single_debt/4`, `import_contact_tasks/6`, `import_single_task/4`, `import_contact_reminders/6`, `import_single_reminder/4`, `import_contact_conversations/7`, and `import_single_conversation/5` (or whatever the exact per-item function names are) from `lib/kith/imports/sources/monica_api.ex` into this new module. 
+ +For each top-level helper, adapt the signature: + +**Before** (in MonicaApi): +```elixir +defp import_contact_pets(credential, base_url, account_id, contact, source_id, import_job) do + url = "#{base_url}/api/contacts/#{source_id}/pets" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => pets}} when is_list(pets) -> + Enum.flat_map(pets, fn pet -> + import_single_pet(account_id, contact, pet, import_job) + end) + + {:ok, _} -> + [] + + {:error, reason} -> + ["Failed to fetch pets for contact #{source_id}: #{inspect(reason)}"] + end +end +``` + +**After** (in MonicaMiscDataWorker): +```elixir +defp import_contact_pets(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/pets" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => pets}} when is_list(pets) -> + Enum.count(pets, fn pet -> + case import_single_pet(contact.account_id, contact, pet, import_job) do + [] -> true # success — no error string + _ -> false + end + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch pets for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end +end +``` + +Apply the same adaptation to all eight top-level helpers. Keep their per-item siblings (`import_single_pet`, `import_single_call`, etc.) unchanged in body — just paste them as-is. The signature change is *only* at the top-level (the function the worker's `fire_endpoint` dispatches to). + +Each top-level helper now returns an integer count instead of a list of errors. Errors become warning logs (Phase 4 errors are not user-actionable; logging is enough). + +- [ ] **Step 6: Add the `Contacts.get_contact_for_misc/1` lookup helper** + +The worker calls `Contacts.get_contact_for_misc/1`. This is a tiny helper avoiding `Repo.get` directly. 
Add to `lib/kith/contacts.ex`, near `get_contact_field!/2`: + +```elixir + @doc """ + Fetch a contact by ID without scope enforcement, for use by the + Monica misc-data worker. The worker does not re-verify ownership: it + trusts the `local_id` values in its plan, which the main crawl recorded + for an import the user authorized. Do not call this from request paths. + + Returns `nil` if not found. + """ + def get_contact_for_misc(id) when is_integer(id) or is_binary(id) do + Repo.get(Contact, id) + end +``` + +(Alternative: use `Repo.get(Kith.Contacts.Contact, local_id)` directly in the worker — but adding the named helper makes the intent self-documenting and keeps the worker free of direct Repo calls.) + +- [ ] **Step 7: Run the worker test** + +Run: `mix test test/kith/workers/monica_misc_data_worker_test.exs` + +Expected: PASS, all 4 tests. (Some assertions are deliberately loose — e.g. `>= 0` — because the per-item insertion paths may fail validation on fixture data that lacks required fields; the assertion is "the worker called the endpoint and updated the summary," not "every fixture inserted successfully." Tighten if you choose to set up richer fixtures.) + +- [ ] **Step 8: Run the full suite to verify duplicated helpers still pass their existing tests** + +Run: `mix test` + +Expected: PASS. Both `MonicaApi.import_contact_pets/6` (still there) and `MonicaMiscDataWorker.import_contact_pets/4` (newly added) coexist temporarily. Existing tests of the inline Phase 4 path continue to pass.
+ +- [ ] **Step 9: Commit** + +```bash +git add lib/kith/workers/monica_misc_data_worker.ex test/kith/workers/monica_misc_data_worker_test.exs lib/kith/contacts.ex +git commit -m "feat: add MonicaMiscDataWorker (per-contact extra data, plan-driven)" +``` + +--- + +## Task 9: Cut over — remove inline Phase 4 from `MonicaApi` and enqueue the misc worker + +**Files:** +- Modify: `lib/kith/imports/sources/monica_api.ex` (delete `import_extra_data_types/5`, `import_per_contact_data/7`, eight `import_contact_*` helpers + their `import_single_*` siblings; remove Phase 4 invocation from `crawl/5`; remove `extra_data_errors` accumulation) +- Modify: `lib/kith/workers/monica_api_crawl_worker.ex` (enqueue `MonicaMiscDataWorker`, strip plan from persisted summary) +- Modify: `test/kith/workers/monica_api_crawl_worker_test.exs` (boundary test for misc-worker enqueue) + +This is the largest task; double-check after each deletion that nothing else in `MonicaApi` references the removed functions. + +- [ ] **Step 1: Locate Phase 4 invocation in `crawl/5`** + +Run: `grep -n "import_extra_data_types\|extra_data_errors" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: matches in `crawl/5` (~lines 110-127) and the function definition (~line 1275). 
+ +- [ ] **Step 2: Remove the Phase 4 invocation from `crawl/5`** + +Find the block (~line 109-127): + +```elixir + # Phase 4: Additional data types (per-contact endpoints) + extra_data_errors = + import_extra_data_types(credential, account_id, user_id, import_job, opts) + + # Phase 5: Enqueue document import jobs (async, runs after main import) + if opts["documents"] do + enqueue_document_imports(credential, account_id, user_id, import_job) + end + + all_errors = + acc.errors ++ + ref_errors ++ + notes_errors ++ + merge_result.errors ++ + extra_data_errors + + error_count = + acc.error_count + length(ref_errors) + length(notes_errors) + + length(merge_result.errors) + length(extra_data_errors) +``` + +Replace with: + +```elixir + # Phase 5: Enqueue document import jobs (async, runs after main import) + if opts["documents"] do + enqueue_document_imports(credential, account_id, user_id, import_job) + end + + all_errors = + acc.errors ++ + ref_errors ++ + notes_errors ++ + merge_result.errors + + error_count = + acc.error_count + length(ref_errors) + length(notes_errors) + + length(merge_result.errors) +``` + +- [ ] **Step 3: Delete the eight top-level per-contact helpers and their `import_single_*` siblings** + +Delete the entire blocks (function + Phase header comment) for: + +- `import_extra_data_types/5` and its docstring/comment header +- `import_per_contact_data/7` +- `import_contact_pets/6` + `import_single_pet/4` +- `import_contact_calls/7` + `import_single_call/5` +- `import_contact_activities/7` + `import_single_activity/5` +- `import_contact_gifts/6` + `import_single_gift/4` +- `import_contact_debts/6` + `import_single_debt/4` +- `import_contact_tasks/6` + `import_single_task/4` +- `import_contact_reminders/6` + `import_single_reminder/4` +- `import_contact_conversations/7` + `import_single_conversation/5` + +Use grep to find their exact line ranges: + +```bash +grep -n "^ defp import_contact_\|^ defp import_single_\|^ defp import_extra_data_types\|^ defp 
import_per_contact_data\|^ # ── Phase " /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex +``` + +Delete each function body from `defp ... do` through the matching `end`. Also delete the `# ── Phase 5: Pets ─...`, `# ── Phase 6: Calls ─...` etc. comment headers, plus the parent `# ── Phases 5-12: Additional per-contact data types ─...` header. + +Do NOT delete `enqueue_document_imports/4` or `Phase 5: Enqueue document import jobs` — those still belong to `MonicaApi` (documents are handled by a separate worker, not the misc worker). + +- [ ] **Step 4: Verify no dangling references inside `MonicaApi`** + +Run: `grep -n "import_contact_\|import_single_pet\|import_single_call\|import_single_activit\|import_single_gift\|import_single_debt\|import_single_task\|import_single_reminder\|import_single_conversat\|import_extra_data_types\|import_per_contact_data\|extra_data_errors" /Users/basharqassis/projects/kith/.claude/worktrees/fix-duplicate-detection/lib/kith/imports/sources/monica_api.ex` + +Expected: no matches. If any remain, delete or update them. + +- [ ] **Step 5: Compile and run Monica + crawl-worker tests** + +Run: `mix compile --warnings-as-errors && mix test test/kith/imports/sources/monica_api_test.exs test/kith/workers/monica_api_crawl_worker_test.exs` + +Expected: PASS. Tests that previously exercised Phase 4 inline (if any) need updating — they should now assert that the misc-data plan is built but Phase 4 endpoints are NOT hit during `crawl/5`. Locate any failing test and replace its assertion (e.g. "asserts 1 pet was inserted") with the new contract (e.g. "asserts the misc_data_plan includes the pets endpoint for this contact"). + +- [ ] **Step 6: Wire `MonicaApiCrawlWorker` to enqueue the misc worker** + +Open `lib/kith/workers/monica_api_crawl_worker.ex`. 
Find the `perform/1` success branch (around line 41-58): + +```elixir + now = DateTime.utc_now() |> DateTime.truncate(:second) + summary_map = ensure_map(summary) + + Imports.update_import_status(import_job, "completed", %{ + summary: summary_map, + completed_at: now + }) + + Imports.wipe_api_key(import_job) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + + # Enqueue photo sync (separate job) if the user opted in + maybe_enqueue_photo_sync(import_job) + + Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}") + :ok +``` + +Insert the misc-worker enqueue and strip the plan from the persisted summary: + +```elixir + now = DateTime.utc_now() |> DateTime.truncate(:second) + summary_map = ensure_map(summary) + {misc_plan, persisted_summary} = Map.pop(summary_map, :misc_data_plan, []) + persisted_summary = Map.delete(persisted_summary, "misc_data_plan") + + Imports.update_import_status(import_job, "completed", %{ + summary: persisted_summary, + completed_at: now + }) + + maybe_enqueue_misc_data_worker(import_job, misc_plan) + Imports.wipe_api_key(import_job) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, persisted_summary}) + + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + + # Enqueue photo sync (separate job) if the user opted in + maybe_enqueue_photo_sync(import_job) + + Logger.info("MonicaApi import #{import_id} completed: #{inspect(persisted_summary)}") + :ok +``` + +Note: the `maybe_enqueue_misc_data_worker` call happens BEFORE `wipe_api_key` because the worker needs the still-encrypted key passed as an arg, mirroring the photo-sync pattern. 
+ +Add the helper below `maybe_enqueue_photo_sync/1`: + +```elixir + defp maybe_enqueue_misc_data_worker(_import_job, []), do: :ok + + defp maybe_enqueue_misc_data_worker(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => import_job.api_url, + "credential_api_key" => import_job.api_key_encrypted, + "plan" => plan + } + |> Kith.Workers.MonicaMiscDataWorker.new() + |> Oban.insert() + end +``` + +Add the alias near the top of the file alongside `MonicaPhotoSyncWorker`: + +```elixir + alias Kith.Workers.MonicaMiscDataWorker +``` + +- [ ] **Step 7: Add a boundary regression test** + +In `test/kith/workers/monica_api_crawl_worker_test.exs`, add a new test inside `describe "perform/1"`: + +```elixir + test "enqueues MonicaMiscDataWorker with the plan from crawl summary", + %{user: user, account_id: account_id} do + # This boundary test guards the wizard → crawl → misc-worker contract: + # the misc_data_plan key produced by MonicaApi.crawl/5 must reach + # MonicaMiscDataWorker.new/1 unmodified, just as auto_merge_duplicates + # had to reach MonicaApi.crawl/5 (Bug C in the previous PR). + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"calls" => true} + }) + + # Stub Monica to return one contact with statistics indicating two + # calls exist — collect_misc_data/5 should emit a plan entry for it. 
+ Req.Test.stub(MonicaApiStub, fn conn -> + cond do + String.contains?(conn.request_path, "/api/contacts") -> + Req.Test.json(conn, %{ + "data" => [ + %{ + "id" => 7, + "first_name" => "Plan", + "last_name" => "Test", + "statistics" => %{"number_of_calls" => 2} + } + ], + "meta" => %{"total" => 1, "last_page" => 1} + }) + + true -> + Req.Test.json(conn, %{"data" => []}) + end + end) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + # Misc worker should now be enqueued with a non-empty plan including + # "calls" for the imported contact. + assert_enqueued( + worker: Kith.Workers.MonicaMiscDataWorker, + args: %{"import_id" => import_job.id} + ) + end +``` + +(The exact stub_name and helper to inject Req.Test will mirror the existing tests in this file — adapt as needed.) + +- [ ] **Step 8: Run the cross-cutting test suite** + +Run: `mix test test/kith/workers/monica_api_crawl_worker_test.exs test/kith/workers/monica_misc_data_worker_test.exs test/kith/imports/sources/monica_api_test.exs` + +Expected: PASS, all tests. + +- [ ] **Step 9: Run the full suite + quality gate** + +Run: `mix quality && mix test` + +Expected: PASS. No new credo, dialyzer, or sobelow findings. + +- [ ] **Step 10: Commit** + +```bash +git add lib/kith/imports/sources/monica_api.ex lib/kith/workers/monica_api_crawl_worker.ex test/kith/workers/monica_api_crawl_worker_test.exs +git commit -m "refactor: extract Phase 4 to MonicaMiscDataWorker; enqueue from crawl worker" +``` + +--- + +## Task 10: End-to-end verification + +**Files:** *(no code changes — verification only)* + +- [ ] **Step 1: Confirm the full test suite passes** + +Run: `mix test` + +Expected: PASS, 1100+ tests, 0 failures. Count should match commit `6af91bf` plus new tests from this PR. + +- [ ] **Step 2: Confirm static analysis is clean** + +Run: `mix quality` + +Expected: `done (passed successfully)`. No new credo, sobelow, or dialyzer findings beyond the existing `.dialyzer_ignore.exs` skips. 
+ +- [ ] **Step 3: Smoke test on dev — wipe and re-import** + +Manual: +- Start dev server: `iex -S mix phx.server` +- In IEx, cancel any in-flight imports (run `import Ecto.Query` first so `from/2` is available): `Oban.cancel_all_jobs(from j in Oban.Job, where: j.worker in ["Kith.Workers.MonicaApiCrawlWorker", "Kith.Workers.MonicaMiscDataWorker", "Kith.Workers.MonicaPhotoSyncWorker"] and j.state in ["executing", "available", "scheduled", "retryable"])` +- Reset the dev account (substitute your dev account and user IDs): `Kith.Workers.AccountResetWorker.new(%{"account_id" => ACCOUNT_ID, "user_id" => USER_ID}) |> Oban.insert()` +- Wait for reset to complete; verify contact list is empty. +- Open `/settings/import` in browser; choose Monica API; enter URL and API key; ensure all defaults (including `auto_merge_duplicates`, `pets`, `calls`, etc.) are checked. +- Start the import; observe. + +Expected: +- `MonicaApiCrawlWorker` completes in **under 2 minutes** for ~1000 contacts (Phase 1+2+3 only, throttled at 55/min for ~20-30 pagination + auxiliary calls). +- Wizard transitions to "import complete" at that point; the duplicates tab is reachable and shows a small handful of legitimate pending candidates, NOT 6000. +- `MonicaMiscDataWorker` appears in the Oban dashboard as a separate executing job. +- Its runtime depends on actual misc data volume; for a typical CRM with sparse pet/debt/gift data, **single-digit minutes**. +- Logs show no `"3 attempts left forever"` retry spam. If any 429 fires (e.g. tighter self-hosted Monica limit), Req's built-in retry handles it once and proceeds. + +- [ ] **Step 4: Verify summary shape** + +In IEx (substitute the ID of the import you just ran): + +```elixir +import_job = Kith.Imports.get_import!(IMPORT_ID) +import_job.summary +``` + +Expected after `MonicaApiCrawlWorker` completes: +```elixir +%{ + "imported" => 1000, + "contacts" => 1000, + "notes" => N, + "skipped" => 0, + "merged" => M, + "error_count" => 0, + "errors" => [] +} +``` + +The `"misc_data_plan"` key should be **absent** (stripped by `MonicaApiCrawlWorker` before persisting). 
+ +After `MonicaMiscDataWorker` completes, refetch (same import ID as above): + +```elixir +Kith.Imports.get_import!(IMPORT_ID).summary["misc"] +``` + +Expected: +```elixir +%{ + "pets" => P, + "calls" => C, + "activities" => A, + "gifts" => G, + "debts" => D, + "tasks" => T, + "reminders" => R, + "conversations" => Co +} +``` + +with counts reflecting actual data imported. + +- [ ] **Step 5: Final cleanup commit (if any verification adjustments needed)** + +If smoke testing surfaces any small fixes (typos in log lines, edge cases in the plan filter), commit them as a separate small fix. Otherwise no commit needed for this task. + +- [ ] **Step 6: Push the branch** + +```bash +git push origin fix/duplicate-detection +``` + +Expected: GitHub shows the new commits on top of `6af91bf`. Open a PR if not already open, or update the existing one. + +--- + +## Self-review checklist + +Run through this once before handing off: + +1. **Spec coverage:** + - Part 1 (extract Phase 4): Tasks 7-9 ✓ + - Part 2 (rate limiter): Tasks 1-2 ✓ + - Part 3 (collapse retry): Task 3 ✓ + - Part 4 (statistics short-circuit): Task 7 ✓ + - Part 5a (persistent_term cleanup): Task 6 ✓ + - Part 5b (normalize: false opt): Tasks 4-5 ✓ + - Tests for all of the above: Tasks 1, 4, 5, 7, 8, 9 ✓ + - Verification: Task 10 ✓ + +2. **Placeholders:** All steps contain concrete code, exact commands, exact paths. Each instruction in the cutover task (Task 9) explicitly tells the engineer to `grep` first to find line ranges before deleting — no "delete the appropriate code" hand-waving. + +3. **Type consistency:** + - `MonicaApiCrawlWorker` enqueues with arg keys `"import_id"`, `"credential_url"`, `"credential_api_key"`, `"plan"` (Task 9 Step 6); `MonicaMiscDataWorker.perform/1` reads exactly those keys (Task 8 Step 5). 
✓ + - `crawl/5` returns `misc_data_plan: ...` (atom key, Task 7 Step 5); `MonicaApiCrawlWorker` pops `:misc_data_plan` from `summary_map` with `Map.pop/3` (default `[]` when the key is absent), then deletes the `"misc_data_plan"` string key, so neither shape is ever persisted. Note the plan is only *read* from the atom key: if the summary ever arrives with string keys, the plan is discarded and no misc worker is enqueued. ✓ + - `collect_misc_data` stringifies endpoints before storing in the plan (Task 7 Step 3); `MonicaMiscDataWorker.fire_endpoint/5` pattern-matches on strings (`"pets"`, `"calls"`, …) (Task 8 Step 5). ✓ + - `Contacts.create_contact_field/3` accepts `opts` as a keyword list (Task 4); Monica caller passes `normalize: false` (Task 5). ✓ + - `normalize_field_value/3` takes `ctx` (Task 6 Step 5); caller in `import_single_contact_field` passes `ctx` (Task 6 Step 6). ✓ diff --git a/docs/superpowers/specs/2026-03-21-extensible-import-system-design.md b/docs/superpowers/specs/2026-03-21-extensible-import-system-design.md deleted file mode 100644 index 94ef3fc..0000000 --- a/docs/superpowers/specs/2026-03-21-extensible-import-system-design.md +++ /dev/null @@ -1,408 +0,0 @@ -# Extensible Import System with Monica CRM Support - -**Date:** 2026-03-21 -**Status:** Approved - -## Overview - -Build an extensible import framework for Kith that supports multiple data sources (VCF, Monica CRM, future platforms). The first new source is Monica CRM, importing contacts and all associated data from a JSON export file, with optional photo sync via Monica's REST API. - -**Dependencies:** -- [Contact "First Met" Fields & Schema Additions](2026-03-21-contact-first-met-fields-design.md) — must be implemented first; adds `middle_name`, `first_met_at`, `first_met_where`, `first_met_through_id`, `first_met_additional_info`, `first_met_year_unknown`, and `birthdate_year_unknown` to the Contact schema. 
- -Core principles: -- Kith's schema stays clean — no source-specific fields on core tables -- Import tracking via a generic `import_records` table for source ID → local ID mapping -- Behaviour-based source plugins for extensibility -- Per-contact changeset transactions for granular error reporting -- UI-driven import wizard with real-time progress - -## Database Schema - -### `imports` table - -Tracks each import job. - -| Column | Type | Notes | -|---|---|---| -| id | bigint PK | | -| account_id | references accounts | | -| user_id | references users | | -| source | string | "monica", "vcard", etc. | -| status | string | pending, processing, completed, failed, cancelled | -| file_name | string | | -| file_size | integer | | -| file_storage_key | string | reference to file in Kith.Storage | -| api_url | string | nullable, for photo sync | -| api_key_encrypted | binary | nullable, use `Kith.Vault.EncryptedBinary` Ecto type (auto-encrypts at rest via Cloak, same pattern as `Account.immich_api_key`) | -| api_options | map | nullable, typed as `%{photos: boolean(), first_met_details: boolean()}` — keys match `api_supplement_options()` keys; validated on create | -| summary | map | `%{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: [...]}` — matches `import_summary` type; `errors` capped at 50 entries, `error_count` has true total | -| started_at | utc_datetime | | -| completed_at | utc_datetime | | -| timestamps | | | - -### `import_records` table - -Maps source system IDs to Kith IDs. Keeps all source-specific IDs out of Kith's core schemas. Used for resolving cross-entity references (e.g., `first_met_through` UUID → local contact ID) and for identifying previously-imported entities on re-import. - -| Column | Type | Notes | -|---|---|---| -| id | bigint PK | | -| account_id | references accounts | | -| import_id | references imports | Set on first import, updated to latest import_id on re-import | -| source | string | "monica", "vcard", etc. 
| -| source_entity_type | string | "contact", "note", etc. | -| source_entity_id | string | UUID from source system | -| local_entity_type | string | "contact", "note", etc. | -| local_entity_id | bigint | Kith's DB id | -| timestamps | | | - -**Unique index:** `[account_id, source, source_entity_type, source_entity_id]` - -**Scope note:** This index deduplicates within a single source system per account. The same real-world entity imported from two different sources (e.g., VCard and Monica) will create two `import_records` entries — this is intentional; cross-source deduplication is a separate concern handled by content-level duplicate detection. - -## Import Framework - -### File Storage - -Uploaded files are stored via `Kith.Storage` under `imports/{import_id}/` and referenced by storage key in the `imports` table. The Oban worker receives only the `import_id` — never raw file data in job args (Oban args are JSONB with practical size limits). The worker loads the file from storage at runtime. - -The `imports` table includes a `file_storage_key` column for this reference. - -**File size expectation:** The `Source.import/4` callback receives the entire file as a binary. Monica JSON exports are typically 1–50 MB for most accounts. For the expected range this is fine; if a source could produce files >100 MB, it should implement streaming internally. The `ImportSourceWorker` loads the file from storage into memory before calling the source. - -### Concurrent Import Guard - -Two-layer guard: - -1. **Database constraint:** Add a unique partial index on `imports (account_id) WHERE status IN ('pending', 'processing')`. This prevents race conditions where two concurrent requests both pass the application-level check. - -2. **Application check:** `Kith.Imports.create_import/3` queries for an existing active import and returns `{:error, :import_in_progress}` if found. The UI disables the "Start Import" button when an import is active. 
- -Concurrent imports won't corrupt data (upserts are idempotent), but the guard prevents photo sync jobs from competing for API rate limits. - -### Source Behaviour - -```elixir -defmodule Kith.Imports.Source do - @type opts :: map() - @type credential :: %{url: String.t(), api_key: String.t()} - @type import_summary :: %{ - contacts: non_neg_integer(), - notes: non_neg_integer(), - skipped: non_neg_integer(), - error_count: non_neg_integer(), - errors: [String.t()] # capped at 50 entries; error_count has the true total - } - - @callback name() :: String.t() - @callback file_types() :: [String.t()] - # validate_file: structural check only (correct format, required keys present) - @callback validate_file(binary()) :: {:ok, map()} | {:error, String.t()} - # parse_summary: deeper parse returning entity counts for the confirmation screen - @callback parse_summary(binary()) :: {:ok, map()} | {:error, String.t()} - @callback import(account_id :: integer(), user_id :: integer(), data :: binary(), opts()) :: - {:ok, import_summary()} | {:error, term()} - @callback supports_api?() :: boolean() - - # Optional callbacks — only required when supports_api?() returns true - @callback test_connection(credential()) :: :ok | {:error, String.t()} - @callback fetch_photo(credential(), resource_id :: String.t()) :: - {:ok, binary()} | {:error, term()} - # Returns list of supplementary data types the API can provide beyond the file export - @callback api_supplement_options() :: [%{key: atom(), label: String.t(), description: String.t()}] - @callback fetch_supplement(credential(), contact_source_id :: String.t(), key :: atom()) :: - {:ok, map()} | {:error, term()} - - @optional_callbacks [test_connection: 1, fetch_photo: 2, api_supplement_options: 0, fetch_supplement: 3] -end -``` - -### Cancellation - -Import jobs support cancellation. The worker checks a `cancelled` flag on the import record between each contact. The UI shows a "Cancel Import" button during processing. On cancel: -1. 
Set `imports.status` to `cancelled` -2. Worker checks status before each contact, stops if cancelled -3. Already-imported contacts remain (import is resumable) -4. Photo sync jobs for cancelled imports are discarded via `Oban.cancel_all_jobs/1` - -### Source Implementations - -- `Kith.Imports.Sources.VCard` — wraps existing `Kith.VCard.Parser` logic -- `Kith.Imports.Sources.Monica` — new, parses JSON export + API photo sync - -### Context Module - -`Kith.Imports` — manages import jobs, resolves source modules, handles `import_records` lookups. - -Key functions: -- `create_import/3` — create an import job record -- `find_import_record/4` — look up existing record by source + entity type + entity id -- `record_imported_entity/5` — upsert an import_record (create or update import_id) -- `resolve_source/1` — map source string to module - -### Generic Worker - -`Kith.Workers.ImportSourceWorker` — Oban worker that: -1. Loads the import job -2. Resolves the source module -3. Calls `source.import/4` -4. Broadcasts progress via PubSub -5. Updates import job status and summary - -Replaces the existing `ImportWorker` for new imports. - -## Monica Source — Data Mapping - -### Processing Order (dependency chain) - -**Scope note:** The Monica JSON export contains: contacts, contact_fields, addresses, notes, reminders, pets, photos, activities, and relationships. It does NOT contain gifts, debts, calls, life_events, or conversations — those are Kith-specific features not present in Monica. - -**Creator/Author assignment:** Many Kith schemas require `creator_id` or `author_id` (Note, Reminder, Activity, etc.). During import, these are set to the `user_id` of the user who initiated the import. 
- -**Phase 1 — Reference data** (no dependencies): -- Genders → `Kith.Contacts.Gender` (find-or-create by name) -- Contact field types → `Kith.Contacts.ContactFieldType` (find-or-create by name) -- Relationship types → `Kith.Contacts.RelationshipType` (find-or-create by name) -- Activity type categories → `Kith.Contacts.ActivityTypeCategory` (find-or-create by name, needed for activities) -- Tags → `Kith.Contacts.Tag` (find-or-create by name, scoped to account) -- Pet categories → mapped to Kith's `species` enum: - -| Monica pet_category | Kith species | -|---|---| -| Dog | dog | -| Cat | cat | -| Bird | bird | -| Fish | fish | -| Reptile | reptile | -| Rabbit | rabbit | -| Hamster | hamster | -| (all others) | other | - -**Phase 2 — Contacts** (depends on: genders): - -| Monica Property | Kith Field | Notes | -|---|---|---| -| first_name | first_name | direct | -| last_name | last_name | direct | -| middle_name | middle_name | direct | -| nickname | nickname | direct | -| company | company | direct | -| job | occupation | rename | -| is_starred | favorite | rename | -| is_active: false | is_archived: true | inverted | -| is_dead | deceased | rename | -| description | description | direct | -| first_met_date (nested special_date object) | first_met_at | extract `.date` from the nested object; see Partial Date Handling below | -| first_met_through (UUID) | first_met_through_id | resolve via import_records after all contacts imported (Phase 4, alongside relationships) | -| first_met_where | first_met_where | NOT in JSON export. Fetched via API supplement (`GET /api/contacts/{id}`) if user enables "Fetch how we met details" option. | -| first_met_additional_info | first_met_additional_info | NOT in JSON export. Fetched via API supplement (same call as above). 
| -| gender (UUID) | gender_id | via import_records lookup | -| birthdate (nested special_date object) | birthdate | Same structure as `first_met_date` — extract `.date`; see Partial Date Handling below | -| tags (UUID array) | tags | find-or-create tags by name (account-scoped), then insert join table rows | - -**Phase 3 — Contact children** (depends on: contacts, reference data): - -Each is nested inside its parent contact in the JSON. - -- `contact_field` → `Kith.Contacts.ContactField` (type UUID → contact_field_type_id via lookup) -- `address` → `Kith.Contacts.Address` (Monica splits address/place — flatten into Kith's single address schema) -- `note` → `Kith.Contacts.Note` -- `reminder` → `Kith.Reminders.Reminder` -- `pet` → `Kith.Contacts.Pet` (pet_category → species enum mapping) -- `photo` → `Kith.Contacts.Photo` (metadata only; `storage_key` set to a `"pending_sync:{source_photo_uuid}"` placeholder; file downloaded in Phase 5. Photo records with `pending_sync:` prefix are treated as unsynced: the `Photo` context module should expose a `Photo.pending_sync?/1` helper that pattern-matches the prefix. The UI uses this to show a placeholder/spinner instead of calling `Storage.url/1` on the pending key. The API omits the `url` field for pending photos.) -- `activity` → `Kith.Activities.Activity` (with `activity_type_category_id` via lookup; activities shared across multiple contacts: deduplicate by UUID — on first encounter, create the activity and its join table entry; on subsequent contacts referencing the same activity UUID, add only the join table entry). The worker maintains a `MapSet` of processed activity UUIDs in memory during the import to track which activities have been created. - -**Resumability note:** On a resumed import (after cancellation), the in-memory `MapSet` starts empty. The worker must first check `import_records` for existing activity mappings before attempting insert. 
If an `import_record` exists for an activity UUID, skip creation and insert only the join table entry. This makes the `MapSet` an optimization (avoids repeated DB lookups within a single run), not a source of truth. - -**Phase 4 — Cross-contact references** (depends on: contacts, relationship types): - -Relationships (top-level in the JSON): -- Each references two contact UUIDs (`contact_is`, `of_contact`) and a relationship type -- Look up both contacts via `import_records` -- Look up relationship type -- Create `Kith.Contacts.Relationship` - -First-met-through links: -- For contacts with a `first_met_through` UUID, look up the referenced contact via `import_records` -- Update the contact's `first_met_through_id` -- If the referenced contact was not imported, log a warning and leave null - -**Phase 5 — Photo files** (async, depends on: photo records from phase 3): - -Handled by separate `PhotoSyncWorker` jobs. See Photo Sync section. - -### Partial Date Handling - -Monica's `birthdate` and `first_met_date` are nested `special_date` objects with `is_year_unknown` and `is_age_based` flags. Kith's `birthdate` and `first_met_at` are `:date` columns that require a full year+month+day. - -**Schema change required:** Add `birthdate_year_unknown` (boolean, default false) to `Kith.Contacts.Contact`. When Monica provides a date with `is_year_unknown: true`, store the date using a sentinel year (year 1) and set `birthdate_year_unknown: true`. The UI and API should omit the year when this flag is set. - -Same approach for `first_met_at` — add `first_met_year_unknown` (boolean, default false) to the Contact schema. This field is included in the First Met Fields migration. - -When `is_age_based` is true, Monica computed the birthdate from an entered age — treat the year as approximate but known (import as a normal date, don't set the unknown flag). - -### Per-Contact Flow - -``` -For each contact in JSON: - 1. Check import status — if cancelled, stop processing - 2. 
Look up import_records for [account, "monica", "contact", contact.uuid] - 3. If found: - a. Check if local contact is soft-deleted (deleted_at set) - → skip, log "previously deleted in Kith, not restoring" - b. Otherwise → upsert contact + re-import children in Ecto.Multi - 4. If not found → insert contact + all children in Ecto.Multi - 5. Upsert import_record with current import_id - 6. Broadcast progress via PubSub — every `max(1, total ÷ 50)` contacts (adaptive: frequent enough for small imports, not excessive for large ones) - 7. Log result with contact name for debugging - 8. On changeset error → log detailed error (capped at 50 in summary), continue to next contact -``` - -### Relationship Edge Cases - -Phase 4 imports relationships after all contacts. If one of the two referenced contacts failed to import (changeset error in Phase 2), the relationship is skipped with a warning log: "Skipping relationship {type} between {uuid_a} and {uuid_b}: contact {failed_uuid} was not imported." - -## Photo Sync - -### Rate Limiting - -Monica defaults to 60 requests/minute per API key. - -**Approach:** Each photo is an independent Oban job with staggered scheduling. - -- After main import completes, enqueue one `PhotoSyncWorker` job per photo -- Jobs are scheduled with staggered `scheduled_at` timestamps: batches of 50 with 60-second gaps -- Each job is independent — a retry only re-downloads that single photo, never the batch - -### PhotoSyncWorker - -`Kith.Workers.PhotoSyncWorker` — Oban worker, queue: `:photo_sync` - -**Config requirement:** Add `photo_sync: 5` to Oban queues in `config/config.exs`. - -Per job: -1. Load the photo record and import record -2. Check `Kith.Storage.check_storage_limit/2` — if account is at capacity, mark photo as failed and return `:discard` -3. Call `GET {monica_url}/api/photos/{source_photo_id}` with Bearer token -4. Download binary → store via `Kith.Storage` -5. Update `Kith.Contacts.Photo` record with stored file path -6. 
On HTTP 429 → return `{:snooze, 60}` (Oban reschedules after 60s, does NOT reprocess batch) -7. On max retries exhausted → delete the Photo record (contact becomes photoless rather than having a permanently broken reference) -8. On other errors → Oban retries with backoff (max 3 attempts) - -### API Supplement Worker - -`Kith.Workers.ApiSupplementWorker` — Oban worker, queue: `:api_supplement` - -**Config requirement:** Add `api_supplement: 3` to Oban queues in `config/config.exs`. - -Handles all non-photo API fetches (first_met details, future supplement types). One job per contact **that has a `first_met_date` in the JSON export** — contacts without any first-met data are skipped (significantly reduces API calls). Staggered like photo sync (batches of 50, 60-second gaps). - -Per job: -1. Load the import record and contact -2. Call `GET {monica_url}/api/contacts/{source_contact_id}` with Bearer token -3. Extract `first_met_where` and `first_met_additional_information` from the response -4. Update the Kith contact record -5. On HTTP 429 → `{:snooze, 60}` -6. On other errors → Oban retries with backoff (max 3 attempts) - -The worker checks `api_options` on the import to determine which fields to fetch. If only `first_met_details` is selected (no photos), only this worker runs. If both are selected, both workers run concurrently with independent rate limiting. - -### Progress - -Photo sync and API supplement progress are tracked separately from the main import: -- Import summary includes `photos_total`, `photos_synced`, `supplements_total`, `supplements_synced` counters -- PubSub broadcasts progress for the UI on topic `"import:#{account_id}"` - -### Post-Import Cleanup - -**File cleanup:** Import files stored in `imports/{import_id}/` are retained for 30 days after import completion, then deleted. 
Add a periodic Oban cron job (`ImportFileCleanupWorker`, queue: `:default`, weekly schedule `"0 5 * * 0"`) that queries for completed/failed imports older than 30 days with a non-null `file_storage_key`, deletes the file from Storage, and nullifies the `file_storage_key`. - -**API key lifecycle:** When all async jobs for an import are complete (photo sync + API supplement), wipe `api_key_encrypted` from the imports record. The `ImportSourceWorker` checks after the main import; the last completing `PhotoSyncWorker` or `ApiSupplementWorker` also checks. A simple approach: after each async job completes, query for remaining pending jobs for that import — if zero remain, nullify the API key. - -**Failed photo cleanup:** When a `PhotoSyncWorker` job exhausts all 3 retry attempts, delete the `Kith.Contacts.Photo` record entirely. The contact simply has no photo rather than a permanently broken `pending_sync:` reference. This is handled in the worker's `max_attempts` exceeded callback. - -## Import Wizard UI - -### Location - -Replaces the existing import UI at `KithWeb.SettingsLive.Import` (`/settings/import`). The new `ImportWizardLive` handles multiple source types and is mounted at the same route. 
- -### Flow - -**Step 1 — Source selection:** -- Tabs or radio: "vCard (.vcf)" | "Monica CRM" -- Selecting a source shows its specific form - -**Step 2 — Monica form:** -- File upload (accepts `.json`) -- On upload: validate JSON structure (check `version`, `app_version`, `account.data`) -- Show summary: "Found 851 contacts, 26 relationships, 313 photos" -- Optional expandable section: "Connect to Monica API" - - Monica URL field - - API key field - - "Test Connection" button → hits `/api/me`, shows inline success/failure - - On successful connection, show checkboxes for API-supplemented data (from `api_supplement_options/0`): - - [x] Sync photos (313 found) - - [x] Fetch "How we met" details (first_met_where, first_met_additional_info) - - Future sources can add their own options here - - Checkboxes are only shown after a successful connection test - - Selected options are stored on the `imports` record as `api_options` (map) - -**Step 3 — Confirmation:** -- Summary table of what will be imported -- On re-import: "247 new contacts, 604 existing (will be updated)" -- "Start Import" button - -**Step 4 — Progress (LiveView):** -- Progress bar: "Processing contact 142/851..." -- Running counters: imported / updated / skipped / errors -- Expandable error log with specific failures -- On main import completion: summary card with totals -- If API options enabled, secondary progress bars that continue after main import: - - "Syncing photos: 42/313" (if photos selected) - - "Fetching details: 100/851" (if first_met_details selected) - -### Implementation - -`ImportWizardLive` LiveView with source-specific components: -- `MonicaImportComponent` — handles Monica-specific form, validation, summary -- `VcardImportComponent` — wraps existing VCF import UI - -PubSub updates from workers drive real-time progress — same pattern as existing `ImportWorker`. 
- -## VCard Refactoring - -Wrap existing VCard import into the new framework: - -- `Kith.Imports.Sources.VCard` implements `Source` behaviour -- Internally delegates to existing `Kith.VCard.Parser` -- VCard imports also write to `import_records` for consistency -- Existing `ImportWorker` is deprecated; new imports use `ImportSourceWorker` -- Old worker remains for any in-flight jobs to complete - -**Data flow:** The `ImportSourceWorker` loads the file from `Kith.Storage` using `file_storage_key`, reads it into a binary, and passes it to `source.import/4`. The VCard Source receives the binary and delegates to `Kith.VCard.Parser.parse/1` (same input format as today). This means the upload step must store the file via `Kith.Storage` before enqueuing the Oban job — the current `ImportWorker` pattern of passing `file_data` in job args is not carried over. - -**Existing imports:** Contacts previously imported via the old `ImportWorker` have no `import_records` entries. The first VCard import under the new system treats all contacts as new — existing `contact_exists?/2` duplicate detection (email/name match) is not carried into the new framework. Users who re-import an old VCard may see duplicates; this is acceptable as a one-time migration cost and can be resolved via the existing duplicate detection feature (`DuplicateDetectionWorker`). 
- -## File Structure - -``` -lib/kith/imports.ex # Context module -lib/kith/imports/source.ex # Behaviour definition -lib/kith/imports/import.ex # Import schema (job tracking) -lib/kith/imports/import_record.ex # ImportRecord schema (dedup) -lib/kith/imports/sources/monica.ex # Monica source implementation -lib/kith/imports/sources/vcard.ex # VCard source (wraps existing parser) -lib/kith/workers/import_source_worker.ex # Generic import Oban worker -lib/kith/workers/photo_sync_worker.ex # Photo download Oban worker -lib/kith/workers/api_supplement_worker.ex # API data supplement Oban worker -lib/kith/workers/import_file_cleanup_worker.ex # Periodic cleanup of import files (30-day retention) - -lib/kith_web/live/import_wizard_live.ex # Import wizard LiveView -lib/kith_web/live/components/monica_import_component.ex -lib/kith_web/live/components/vcard_import_component.ex - -priv/repo/migrations/TIMESTAMP_create_imports_and_import_records.exs -``` diff --git a/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md b/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md new file mode 100644 index 0000000..fc1b43e --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md @@ -0,0 +1,498 @@ +# Account Reset Completeness + +**Date:** 2026-05-15 +**Status:** Approved +**Scope:** Personal CRM (`kith`) + +## Context + +On dev, a Monica re-import after an account reset failed photo sync. The Monica +photo sync worker (`Kith.Workers.MonicaPhotoSyncWorker`) reported "contact is +deleted" for photos whose `contact.id` matched contacts that should no longer +exist. Root cause: the current `Kith.Workers.AccountResetWorker` hard-deletes +contacts (via CASCADE) but does **not** delete the polymorphic mapping rows in +`import_records`, nor the parent `imports` rows. On re-import: + +1. `MonicaApi.crawl/5` looks up `find_import_record(account, "monica_api", "contact", source_id)` +2. 
Finds a stale row from the prior import, pointing at a now-deleted `local_entity_id` +3. `handle_existing_contact` calls `Repo.get(Contacts.Contact, local_id)` → `nil` → + the function falls through to `do_create_api_contact` which creates a new contact + and inserts a second `import_record` for the same `(account_id, source, source_entity_type, source_entity_id)` tuple +4. The unique constraint on that tuple raises, OR the photo sync subsequently calls + `Repo.one` against the same lookup and crashes on `Ecto.MultipleResultsError` + +The bug surfaced as photo sync silently failing, but the underlying issue is +that the reset is incomplete in multiple dimensions, not just imports. + +### Other tables left orphaned by the current reset + +| Table | Today's behavior | Should be | +|---|---|---| +| `imports` | Untouched | Wiped | +| `import_records` | Untouched | Wiped | +| `conversations` | Untouched | Wiped (CASCADE → `messages`) | +| `journal_entries` | Untouched | Wiped | +| `tasks` | Untouched | Wiped | +| `reminders` (records) | Oban jobs cancelled; records remain | Wiped (CASCADE → `reminder_rules`, `reminder_instances`) | +| Reference data (genders, types) | Preserved | Preserved (no change) | +| `account_invitations` | Preserved | Preserved (no change) | + +### In-flight Oban jobs are also a hazard + +If a `MonicaApiCrawlWorker` or `MonicaPhotoSyncWorker` is running when reset +starts, it keeps inserting rows after the wipe. The current reset only cancels +reminder jobs (`cancel_reminder_jobs/1`). It must also cancel pending/scheduled +import-related jobs — but **only those belonging to the resetting account**, so +no other account's work is touched. + +## Goals + +1. After `AccountResetWorker` completes, no account-scoped data for the target + account remains beyond reference data (genders, relationship_types, + contact_field_types, etc.) and `account_invitations`. +2. 
A subsequent re-import (Monica API or vCard) for the same account succeeds + without seeing stale `import_records` from prior runs. +3. The reset cancels all in-flight import-related Oban jobs for the target + account before wiping data, eliminating the mid-flight write race. +4. Every cleanup operation is account-scoped. Running reset on account A does + not affect any row, file, or Oban job belonging to account B. +5. The fix does not turn `AccountResetWorker` into a god-module. Each domain's + cleanup lives next to that domain. + +## Non-goals + +- Preserving import history after reset. "Completely wipe" means the `imports` + rows go too. The Oban job record (state, completed_at) is the audit trail. +- Reference data preservation changes. Genders, relationship_types, etc. + continue to be preserved (current behavior). +- Hardening the photo sync worker against stale state as a belt-and-suspenders + defense. With reset cancelling jobs and wiping `import_records`, the worker + cannot see stale references. If a future bug bypasses reset, that's a + separate fix. +- Multi-tenant data-isolation review across the rest of the codebase. This + spec only addresses the reset path. + +## Out of scope + +- Soft-delete of accounts themselves (the `accounts` row stays). +- User accounts (`users` table). Reset clears data, not auth. +- Custom contact_field_types or other reference data the user has added — + preserved per the recommendation in the brainstorming session. +- Adding a DB-level FK to `import_records.local_entity_id`. The polymorphic + mapping pattern is intentional. + +## Design + +### Module decomposition + +The worker becomes pure orchestration. Each domain's cleanup module lives in +that domain's namespace. No `@behaviour` ceremony — a function-naming +convention (`wipe_for_account/1` returning `:ok`) is sufficient for one +consumer. 
+ +``` +lib/kith/ +├── activities/cleanup.ex # NEW — wipe account-scoped activities +├── audit_logs/cleanup.ex # NEW — wipe audit_logs +├── contacts/cleanup.ex # NEW — hard-delete contacts (CASCADE) + tags +├── conversations/cleanup.ex # NEW — wipe conversations (CASCADE → messages) +├── imports/cleanup.ex # NEW — wipe imports + import_records +├── imports/job_cancellation.ex # NEW — cancel pending Oban jobs for THIS account's imports +├── journal/cleanup.ex # NEW — wipe journal_entries +├── reminders/cleanup.ex # NEW — cancel reminder Oban jobs + wipe reminders (CASCADE) +├── storage/account_cleanup.ex # NEW — delete photo + document + import-upload files +├── tasks/cleanup.ex # NEW — wipe tasks +└── workers/account_reset_worker.ex # REFACTOR — orchestrator only (~40 LoC) +``` + +(Note: `tags` is wiped inside `Contacts.Cleanup` because it shares the +contacts axis-of-change. `activities` is its own context and gets its own +module per SOLID-elixir's SRP-module guidance.) + +Each cleanup module exposes a single function: + +```elixir +defmodule Kith.Imports.Cleanup do + @moduledoc "Wipes all import history for a single account." 
+ + alias Kith.{Imports.Import, Imports.ImportRecord, Repo} + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {records, _} = Repo.delete_all(from(r in ImportRecord, where: r.account_id == ^account_id)) + {imports, _} = Repo.delete_all(from(i in Import, where: i.account_id == ^account_id)) + Logger.info("[Imports.Cleanup] wiped #{records} records + #{imports} imports for account #{account_id}") + :ok + end +end +``` + +The worker: + +```elixir +defmodule Kith.Workers.AccountResetWorker do + use Oban.Worker, + queue: :default, + max_attempts: 3, + unique: [period: 300, fields: [:args], keys: [:account_id]] + + require Logger + + alias Kith.{AuditLogs, Contacts, Conversations, Imports, Journal, Reminders, Storage, Tasks} + + @cleaners [ + Imports.JobCancellation, + Storage.AccountCleanup, + Contacts.Cleanup, + Imports.Cleanup, + Conversations.Cleanup, + Reminders.Cleanup, + Tasks.Cleanup, + Journal.Cleanup, + Activities.Cleanup, + AuditLogs.Cleanup + ] + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"account_id" => account_id}}) do + Logger.metadata(account_id: account_id, worker: "AccountReset") + Logger.info("[AccountReset] starting reset for account #{account_id}") + write_initiated_audit_log(account_id) + + Enum.each(@cleaners, fn cleaner -> + Logger.info("[AccountReset] running #{inspect(cleaner)}") + :ok = cleaner.wipe_for_account(account_id) + end) + + Logger.info("[AccountReset] completed reset for account #{account_id}") + :ok + end + + defp write_initiated_audit_log(account_id) do + Kith.AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{reason: "Account data reset initiated"} + }) + end +end +``` + +### Data flow & order-of-operations + +The ordering is load-bearing: + +1. **`Imports.JobCancellation`** — must run FIRST. 
Otherwise a running + `MonicaApiCrawlWorker` keeps inserting rows after the wipe. +2. **`Storage.AccountCleanup`** — must run BEFORE `Contacts.Cleanup`. Contact + CASCADE deletes the `photos` and `documents` rows; once those rows are gone, + we can no longer iterate their `storage_key` values to delete files. Also + sweeps `imports.file_storage_key` for uploaded vCards. +3. **`Contacts.Cleanup`** — hard-deletes contacts; CASCADE removes addresses, + contact_fields, photos rows, documents rows, notes, debts, gifts, pets, + emotions, relationships, calls, life_events, duplicate_candidates, + immich_candidates. +4. **`Imports.Cleanup`** — wipes `import_records` then `imports`. Runs AFTER + contacts so `local_entity_id` references are already dangling — we just + sweep the whole table for this account, no coordination needed. +5. **`Conversations.Cleanup`** — wipes conversations; CASCADE removes messages. +6. **`Reminders.Cleanup`** — first cancels reminder Oban jobs (matching the + existing pattern, scoped to this account), then deletes reminders; CASCADE + removes reminder_rules, reminder_instances. +7. **`Tasks.Cleanup`** — wipes tasks. +8. **`Journal.Cleanup`** — wipes journal_entries. +9. **`Activities.Cleanup`** — wipes account-scoped `activities` (no contact FK). + (Note: `tags` is wiped inside `Contacts.Cleanup` at step 3.) +10. **`AuditLogs.Cleanup`** — runs LAST. The "account_data_reset" audit log + written at start needs to live until the reset completes; wiping it earlier + would erase the audit trail of the reset itself. + +### Account-scoped Oban job cancellation + +`Kith.Imports.JobCancellation.wipe_for_account/1` cancels jobs by querying +`Oban.Job` directly with account-scoped filters: + +```elixir +defmodule Kith.Imports.JobCancellation do + @moduledoc """ + Cancels all pending/scheduled Oban jobs for a single account's imports. + Scoping rule: only this account's import_ids and account_id are matched. + No other account's jobs are touched. 
+ """ + + alias Kith.{Imports.Import, Repo} + import Ecto.Query + require Logger + + @import_workers ~w[ + Elixir.Kith.Workers.MonicaApiCrawlWorker + Elixir.Kith.Workers.MonicaPhotoSyncWorker + Elixir.Kith.Workers.MonicaDocumentImportWorker + Elixir.Kith.Workers.ImportSourceWorker + ] + + @cancellable_states ~w[available scheduled retryable executing] + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + import_ids = account_import_ids(account_id) + + import_cancelled = cancel_jobs_by_import_id(import_ids) + account_cancelled = cancel_jobs_by_account_id(account_id) + + Logger.info( + "[Imports.JobCancellation] cancelled #{import_cancelled} import job(s) + " <> + "#{account_cancelled} account-scoped job(s) for account #{account_id}" + ) + + :ok + end + + defp account_import_ids(account_id) do + Repo.all(from(i in Import, where: i.account_id == ^account_id, select: i.id)) + end + + defp cancel_jobs_by_import_id([]), do: 0 + + defp cancel_jobs_by_import_id(import_ids) do + jobs = + Repo.all( + from(j in Oban.Job, + where: j.worker in ^@import_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'import_id')::int", j.args) in ^import_ids + ) + ) + + Enum.each(jobs, &Oban.cancel_job/1) + length(jobs) + end + + defp cancel_jobs_by_account_id(account_id) do + jobs = + Repo.all( + from(j in Oban.Job, + where: j.worker == "Elixir.Kith.Workers.DuplicateDetectionWorker", + where: j.state in ^@cancellable_states, + where: fragment("(?->>'account_id')::int", j.args) == ^account_id + ) + ) + + Enum.each(jobs, &Oban.cancel_job/1) + length(jobs) + end +end +``` + +Two key properties: +- **Account-scoped**: every WHERE clause filters by `account_id` (directly or + transitively via `import_id IN account's imports`). +- **State filter**: only jobs in cancellable states are touched. Completed and + cancelled jobs are left alone. 
+ +### Error handling + +- Each cleanup module returns `:ok` on success, raises on unexpected failure. +- The worker's `Enum.each` propagates the raise. Oban catches, marks the job + `:retryable`, and retries per backoff (`max_attempts: 3`). +- After 3 attempts, the job moves to `:discarded`. The Oban Web dashboard + surfaces this to admins. The audit log written at the start is still present + (since `AuditLogs.Cleanup` is last) → user/admin can see the reset was + attempted. +- Bulk deletes are NOT wrapped in `Ecto.Multi`. Each `Repo.delete_all` is its + own transaction; large accounts don't fight for a single long-held lock. +- Cleanup operations are inherently idempotent (deleting from an empty table + succeeds with `{0, nil}`). Retries are safe. + +### Storage delete: the one warn-and-continue path + +Storage operations can fail benignly (S3 already deleted, network blip). The +existing pattern is preserved: + +```elixir +defp safe_delete_file(nil), do: :ok + +defp safe_delete_file(key) do + case Kith.Storage.delete(key) do + :ok -> :ok + + {:error, reason} -> + Logger.warning("[Storage.AccountCleanup] failed to delete #{key}: #{inspect(reason)}") + :ok + end +end +``` + +This is `:ok` because storage objects are recoverable separately (S3 lifecycle, +manual sweep) and don't affect data integrity. + +### Observability + +Logger metadata: every cleanup logs with `account_id` and `worker` in +`Logger.metadata`, plus a `[Module.Name]` prefix in the message body. Sample: + +``` +[AccountReset] starting reset for account 42 +[AccountReset] running Kith.Imports.JobCancellation +[Imports.JobCancellation] cancelled 3 import job(s) + 1 account-scoped job(s) for account 42 +[AccountReset] running Kith.Storage.AccountCleanup +[Storage.AccountCleanup] deleted 47 photo files + 12 document files + 2 import uploads for account 42 +[AccountReset] running Kith.Contacts.Cleanup +[Contacts.Cleanup] hard-deleted 423 contacts (CASCADE) for account 42 +... 
+[AccountReset] completed reset for account 42 +``` + +The structured `account_id` metadata reaches log search and Sentry as a tag, +not just a substring in the message. + +## Testing + +### Per-module unit tests + +Every Cleanup module gets `test/kith/{domain}/cleanup_test.exs` with the same +shape: + +- Fixture data for the target account AND a control account +- Call `wipe_for_account(target_account_id)` +- Assert: target rows are zero; control rows are unchanged + +The **control-account untouched assertion is mandatory** in every test — it's +the contract that protects against cross-account leakage. + +### `Imports.JobCancellation` test + +Uses `Oban.Testing`. Inserts pending jobs for both accounts (matching all four +`@import_workers` plus `DuplicateDetectionWorker`). After +`wipe_for_account(target)`: + +- Target's jobs: state `"cancelled"` +- Other account's jobs: state `"available"` (unchanged) +- Completed jobs (state `"completed"`) for the target: also unchanged (we only + cancel still-cancellable states) + +### Regression test for the user-reported bug + +`test/kith/workers/account_reset_worker_test.exs` gets the actual scenario +that broke on dev: + +```elixir +test "re-import after reset can sync photos without finding stale import_records", ctx do + # Initial import: creates contact + import_record for Monica id 964 + import_a = import_fixture(ctx.account, ctx.user_id, %{source: "monica_api"}) + contact_a = contact_fixture(ctx.account) + {:ok, _} = Imports.record_imported_entity(import_a, "contact", "964", "contact", contact_a.id) + + # Full reset + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.account}) + + # Target account fully wiped + assert count_for(Contacts.Contact, ctx.account) == 0 + assert count_for(Imports.Import, ctx.account) == 0 + assert count_for(Imports.ImportRecord, ctx.account) == 0 + + # Re-import: new contact + new import_record for the same Monica id 964 + import_b = import_fixture(ctx.account, ctx.user_id, %{source:
"monica_api"}) + contact_b = contact_fixture(ctx.account) + {:ok, _} = Imports.record_imported_entity(import_b, "contact", "964", "contact", contact_b.id) + + # The photo sync lookup that previously found stale data now resolves to the new contact + assert %{local_entity_id: local_id} = + Imports.find_import_record(ctx.account, "monica_api", "contact", "964") + + assert local_id == contact_b.id +end +``` + +### Cross-account isolation test on the worker + +Snapshot-based: populate two accounts with data across every wiped domain, +reset one, assert the other's snapshot is bit-identical. + +```elixir +defp data_snapshot(account_id) do + %{ + contacts: count_for(Contacts.Contact, account_id), + imports: count_for(Imports.Import, account_id), + import_records: count_for(Imports.ImportRecord, account_id), + conversations: count_for(Conversations.Conversation, account_id), + tasks: count_for(Tasks.Task, account_id), + journal_entries: count_for(Journal.Entry, account_id), + reminders: count_for(Reminders.Reminder, account_id), + tags: count_for(Contacts.Tag, account_id), + activities: count_for(Activities.Activity, account_id), + audit_logs: count_for(AuditLogs.AuditLog, account_id) + } +end +``` + +Every new domain we wipe in the future adds one line to `data_snapshot/1` — +forgetting will cause the isolation test to fail loudly. + +### Idempotency tests + +Every Cleanup module: call `wipe_for_account/1` twice in a row; assert second +call returns `:ok` with zero counts (or whatever the second-call shape is). +Cheap, catches any assumption that the table has data. + +### What's NOT tested + +- Oban retry semantics — rely on the library's own coverage. +- Storage backend internals — `Kith.Storage.Local` and `Kith.Storage.S3` have + their own tests; `safe_delete_file/1`'s warn-on-error path is small enough + to verify by reading. + +## Migration / backwards compatibility + +No DB migrations required. All changes are at the Elixir module layer. 
+ +Existing accounts in any state work with the new worker — including accounts +that already have orphaned `import_records` from prior resets. The next reset +will sweep them. + +## Files changed + +| File | Change | +|---|---| +| `lib/kith/activities/cleanup.ex` | NEW | +| `lib/kith/audit_logs/cleanup.ex` | NEW | +| `lib/kith/contacts/cleanup.ex` | NEW (handles contacts + tags) | +| `lib/kith/conversations/cleanup.ex` | NEW | +| `lib/kith/imports/cleanup.ex` | NEW | +| `lib/kith/imports/job_cancellation.ex` | NEW | +| `lib/kith/journal/cleanup.ex` | NEW | +| `lib/kith/reminders/cleanup.ex` | NEW | +| `lib/kith/storage/account_cleanup.ex` | NEW | +| `lib/kith/tasks/cleanup.ex` | NEW | +| `lib/kith/workers/account_reset_worker.ex` | REFACTOR — orchestrator only | +| `test/kith/activities/cleanup_test.exs` | NEW | +| `test/kith/audit_logs/cleanup_test.exs` | NEW | +| `test/kith/contacts/cleanup_test.exs` | NEW | +| `test/kith/conversations/cleanup_test.exs` | NEW | +| `test/kith/imports/cleanup_test.exs` | NEW | +| `test/kith/imports/job_cancellation_test.exs` | NEW | +| `test/kith/journal/cleanup_test.exs` | NEW | +| `test/kith/reminders/cleanup_test.exs` | NEW | +| `test/kith/storage/account_cleanup_test.exs` | NEW | +| `test/kith/tasks/cleanup_test.exs` | NEW | +| `test/kith/workers/account_reset_worker_test.exs` | EXTEND — add regression + isolation tests | + +## Verification + +1. `mix test` — 0 failures. +2. `mix quality` — clean (format + credo + sobelow + dialyzer). +3. Manual on dev: import Monica account, trigger reset via Settings → Account, + re-import the same Monica account, confirm photo sync now succeeds. +4. `tail -f log/dev.log | grep '\[AccountReset\|Cleanup\|JobCancellation\]'` + shows the structured per-step progress. + +## References + +- SOLID for Elixir standards: `07-Documentation/Standards/solid-principles-elixir.md` + (vault). 
Specifically §SRP-module ("god module" anti-pattern) and §OCP-decision-tree + for the function-naming-convention vs. behaviour trade-off. +- The bug surfaced in Monica re-import photo sync; root cause is the + `import_records.local_entity_id` polymorphic mapping with no DB-level FK. diff --git a/docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md b/docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md new file mode 100644 index 0000000..5e6d8b7 --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-monica-import-deployment-fixes-design.md @@ -0,0 +1,283 @@ +# Monica Import Deployment Fixes — Design Spec + +**Date:** 2026-05-16 +**Status:** Approved (brainstorming) +**Branch:** `fix/duplicate-detection` + +## Problem statement + +In the production split-container deployment (`docker-compose.prod.yml`: separate `app` and `worker` services), the Monica importer crashes with: + +``` +** (ArgumentError) unknown registry: Kith.PubSub. Either the registry name is + invalid or the registry is not running, possibly because its application + isn't started + (phoenix_pubsub 2.2.0) lib/phoenix/pubsub.ex:232: Phoenix.PubSub.broadcast/4 + (kith 0.1.0) lib/kith/imports/sources/monica_api.ex:255: ... + (oban 2.20.3) lib/oban/queue/executor.ex:145: Oban.Queue.Executor.perform/1 +``` + +Root cause analysis identifies **two distinct bugs** (A and B) that compound each other, plus a third issue (C) that fixing them exposes: + +### Bug A — PubSub not started in worker mode + +`lib/kith/application.ex` starts `{Phoenix.PubSub, name: Kith.PubSub}` only in +`mode_children/0`, which is `[]` when `KITH_MODE=worker`. Every import job that +broadcasts progress or completion (`maybe_broadcast_progress/4`, +`MonicaApiCrawlWorker` completion, `MonicaMiscDataWorker` completion) crashes +on the worker container. + +### Bug B — Oban runs on both containers without gating + +`config/config.exs` configures Oban with `queues:` and `plugins:` set +unconditionally.
Both `app` and `worker` containers start the same Oban +supervisor and race for jobs via Postgres row-level locks. Symptoms: + +- When the **app** wins the race, the job runs to completion (PubSub works, + LiveView gets progress) — but jobs leak into the web-facing container, + defeating the split. +- When the **worker** wins the race, the job crashes on first broadcast + (Bug A), retries via Oban, eventually fails or gets re-claimed by the app. + +### Bug C — PubSub does not cross containers + +Fixing Bug A and Bug B alone causes a regression: with `KITH_MODE=web` Oban +gated off, only the worker processes jobs; but `Phoenix.PubSub` (default +`PG2` adapter) requires connected BEAM nodes to span containers, and the +current deployment has no Erlang clustering (`RELEASE_COOKIE` unset, no +`DNS_CLUSTER_QUERY`, no `libcluster`). LiveView subscribers in the `app` +container would never receive worker-emitted broadcasts. + +Three LiveViews depend on these broadcasts: + +- `lib/kith_web/live/import_wizard_live.ex` (subscribes line 79) +- `lib/kith_web/live/settings_live/import.ex` (subscribes line 37) +- `lib/kith_web/live/import_history_live/show.ex` (subscribes line 19) + +## Goals + +1. Worker container processes Monica imports without crashing. +2. Only the worker container runs Oban jobs in production. +3. LiveView subscribers in the `app` container receive progress and completion + broadcasts emitted by the `worker` container. +4. No regression to dev (single container via `mix phx.server` or + `docker-compose.dev.yml`) or test (`Oban.testing: :manual`) environments. + +## Non-goals + +- Multi-replica scaling (multiple `app` or multiple `worker` containers). This + design targets the user's stated 1+1 topology. The clustering approach + (DNSCluster + shared alias) extends naturally to multi-replica, but no + config or testing is included for that case. +- Multi-DC deployments. 
The PG2 adapter is single-region; cross-region would + need a Redis or Postgres-LISTEN adapter (deferred). +- Refactoring the Monica importer or misc worker beyond what these fixes + require. +- Replacing Phoenix.PubSub with an external broker. + +## Architecture + +### `lib/kith/application.ex` + +`Phoenix.PubSub` and `DNSCluster` move from `mode_children/0` to +`base_children/0`. These are application-layer concerns, not HTTP-layer: + +```elixir +defp base_children do + Kith.Geocoding.install_fuse() + Kith.Weather.install_fuse() + Kith.SentryEventHandler.attach() + :logger.add_handler(:sentry_handler, Sentry.LoggerHandler, %{}) + + [ + Kith.Vault, + Kith.Repo, + {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, + {Oban, Application.fetch_env!(:kith, Oban)}, + {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, + {Task.Supervisor, name: Kith.TaskSupervisor}, + {Phoenix.PubSub, name: Kith.PubSub}, + {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore} + ] +end + +defp mode_children do + case System.get_env("KITH_MODE", "web") do + "worker" -> + [] + + _web -> + [ + Kith.PromEx, + KithWeb.Telemetry, + KithWeb.Endpoint + ] + end +end +``` + +`KithWeb.Endpoint` references `pubsub_server: Kith.PubSub` (config.exs:79). +PubSub now starts strictly before Endpoint within `base_children` → ordering +is safe. + +### `config/runtime.exs` — Oban mode gating + +Added inside the existing `if config_env() == :prod do` block (near the +`# Rate limiting` section, around line 216): + +```elixir +# Oban — only the worker container processes jobs in production. +# The web container can call `Oban.insert/1` (queues are still defined +# by name in config.exs so insertion validates) but runs no queues or +# plugins. KITH_MODE=worker keeps the full config from config.exs. 
+case System.get_env("KITH_MODE", "web") do + "worker" -> + :ok + + _web -> + config :kith, Oban, queues: false, plugins: false +end +``` + +This is wrapped by the `:prod` env guard so dev (`mix phx.server`, +single-container `docker-compose.dev.yml`) is unaffected. Test env is +already pinned to `testing: :manual` in `config/test.exs:27`. + +### `docker-compose.prod.yml` — clustering + +Both `app` and `worker` services gain: + +```yaml +hostname: kith-app # or kith-worker +environment: + RELEASE_COOKIE: ${RELEASE_COOKIE} + RELEASE_DISTRIBUTION: name + DNS_CLUSTER_QUERY: kith-cluster +networks: + default: + aliases: + - kith-cluster +``` + +Mechanics: + +- `RELEASE_COOKIE` (same on both): the Erlang distribution shared secret. + Required env; if unset, BEAM generates a random one per container and + nodes can't connect. +- `RELEASE_DISTRIBUTION: name`: long-form node names use FQDN-style hostnames, + letting Docker DNS resolve them. +- `hostname: kith-app` / `kith-worker`: unique BEAM node hostnames. The + resulting node names are `kith@kith-app` and `kith@kith-worker`. +- `aliases: [kith-cluster]`: both containers register this alias in Docker's + embedded DNS. `kith-cluster` then resolves to **both** container IPs (Docker + returns all matching A records). +- `DNS_CLUSTER_QUERY: kith-cluster`: tells `DNSCluster` (already a dep) + to query that name on a periodic interval. Each result IP it doesn't already + see as a connected node gets `Node.connect/1`. Idempotent and self-healing. + +Once nodes are connected, `Phoenix.PubSub` with the default PG2 adapter +broadcasts cross-node automatically. No code changes elsewhere needed. + +### `.env.example` + +Add a `RELEASE_COOKIE` entry with generation instructions: + +```bash +# Erlang BEAM distribution cookie (shared between app and worker containers +# so they can cluster for cross-container PubSub).
Generate with: +# mix phx.gen.secret 32 +# or: +# openssl rand -base64 32 +RELEASE_COOKIE=your-shared-cookie-here +``` + +Place it adjacent to `SECRET_KEY_BASE` in the secrets section. + +## Verification + +### Automated + +- Existing test suite continues to pass unchanged. PubSub is now started in + `base_children`, which already runs in test env via `Kith.DataCase`. The + Oban gating block is wrapped in `if config_env() == :prod` so test env is + not affected. + +### Manual (prod-like) + +```bash +# 1. Generate cookie +RELEASE_COOKIE=$(openssl rand -base64 32) +# add to .env + +# 2. Bring up the prod stack +docker compose -f docker-compose.prod.yml up --build + +# 3. Verify clustering +docker compose -f docker-compose.prod.yml exec app \ + /app/bin/kith eval 'IO.inspect(Node.list())' +# Expected: [:"kith@kith-worker"] + +docker compose -f docker-compose.prod.yml exec worker \ + /app/bin/kith eval 'IO.inspect(Node.list())' +# Expected: [:"kith@kith-app"] + +# 4. Verify Oban gating +docker compose -f docker-compose.prod.yml exec app \ + /app/bin/kith eval 'IO.inspect(Oban.config().queues)' +# Expected: [] or false (web is insert-only) + +docker compose -f docker-compose.prod.yml exec worker \ + /app/bin/kith eval 'IO.inspect(Oban.config().queues)' +# Expected: [default: 10, mailers: 10, ...] (full config) + +# 5. Trigger an import from the wizard UI; observe: +# - worker logs: MonicaApiCrawlWorker starts and progresses +# - app logs: no Oban executor logs +# - browser: LiveView progress bar updates in real time +# - browser: completion message renders when crawl finishes +``` + +### Failure modes to watch for + +- `RELEASE_COOKIE` unset → containers generate independent cookies → nodes + never connect. Symptom: `Node.list()` is empty on both, progress doesn't + cross. Fix: set the env var. +- Docker DNS returns only one IP for the alias → only one connection + direction works. 
Mitigation: DNSCluster polls periodically; the other + direction self-heals within a few seconds. Symptom of permanent breakage: + `Node.list()` empty on one container. +- Worker container started before app container's BEAM is ready → initial + cluster connect may fail, then succeed on the next DNSCluster poll. Not + user-visible because no import would be running during that window. + +## Trade-offs + +| Aspect | Cost | Mitigation | +|---|---|---| +| New env var `RELEASE_COOKIE` | One more secret to manage | Standard Erlang/Phoenix pattern; documented in `.env.example` | +| BEAM distribution exposed inside Docker network | Increases internal attack surface if Docker network is compromised | Network is internal-only (no published ports); cookie is opaque to anyone without the secret | +| DNSCluster polling overhead | One DNS query every 5s per container | Negligible; same as existing Phoenix-stack pattern | +| Bound to 1-app-1-worker topology for now | Multi-replica needs further testing | Documented as non-goal; DNSCluster + alias extends naturally | + +## Implementation order + +1. `lib/kith/application.ex` — move PubSub + DNSCluster to base_children. Tests + pass. +2. `config/runtime.exs` — add Oban gating block. Tests pass (gated by `:prod`). +3. `docker-compose.prod.yml` — add hostname, env vars, network alias. +4. `.env.example` — document RELEASE_COOKIE. +5. Manual smoke test per the verification section above. + +Each step is independently committable; an intermediate state (e.g. step 1+2 +without step 3) is "worker no longer crashes, race condition remains" — a +strict improvement over the current state. 
+ +## Out of scope (future work) + +- Multi-replica web/worker scaling +- Replacing PG2 PubSub with Redis or Postgres for cross-DC support +- Health checks for cluster connection state (could surface a degraded mode + indicator in the import history UI) +- Migrating `Phoenix.PubSub.broadcast` call sites in the import path to a + thin wrapper that logs broadcasts (helpful for ops debugging, but not + required for correctness) diff --git a/docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md b/docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md new file mode 100644 index 0000000..e0d3c90 --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-monica-import-perf-fix-design.md @@ -0,0 +1,313 @@ +# Monica import performance fix — design + +**Status:** approved +**Date:** 2026-05-16 +**Branch:** `fix/duplicate-detection` +**Builds on:** commit `6af91bf` (the bug-fix that unleashed Phase 4) + +## Context + +The bug-fix commit `6af91bf` restored a previously broken contract: `MonicaApiCrawlWorker.build_opts/1` now forwards every wizard option to `MonicaApi.crawl/5`, not just `extra_notes`. That fix was correct — auto-merge, pets, calls, activities, gifts, debts, tasks, reminders, conversations were all designed to be controllable from the wizard, but `build_opts/1` was silently dropping them. + +The consequence: a per-contact phase that had been an unreachable no-op for the wizard UI's entire lifetime suddenly fires **eight HTTP endpoints per imported contact**. For a 1000-contact account that is 8000 API calls against Monica's default 60-req/min rate limit. Imports went from ~2 minutes (Phase 1 only) to multi-hour stalls, made worse by a double retry layer that resets its inner counter every outer retry — the symptom users see as "retry: got response with status 429, will retry in 59000ms, 3 attempts left" repeating forever. + +This design fixes that, plus a small handful of pre-existing and self-inflicted perf issues that compound the problem. 
+ +### Problems being addressed + +1. **Phase 4 explosion** *(primary user-visible regression)* + `import_extra_data_types/5` walks every imported contact and fires up to 8 endpoints per contact unconditionally. There is no statistics-based short-circuit (unlike Phase 3 for notes), so contacts with zero pets/debts/gifts still incur a round-trip per endpoint. With ~1000 contacts × 8 endpoints = 8000 calls, the import cannot complete under Monica's default rate limit in reasonable wall-clock time. + +2. **Double retry layering** *(amplifier)* + `api_get_json_with_retry/4` (custom 65-second sleep loop, max 3 outer retries) wraps `Req.get`, which itself has built-in `:safe_transient` retry (max 3 inner retries, respects `Retry-After`). On a 429 these stack: up to 12 retry rounds for a single logical call, with up to ~12 minutes of cumulative sleep. The "3 attempts left always" log is the outer layer kicking off fresh inner-layer attempts. + +3. **No proactive throttle** *(amplifier)* + We make calls as fast as the BEAM lets us until Monica refuses. Every 429 burst then wastes a 59-second `Retry-After` window before traffic resumes. + +4. **`:persistent_term` global-GC storm** *(self-inflicted in PR 6af91bf)* + `phone_field_type?/1` caches one boolean per cft_id via `:persistent_term.put/2`. Each new key triggers a global GC of every BEAM process. On a cold import we warm 5-8 cft_ids back-to-back, stopping the world (LiveView, PubSub, PromEx, every Oban worker) each time. + +5. **Double libphonenumber normalization** *(self-inflicted)* + `MonicaApi.import_single_contact_field` normalizes via `PhoneFormatter.normalize/2`, then `Contacts.create_contact_field/2` calls `maybe_normalize_phone/1` which normalizes the already-canonical value again. Per phone field this is one wasted libphonenumber parse plus one wasted `Repo.get(ContactFieldType, cft_id)` DB round trip. + +6. 
**Pre-existing per-write `Repo.get` in `maybe_normalize_phone`** *(amplifier, not in scope here)* + `Contacts.maybe_normalize_phone/1` looks up the ContactFieldType per call to discover the protocol. For 5000 field writes that's 5000 DB queries. Not introduced by recent work; we sidestep it for the Monica path only. + +## Goal + +1. Restore the Monica import to a reasonable wall-clock runtime — Phase 1+2+3 should complete in minutes for ~1000 contacts; Phase 4 should run in the background and only fire endpoints that actually have data. +2. Eliminate the double-retry layering so a single 429 doesn't cascade into multi-minute log loops. +3. Add a client-side throttle so 429s become rare under normal Monica defaults. +4. Pay back the perf debt introduced in commit `6af91bf` (persistent_term GC storm, double normalization). + +**Out of scope** (noted for follow-up): +- Fixing `maybe_normalize_phone`'s per-write `Repo.get` for UI/API callers (still a per-field DB query for non-Monica paths, but Monica is the only path that creates fields at bulk scale). +- Account-locale-derived region for UI form phone writes (currently UI writes leave bare numbers untouched). +- Auto-detection of Monica's actual rate limit (defaults are hand-configured). +- Batched per-contact fetches via Monica's `?include=...` if/when supported. + +## Approach + +### Part 1 — Extract Phase 4 into a dedicated worker + +**New worker:** `Kith.Workers.MonicaMiscDataWorker`, queue `:imports`, max attempts 3, timeout 30 minutes. + +A single Oban job per import (not per contact). Takes args: +```elixir +%{ + "import_id" => integer, + "credential_url" => string, + "credential_api_key" => string, # same wipe-after-completion pattern as MonicaPhotoSyncWorker + "plan" => [%{"source_id" => integer, "local_id" => integer, "endpoints" => [string]}, ...] +} +``` + +Worker logic: +1. Load `import_job`; bail early if `status == "cancelled"`. +2. Iterate `plan` entries. +3. 
For each entry, load the local contact (`Repo.get`); skip if `deleted_at != nil`. +4. For each endpoint in the entry's list, call the corresponding fetch helper (e.g. `import_contact_pets/6`). +5. Accumulate per-endpoint counts (e.g. `%{pets: 17, calls: 4, activities: 0, ...}`). +6. After completion, update `import_job.summary` with a new `"misc"` key holding the counts. Broadcast via the existing PubSub topic `"import:#{account_id}"` so the wizard UI sees the update. + +The per-contact `import_contact_pets/calls/activities/gifts/debts/tasks/reminders/conversations` helpers move verbatim from `MonicaApi` into the new worker module. Their internals are unchanged. + +**`MonicaApi.crawl/5` changes:** +- Phase 4 (`import_extra_data_types/5`) deleted. +- The `crawl/5` return value's `summary` map gains a new key `:misc_data` (the plan list). Caller `MonicaApiCrawlWorker` consumes this to construct the misc worker's args. + +**`MonicaApiCrawlWorker.perform/1` changes:** +- After `Imports.update_import_status(:completed)`, alongside the existing `MonicaPhotoSyncWorker` enqueue, enqueue `MonicaMiscDataWorker` with the plan from `summary[:misc_data]`. +- The plan is removed from the persisted summary before writing (it's transit data, not a metric). + +### Part 2 — Throttle: Hammer-backed rate limiter + +**New module:** `Kith.Imports.Sources.MonicaApi.RateLimiter`. + +Single public function `wait!(host)`. Wraps `Hammer.check_rate(bucket, scale_ms, limit)` with: +- Bucket key: `"monica_api:#{URI.parse(url).host}"` — per-host so independent Monica instances don't share a quota. +- Scale: 60_000 ms. +- Limit: configurable via `Application.get_env(:kith, :monica_rate_limit, 55)`. + +55 is one token below Monica's documented default of 60/min, leaving safety margin. + +On `{:deny, _}`, sleep ~1100ms and recurse. 
(Hammer 6.x's `{:deny, limit}` carries the configured limit, not a retry-after; the time to the next bucket is available via `Hammer.inspect_bucket/3`, but it can be over-conservative, so a fixed small sleep paces us back into the window naturally.) + +**Call site:** `MonicaApi.api_get/3` calls `RateLimiter.wait!(credential.url)` before every `Req.get`. The new misc worker's helpers go through the same `api_get`, so they're throttled too. + +**Config:** +- `config/config.exs`: `config :kith, :monica_rate_limit, 55`. +- `config/test.exs`: `config :kith, :monica_rate_limit, 1_000_000` — effectively unthrottled so tests don't sleep. Throttle logic itself is exercised via its own test file with a temporarily lowered limit. + +### Part 3 — Retry: collapse to Req's built-in only + +**Delete:** +- `MonicaApi.api_get_json_with_retry/4` (lines 1109-1137 of current monica_api.ex) +- `@max_rate_limit_retries 3` +- `@rate_limit_sleep_ms :timer.seconds(65)` + +**Replace** `api_get_json/3` with a direct version: +```elixir +defp api_get_json(credential, url, params) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: status}} -> {:error, {:http, status}} + {:error, reason} -> {:error, reason} + end +end +``` + +**Update** `api_get/3` to configure Req's retry behavior explicitly: +```elixir +defp api_get(credential, url, params \\ []) do + RateLimiter.wait!(credential.url) + + options = [ + headers: [...], + params: params, + max_retries: 5, + retry_log_level: :warning + ] ++ Map.get(credential, :req_options, []) + + Req.get(url, options) +end +``` + +`max_retries: 5` (up from the implicit Req default of 3) so a sustained slow window doesn't terminate the call. Req's `:safe_transient` retry handles 429/5xx and respects `Retry-After` natively. + +The two error tuples that previously distinguished `:rate_limited` from other errors are no longer needed — `{:error, {:http, 429}}` is now self-describing and bubbles up to the same caller error-handling that already exists. 
+ +### Part 4 — Statistics short-circuit + misc plan + +**New helper** `collect_misc_data/5` in `MonicaApi`, called inside the contact loop alongside the existing `collect_extra_notes/3`: + +```elixir +@misc_endpoints [ + {:calls, "number_of_calls"}, + {:activities, "number_of_activities"}, + {:gifts, "number_of_gifts"}, + {:debts, "number_of_debts"}, + {:tasks, "number_of_tasks"}, + {:reminders, "number_of_reminders"}, + {:conversations, "number_of_conversations"} +] + +defp collect_misc_data(deferred, api_contact, source_id, local_id, opts) do + stats = api_contact["statistics"] || %{} + + endpoints = + @misc_endpoints + |> Enum.filter(fn {key, stat_field} -> + # A missing stat defaults to 1 so the endpoint is still fetched (see Rules below). + opts[Atom.to_string(key)] != false and (stats[stat_field] || 1) > 0 + end) + |> Enum.map(&elem(&1, 0)) + + endpoints = if opts["pets"] != false, do: [:pets | endpoints], else: endpoints + + if endpoints == [] do + deferred + else + entry = %{source_id: source_id, local_id: local_id, endpoints: endpoints} + %{deferred | misc_data: [entry | deferred.misc_data]} + end +end +``` + +Rules: +- Endpoint is included only if (a) the wizard opt for that data type is not false **and** (b) Monica's stat field reports > 0 (or, for pets, the wizard opt is on — pets has no statistic field in Monica's payload). +- Contact contributes zero entries if every endpoint is filtered out — it's not even in the plan. +- Stat absent in payload is treated as ">0" (safer default; we'd rather make a wasted call than miss data). + +`deferred` (already threaded through `crawl/5`) gains a new key `misc_data: []`. After the contact loop completes, `deferred.misc_data` is the plan list passed to the misc worker. + +### Part 5 — Self-inflicted perf debt cleanup + +#### 5a. Replace `:persistent_term` cache with `ref_data` MapSet + +**Delete** `phone_field_type?/1` and `phone_field_type?(nil)` clauses in `monica_api.ex`. 
+ +**Extend** `ref_data` from: +```elixir +%{contact_field_types: %{name => id}} +``` +to: +```elixir +%{ + contact_field_types: %{name => id}, + phone_cft_ids: MapSet.t() +} +``` + +`build_or_update_ref_data/3` computes `phone_cft_ids` once per ref_data refresh (1-2 queries per entire import, vs 5-8 GC-triggering `:persistent_term.put` calls). + +**Update** `normalize_field_value/3` to take `ctx` (already in scope at the caller) instead of just `opts`: +```elixir +defp normalize_field_value(value, cft_id, ctx) when is_binary(value) do + if MapSet.member?(ctx.ref_data.phone_cft_ids, cft_id) do + region = parse_region(ctx.opts["phone_default_region"]) + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value + end +end +``` + +#### 5b. Bypass `Contacts.maybe_normalize_phone` from Monica path + +**Extend** `Contacts.create_contact_field/2` to `create_contact_field/3` with `opts \\ []`: +```elixir +def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do + attrs = if Keyword.get(opts, :normalize, true), do: maybe_normalize_phone(attrs), else: attrs + + %ContactField{contact_id: contact.id, account_id: contact.account_id} + |> ContactField.changeset(attrs) + |> Repo.insert() +end +``` + +Default `normalize: true` preserves behavior for UI/API callers (one line touched in `monica_api.ex` to pass `normalize: false`). + +This eliminates ~2000 redundant libphonenumber parses **and** ~5000 redundant `Repo.get(ContactFieldType, cft_id)` queries per typical 1000-contact import — all on the Monica path only. UI form path is unchanged. + +## Files to modify + +**Production code:** +- `lib/kith/imports/sources/monica_api/rate_limiter.ex` *(new)* — Hammer-backed throttle. +- `lib/kith/workers/monica_misc_data_worker.ex` *(new)* — Phase 4 worker. 
+- `lib/kith/imports/sources/monica_api.ex` — Phase 4 removed; `collect_misc_data/5` added; `phone_field_type?/1` deleted; `api_get_json_with_retry/4` deleted; `api_get/3` wraps `RateLimiter.wait!`; `normalize_field_value/3` takes `ctx`; `ref_data` extended with `phone_cft_ids`; `build_or_update_ref_data/3` computes that field; per-contact endpoint helpers (`import_contact_pets/calls/activities/gifts/debts/tasks/reminders/conversations`) relocated to the misc worker module. +- `lib/kith/workers/monica_api_crawl_worker.ex` — enqueues `MonicaMiscDataWorker` on successful completion. +- `lib/kith/contacts.ex` — `create_contact_field/2` → `create_contact_field/3` with `normalize: true` default. +- `config/config.exs` — `config :kith, :monica_rate_limit, 55`. +- `config/test.exs` — high override so tests don't sleep on the throttle. + +**Tests:** +- `test/kith/imports/sources/monica_api/rate_limiter_test.exs` *(new)* — under-limit allows, over-limit waits, per-host isolation, env override. +- `test/kith/workers/monica_misc_data_worker_test.exs` *(new)* — worker fires only planned endpoints; cancelled import skipped; summary populated; cred carried through args. +- `test/kith/imports/sources/monica_api_test.exs` *(extend)* — `crawl/5` enqueues misc worker with right plan; statistics-zero excluded; statistics-missing included; opt-outs honored; no per-contact endpoint stubs hit during main crawl. +- `test/kith/workers/monica_api_crawl_worker_test.exs` *(extend)* — boundary test for the enqueue. +- `test/kith/contacts_test.exs` *(extend or add)* — `create_contact_field/3` with `normalize: false` bypasses `maybe_normalize_phone`. + +## Existing functions to reuse + +- `MonicaPhotoSyncWorker` (`lib/kith/workers/monica_photo_sync_worker.ex`) — pattern for "enqueue from main crawl, carry credential through args, check `import.status` at top of `perform/1`, single Oban job that iterates contacts internally." 
+- `Imports.update_import_status/3` (existing) — pattern for writing `summary` updates that trigger the existing PubSub broadcast. +- `Phase 3 collect_extra_notes/3` (`lib/kith/imports/sources/monica_api.ex:583-599`) — pattern for "inspect statistics in the contact loop, accumulate a deferred entry, process after main crawl." +- `Hammer.check_rate/3` (existing dep) — token bucket primitive for the throttle. +- `Req`'s `:safe_transient` retry + `Retry-After` handling (default behavior) — single source of truth for retry logic after we delete the hand-rolled wrapper. + +## Verification + +1. **Unit tests:** + ``` + mix test test/kith/imports/sources/monica_api/rate_limiter_test.exs \ + test/kith/workers/monica_misc_data_worker_test.exs \ + test/kith/imports/sources/monica_api_test.exs \ + test/kith/workers/monica_api_crawl_worker_test.exs \ + test/kith/contacts_test.exs + ``` + All green. + +2. **Static analysis:** `mix quality` — no new credo issues, no new dialyzer warnings beyond the existing `.dialyzer_ignore.exs` entries. + +3. **Manual dev test:** + - Reset dev account via `Kith.Workers.AccountResetWorker`. + - Re-import ~1000 Monica contacts with default wizard options (all 8 misc data types checked). + - **`MonicaApiCrawlWorker` should complete in well under 2 minutes** (Phase 1 paginated calls + auto-merge + cross-refs + extra notes — bounded by ~20-30 throttled requests). + - **`MonicaMiscDataWorker` should complete in single-digit minutes** for typical CRM data (most contacts have nothing in pets/debts/gifts). + - Logs should show **zero** `"retry: got response with status 429"` messages under normal Monica defaults. + - `import_job.summary` after main worker: `imported`, `merged`, `contacts`, `notes` populated. + - `import_job.summary["misc"]` after misc worker: per-endpoint counts. + - Duplicates tab: a small number of pending candidates, not 6000 (this validates the earlier bug-fix is still working). + +4. 
**Oban dashboard:** + - `MonicaApiCrawlWorker` job completes and disappears from `executing`. + - `MonicaMiscDataWorker` job appears separately, runs to completion. + - Both individually cancellable. + +5. **Rate limiter sanity (optional IEx):** + ```elixir + times = for _ <- 1..70 do + {time_us, _} = :timer.tc(fn -> + Kith.Imports.Sources.MonicaApi.RateLimiter.wait!("https://test.monica") + end) + time_us / 1_000 + end + {Enum.take(times, 55) |> Enum.sum(), Enum.drop(times, 55) |> Enum.sum()} + ``` + First 55 calls should be near-zero; the 56th should block, retrying in ~1100ms steps, until the current 60-second window rolls over, after which the remaining calls are near-zero again (so the second sum is bounded by roughly one window, and the snippet can take up to a minute to run). + +## Risks + +- **Plan size in Oban args.** For typical CRMs the plan is small (5-15% of contacts contribute entries). At ~100k+ contact scale the args could grow large; if that happens, swap to a `misc_data_plan` jsonb column on `imports` and pass only `import_id`. Localized change, two lines. +- **Phase 4 status visibility.** Users see "import complete" when the main crawl finishes; misc data trickles in afterward. The wizard's PubSub channel already broadcasts summary updates, but the "complete" copy doesn't currently distinguish between "fully done" and "main done, misc running." Consider a UI follow-up: show a second progress line for `misc_data` if `summary["misc"]` is absent. +- **Misc worker cancellation race.** If the user cancels the import between main-crawl completion and the misc worker picking up the job, the misc worker checks `import.status == "cancelled"` at the top of `perform/1` and exits cleanly. If cancellation happens *mid-run*, the in-flight request finishes but no further requests fire. Same model as `MonicaPhotoSyncWorker` today. +- **Throttle starvation across concurrent imports.** If two users on the same Monica instance import simultaneously, they share the per-host bucket. Each gets ~half the throughput. Acceptable — Monica's actual limit is the shared resource anyway. 
+- **The pre-existing `maybe_normalize_phone` N+1 remains for UI form callers.** Not in scope; tracked as future work. Practical impact is invisible because UI writes happen one at a time. + +## Non-goals + +- Account-locale-derived region applied to UI form phone writes (separate change, larger surface). +- Hammer auto-detection of Monica's actual rate limit (Monica doesn't expose this in headers). +- Batched per-contact data fetches via Monica `?include=` query parameter (Monica's API doesn't currently support multi-resource includes for these endpoints). +- Splitting the misc worker into per-endpoint sub-workers (premature; single worker is simpler and the per-endpoint counts are already preserved). diff --git a/lib/kith/activities/cleanup.ex b/lib/kith/activities/cleanup.ex new file mode 100644 index 0000000..4f4c450 --- /dev/null +++ b/lib/kith/activities/cleanup.ex @@ -0,0 +1,21 @@ +defmodule Kith.Activities.Cleanup do + @moduledoc """ + Wipes all account-scoped activities for a single account. Activities have + no contact FK so they are not cleared by `Kith.Contacts.Cleanup`'s CASCADE. 
+ """ + + alias Kith.Activities.Activity + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in Activity, where: a.account_id == ^account_id)) + + Logger.info("[Activities.Cleanup] wiped #{count} activit(ies) for account #{account_id}") + :ok + end +end diff --git a/lib/kith/application.ex b/lib/kith/application.ex index 5ce9f9e..af8913b 100644 --- a/lib/kith/application.ex +++ b/lib/kith/application.ex @@ -26,7 +26,17 @@ defmodule Kith.Application do {Finch, name: Swoosh.Finch, pools: %{:default => [size: 10]}}, {Oban, Application.fetch_env!(:kith, Oban)}, {Cachex, name: :kith_cache, expiration: expiration(default: :timer.hours(24))}, - {Task.Supervisor, name: Kith.TaskSupervisor} + {Task.Supervisor, name: Kith.TaskSupervisor}, + # PubSub lives here (not in mode_children) so worker mode also starts + # it. Required for cross-container progress broadcasts in the + # split-deployment topology (app + worker containers). + {Phoenix.PubSub, name: Kith.PubSub}, + # libcluster: connects this BEAM node to its peer(s) so PubSub spans + # containers. Topology is configured at runtime via env-driven config + # in `runtime.exs`; when no peers are set (dev/test), this supervisor + # starts but does nothing. + {Cluster.Supervisor, + [Application.get_env(:libcluster, :topologies, []), [name: Kith.ClusterSupervisor]]} ] end @@ -39,8 +49,6 @@ defmodule Kith.Application do [ Kith.PromEx, KithWeb.Telemetry, - {DNSCluster, query: Application.get_env(:kith, :dns_cluster_query) || :ignore}, - {Phoenix.PubSub, name: Kith.PubSub}, KithWeb.Endpoint ] end diff --git a/lib/kith/audit_logs/cleanup.ex b/lib/kith/audit_logs/cleanup.ex new file mode 100644 index 0000000..1753f9e --- /dev/null +++ b/lib/kith/audit_logs/cleanup.ex @@ -0,0 +1,22 @@ +defmodule Kith.AuditLogs.Cleanup do + @moduledoc """ + Wipes all audit logs for a single account. 
Runs LAST in the reset pipeline + so the "account_data_reset" log written at the start of the worker lives + until the rest of cleanup completes. + """ + + alias Kith.AuditLogs.AuditLog + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(a in AuditLog, where: a.account_id == ^account_id)) + + Logger.info("[AuditLogs.Cleanup] wiped #{count} audit log(s) for account #{account_id}") + :ok + end +end diff --git a/lib/kith/cldr.ex b/lib/kith/cldr.ex index bb8891b..1da9647 100644 --- a/lib/kith/cldr.ex +++ b/lib/kith/cldr.ex @@ -7,5 +7,5 @@ defmodule Kith.Cldr do use Cldr, locales: ["en", "ar", "fr", "de", "es", "pt", "ja", "zh"], default_locale: "en", - providers: [Cldr.Number, Cldr.DateTime, Cldr.Calendar] + providers: [Cldr.Number, Cldr.DateTime, Cldr.Calendar, Cldr.Territory] end diff --git a/lib/kith/contacts.ex b/lib/kith/contacts.ex index 1427759..d5c7bef 100644 --- a/lib/kith/contacts.ex +++ b/lib/kith/contacts.ex @@ -387,8 +387,24 @@ defmodule Kith.Contacts do ContactField |> scope_to_account(account_id) |> Repo.get!(id) end - def create_contact_field(%Contact{} = contact, attrs) do - attrs = maybe_normalize_phone(attrs) + @doc """ + Fetch a contact by ID without scope enforcement, for use by the + Monica misc-data worker. The worker has already verified the contact + belongs to an import the user authorized; we just need the row. + + Returns `nil` if not found. 
+ """ + def get_contact_for_misc(id) when is_integer(id) or is_binary(id) do + Repo.get(Contact, id) + end + + def create_contact_field(%Contact{} = contact, attrs, opts \\ []) do + attrs = + if Keyword.get(opts, :normalize, true) do + maybe_normalize_phone(attrs) + else + attrs + end %ContactField{contact_id: contact.id, account_id: contact.account_id} |> ContactField.changeset(attrs) @@ -1740,14 +1756,28 @@ defmodule Kith.Contacts do end, set: [contact_id: survivor.id] ) - # Remap photos - |> Ecto.Multi.update_all( - :remap_photos, - fn _changes -> + # Remap photos (delete duplicates by content_hash first, then move remaining) + |> Ecto.Multi.run(:remap_photos, fn repo, _changes -> + # Delete photos from non-survivor that already exist on survivor (same content_hash) + repo.query( + """ + DELETE FROM photos + WHERE contact_id = $1 + AND content_hash IS NOT NULL + AND content_hash IN ( + SELECT content_hash FROM photos WHERE contact_id = $2 AND content_hash IS NOT NULL + ) + """, + [non_survivor.id, survivor.id] + ) + + # Move remaining photos + {count, _} = from(p in Photo, where: p.contact_id == ^non_survivor.id) - end, - set: [contact_id: survivor.id] - ) + |> repo.update_all(set: [contact_id: survivor.id]) + + {:ok, count} + end) # Remap addresses |> Ecto.Multi.update_all( :remap_addresses, diff --git a/lib/kith/contacts/cleanup.ex b/lib/kith/contacts/cleanup.ex new file mode 100644 index 0000000..b4ba270 --- /dev/null +++ b/lib/kith/contacts/cleanup.ex @@ -0,0 +1,59 @@ +defmodule Kith.Contacts.Cleanup do + @moduledoc """ + Hard-deletes all contacts (and CASCADE sub-entities) and account-scoped + tags for a single account. + + Sub-entities cleared via FK CASCADE: addresses, contact_fields, photos + (rows), documents (rows), notes, debts, gifts, pets, emotions, + relationships, calls, life_events, duplicate_candidates, immich_candidates. 
+ + Note: `Kith.Storage.AccountCleanup` MUST run before this module so that + photo/document storage_keys can be enumerated before their rows are wiped. + + Tags are wiped here (not in a separate module) because they share the + contacts axis-of-change and have no other purpose. + """ + + alias Kith.Contacts.{Contact, Tag} + alias Kith.Repo + + import Ecto.Query + require Logger + + @batch_size 200 + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + contacts_deleted = delete_contacts_in_batches(account_id, 0) + + {tags_deleted, _} = + Repo.delete_all(from(t in Tag, where: t.account_id == ^account_id)) + + Logger.info( + "[Contacts.Cleanup] hard-deleted #{contacts_deleted} contact(s) + " <> + "#{tags_deleted} tag(s) for account #{account_id}" + ) + + :ok + end + + defp delete_contacts_in_batches(account_id, acc) do + ids = + Repo.all( + from(c in Contact, + where: c.account_id == ^account_id, + select: c.id, + limit: @batch_size + ) + ) + + case ids do + [] -> + acc + + _ -> + {deleted, _} = Repo.delete_all(from(c in Contact, where: c.id in ^ids)) + delete_contacts_in_batches(account_id, acc + deleted) + end + end +end diff --git a/lib/kith/contacts/phone_formatter.ex b/lib/kith/contacts/phone_formatter.ex index 1532732..4517e15 100644 --- a/lib/kith/contacts/phone_formatter.ex +++ b/lib/kith/contacts/phone_formatter.ex @@ -1,62 +1,170 @@ defmodule Kith.Contacts.PhoneFormatter do @moduledoc """ - Phone number normalization and formatting. + Phone number normalization (E.164 for storage) and display formatting. - Stores numbers in a normalized form internally (E.164 when possible), - formats for display according to account preference. + Storage form is E.164 when the value can be parsed as a valid international + number — either because it carries a `+` country-code prefix, or because the + caller supplies a `default_region` (ISO 3166-1 alpha-2) for bare numbers. 
+ Unparseable input is returned trimmed-but-otherwise-unchanged so user data + is never silently destroyed. + + Display formatting (`format/2`) reads the account's `phone_format` + preference and renders the stored E.164 value as national/international/raw. """ + alias ExPhoneNumber + + @typedoc "ISO 3166-1 alpha-2 region code or `nil` to skip bare-number parsing." + @type region :: String.t() | nil + @doc """ Normalize a phone number for storage. - Strips non-digit characters (preserving leading +), applies best-effort - country code detection for bare numbers. - - Returns `{:ok, normalized}` or `{:ok, nil}` for blank input. + Equivalent to `normalize/2` with no default region — bare numbers (without + a `+` prefix) are returned trimmed-only. Use the 2-arity form from import + paths that know the user's preferred region. """ - def normalize(nil), do: {:ok, nil} - def normalize(""), do: {:ok, nil} + @spec normalize(String.t() | nil) :: {:ok, String.t() | nil} + def normalize(value), do: normalize(value, nil) + + @doc """ + Normalize a phone number to E.164 for storage. + + * `value` — raw user / import input. + * `default_region` — ISO 3166-1 alpha-2 region (e.g. `"US"`, `"FR"`) used + to parse bare numbers without a `+` prefix. Pass `nil` to leave bare + numbers unchanged (only `+`-prefixed input is parsed). - def normalize(phone) when is_binary(phone) do - stripped = String.trim(phone) + Returns `{:ok, normalized}` where `normalized` is the canonical E.164 form, + the original trimmed string if parsing fails, or `nil` for blank input. 
+ """ + @spec normalize(String.t() | nil, region) :: {:ok, String.t() | nil} + def normalize(nil, _), do: {:ok, nil} + def normalize("", _), do: {:ok, nil} - has_plus = String.starts_with?(stripped, "+") - digits = String.replace(stripped, ~r/[^\d]/, "") + def normalize(value, default_region) when is_binary(value) do + trimmed = String.trim(value) + has_plus = String.starts_with?(trimmed, "+") + region = if has_plus, do: nil, else: default_region cond do - digits == "" -> + trimmed == "" -> {:ok, nil} - has_plus -> - {:ok, "+" <> digits} + not has_plus and is_nil(region) -> + {:ok, trimmed} + + true -> + parse_to_e164(trimmed, region) + end + end + + defp parse_to_e164(trimmed, region) do + # Format-on-parse, not format-on-valid. libphonenumber's `is_valid_number?` + # rejects valid-but-uncommon inputs (NANP "555" test prefixes, recently + # allocated area codes, vanity numbers, region-specific oddities). Users' + # personal-CRM data is exactly that messy; refusing to canonicalize + # parseable-but-not-strictly-valid numbers re-introduces the mixed-storage + # problem detection is supposed to solve. We keep the parse check so that + # truly malformed input (`"garbage"`, `"+"`) round-trips unchanged. + case ExPhoneNumber.parse(trimmed, region) do + {:ok, parsed} -> {:ok, ExPhoneNumber.format(parsed, :e164)} + {:error, _} -> {:ok, trimmed} + end + end + + @doc """ + Map an account `locale` to a best-guess ISO 3166-1 alpha-2 region code. - # Bare 10-digit number — could be many countries, store as-is - String.length(digits) == 10 -> - {:ok, digits} + Returns `nil` when the locale doesn't map cleanly — callers should treat + `nil` as "don't normalize bare numbers" and prompt the user to pick. 
+ """ + @spec region_for_locale(String.t() | nil) :: region + def region_for_locale(nil), do: nil + + def region_for_locale(locale) when is_binary(locale) do + locale + |> String.split(~r/[-_]/) + |> List.first() + |> String.downcase() + |> language_to_region() + end - # US/Canada: 11-digit starting with 1 - String.length(digits) == 11 and String.starts_with?(digits, "1") -> - {:ok, "+" <> digits} + defp language_to_region("en"), do: "US" + defp language_to_region("fr"), do: "FR" + defp language_to_region("de"), do: "DE" + defp language_to_region("es"), do: "ES" + defp language_to_region("it"), do: "IT" + defp language_to_region("pt"), do: "PT" + defp language_to_region("nl"), do: "NL" + defp language_to_region("ru"), do: "RU" + defp language_to_region("ja"), do: "JP" + defp language_to_region("zh"), do: "CN" + defp language_to_region("ko"), do: "KR" + defp language_to_region("ar"), do: "SA" + defp language_to_region(_), do: nil - # International: 7+ digits, assume needs + - String.length(digits) >= 7 -> - {:ok, "+" <> digits} + @doc """ + List every parser-supported region with its localized country name and + calling code, sorted by display name. - # Too short to normalize meaningfully - true -> - {:ok, stripped} + Returns `[{region_code, label}]` — e.g. + `[{"AF", "Afghanistan (+93)"}, {"AL", "Albania (+355)"}, ...]` + + The intersection of `ExPhoneNumber.Metadata.get_supported_regions/0` + (regions the parser can actually handle) and + `Cldr.Territory.country_codes/1` (real ISO 3166-1 countries, not + continents) is computed once per locale and cached via `:persistent_term` + to keep wizard mounts fast. 
+ """ + @spec supported_regions(String.t()) :: [{String.t(), String.t()}] + def supported_regions(locale \\ "en") do + case :persistent_term.get({__MODULE__, :regions, locale}, :miss) do + :miss -> + regions = build_supported_regions(locale) + :persistent_term.put({__MODULE__, :regions, locale}, regions) + regions + + regions -> + regions end end + defp build_supported_regions(locale) do + parser_supported = + ExPhoneNumber.Metadata.get_supported_regions() + |> MapSet.new() + + Cldr.Territory.country_codes(as: :binary) + |> Enum.filter(&MapSet.member?(parser_supported, &1)) + |> Enum.map(&{&1, region_label(&1, locale)}) + |> Enum.sort_by(fn {_code, label} -> label end, :asc) + end + + defp region_label(code, locale) do + name = + case Kith.Cldr.Territory.from_territory_code( + String.to_atom(code), + locale: locale, + style: :standard + ) do + {:ok, localized} -> localized + _ -> code + end + + calling_code = ExPhoneNumber.Metadata.get_country_code_for_region_code(code) + "#{name} (+#{calling_code})" + end + @doc """ - Format a normalized phone number for display. + Format a stored phone number for display according to the account preference. 
## Formats - * `"e164"` — E.164 as-is: `+12345678901` - * `"national"` — US/Canada national: `(234) 567-8901` - * `"international"` — International: `+1 234-567-8901` - * `"raw"` — Return as-is, no formatting + * `"e164"` — E.164 as-is: `+12025550100` + * `"national"` — US/Canada national: `(202) 555-0100` + * `"international"` — International: `+1 202-555-0100` + * `"raw"` — return the stored value unchanged """ def format(nil, _format), do: nil def format(phone, "raw"), do: phone @@ -65,7 +173,6 @@ defmodule Kith.Contacts.PhoneFormatter do def format(phone, "international"), do: format_international(phone) def format(phone, _), do: phone - # US/Canada: +1 followed by 10 digits defp format_national( <<"+"::utf8, ?1, area::binary-size(3), prefix::binary-size(3), line::binary-size(4)>> ) diff --git a/lib/kith/conversations/cleanup.ex b/lib/kith/conversations/cleanup.ex new file mode 100644 index 0000000..e5005b2 --- /dev/null +++ b/lib/kith/conversations/cleanup.ex @@ -0,0 +1,24 @@ +defmodule Kith.Conversations.Cleanup do + @moduledoc """ + Wipes all conversations for a single account. FK CASCADE removes the + associated `messages` rows. 
+ """ + + alias Kith.Conversations.Conversation + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(c in Conversation, where: c.account_id == ^account_id)) + + Logger.info( + "[Conversations.Cleanup] wiped #{count} conversation(s) for account #{account_id}" + ) + + :ok + end +end diff --git a/lib/kith/duplicate_detection.ex b/lib/kith/duplicate_detection.ex index a7599d0..359417e 100644 --- a/lib/kith/duplicate_detection.ex +++ b/lib/kith/duplicate_detection.ex @@ -4,13 +4,19 @@ defmodule Kith.DuplicateDetection do alias Kith.Contacts.DuplicateCandidate alias Kith.Repo + @default_page_size 20 + def list_candidates(account_id, opts \\ []) do status = Keyword.get(opts, :status, "pending") + limit = Keyword.get(opts, :limit, @default_page_size) + offset = Keyword.get(opts, :offset, 0) DuplicateCandidate |> scope_to_account(account_id) |> where([d], d.status == ^status) |> order_by([d], desc: d.score) + |> limit(^limit) + |> offset(^offset) |> Repo.all() |> Repo.preload([:contact, :duplicate_contact]) end diff --git a/lib/kith/imports/cleanup.ex b/lib/kith/imports/cleanup.ex new file mode 100644 index 0000000..d4263ce --- /dev/null +++ b/lib/kith/imports/cleanup.ex @@ -0,0 +1,29 @@ +defmodule Kith.Imports.Cleanup do + @moduledoc """ + Wipes all import history for a single account. + + Deletes `import_records` first then `imports`. Both tables are scoped by + `account_id` directly. Called by `Kith.Workers.AccountResetWorker`. 
+ """ + + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {records, _} = + Repo.delete_all(from(r in ImportRecord, where: r.account_id == ^account_id)) + + {imports, _} = + Repo.delete_all(from(i in Import, where: i.account_id == ^account_id)) + + Logger.info( + "[Imports.Cleanup] wiped #{records} record(s) + #{imports} import(s) for account #{account_id}" + ) + + :ok + end +end diff --git a/lib/kith/imports/job_cancellation.ex b/lib/kith/imports/job_cancellation.ex new file mode 100644 index 0000000..6eaff42 --- /dev/null +++ b/lib/kith/imports/job_cancellation.ex @@ -0,0 +1,79 @@ +defmodule Kith.Imports.JobCancellation do + @moduledoc """ + Cancels all pending/scheduled/retryable/executing Oban jobs that belong to a + single account's imports. + + Scoping rule: only jobs whose args reference this account (directly via + `account_id` or transitively via `import_id` belonging to one of this + account's imports) are touched. No other account's jobs are affected. + + Uses `Oban.cancel_all_jobs/2` which both updates the DB state and signals + any currently-`executing` jobs to terminate via Oban's Notifier (`:pkill`). 
+ """ + + alias Kith.Imports.Import + alias Kith.Repo + + import Ecto.Query + require Logger + + # Workers whose args carry `import_id` — cancelled by import_id ∈ account's imports + @import_id_workers ~w[ + Kith.Workers.MonicaApiCrawlWorker + Kith.Workers.MonicaPhotoSyncWorker + Kith.Workers.MonicaDocumentImportWorker + Kith.Workers.ImportSourceWorker + ] + + # Workers whose args carry `account_id` directly — cancelled by account_id match + @account_id_workers ~w[ + Kith.Workers.ImportWorker + Kith.Workers.DuplicateDetectionWorker + ] + + @cancellable_states ~w[available scheduled retryable executing] + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + import_ids = account_import_ids(account_id) + import_cancelled = cancel_jobs_by_import_id(import_ids) + account_cancelled = cancel_jobs_by_account_id(account_id) + + Logger.info( + "[Imports.JobCancellation] cancelled #{import_cancelled} import-id-scoped job(s) + " <> + "#{account_cancelled} account-id-scoped job(s) for account #{account_id}" + ) + + :ok + end + + defp account_import_ids(account_id) do + Repo.all(from(i in Import, where: i.account_id == ^account_id, select: i.id)) + end + + defp cancel_jobs_by_import_id([]), do: 0 + + defp cancel_jobs_by_import_id(import_ids) do + {:ok, count} = + from(j in Oban.Job, + where: j.worker in ^@import_id_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'import_id')::bigint", j.args) in ^import_ids + ) + |> Oban.cancel_all_jobs() + + count + end + + defp cancel_jobs_by_account_id(account_id) do + {:ok, count} = + from(j in Oban.Job, + where: j.worker in ^@account_id_workers, + where: j.state in ^@cancellable_states, + where: fragment("(?->>'account_id')::bigint", j.args) == ^account_id + ) + |> Oban.cancel_all_jobs() + + count + end +end diff --git a/lib/kith/imports/sources/monica_api.ex b/lib/kith/imports/sources/monica_api.ex index 9267cf4..b7566f2 100644 --- 
a/lib/kith/imports/sources/monica_api.ex +++ b/lib/kith/imports/sources/monica_api.ex @@ -16,7 +16,14 @@ defmodule Kith.Imports.Sources.MonicaApi do using import_records (no API calls needed). 3. **Extra notes** — for contacts with `statistics.number_of_notes > 3`, fetch remaining notes via `GET /api/contacts/{id}/notes`. - 4. **Photos** — optionally crawl `GET /api/photos?limit=100` to import all photos. + + Per-contact "misc" data (pets, calls, activities, gifts, debts, tasks, + reminders, conversations) is planned during Phase 1 — for each contact, + endpoints with `statistics.number_of_X > 0` are recorded in + `summary.misc_data_plan` — and dispatched separately by + `Kith.Workers.MonicaMiscDataWorker`. Photo import and document import + follow the same separate-worker pattern (`MonicaPhotoSyncWorker`, + `MonicaDocumentImportWorker`). """ @behaviour Kith.Imports.Source @@ -24,15 +31,15 @@ defmodule Kith.Imports.Sources.MonicaApi do import Ecto.Query, warn: false alias Kith.Contacts + alias Kith.Contacts.PhoneFormatter alias Kith.Imports + alias Kith.Imports.Sources.MonicaApi.RateLimiter alias Kith.Repo alias Kith.Workers.MonicaDocumentImportWorker require Logger @page_limit 100 - @max_rate_limit_retries 3 - @rate_limit_sleep_ms :timer.seconds(65) # ── Behaviour callbacks ─────────────────────────────────────────────── @@ -78,6 +85,7 @@ defmodule Kith.Imports.Sources.MonicaApi do user_id: user_id, credential: credential, import_job: import_job, + opts: opts, topic: "import:#{account_id}" } @@ -103,19 +111,12 @@ defmodule Kith.Imports.Sources.MonicaApi do [] end - # Phase 4: Photos (optional) - photo_errors = - if opts["photos"] do - crawl_all_photos(credential, account_id, import_job) - else - [] - end + # Phase 4: Per-contact "misc" data (pets, calls, activities, gifts, debts, + # tasks, reminders, conversations) is now planned during the main crawl + # and dispatched as a separate `MonicaMiscDataWorker` Oban job by + # `MonicaApiCrawlWorker`. 
The plan is carried in `summary.misc_data_plan`. - # Phase 5-12: Additional data types (per-contact endpoints) - extra_data_errors = - import_extra_data_types(credential, account_id, user_id, import_job, opts) - - # Phase 13: Enqueue document import jobs (async, runs after main import) + # Phase 5: Enqueue document import jobs (async, runs after main import) if opts["documents"] do enqueue_document_imports(credential, account_id, user_id, import_job) end @@ -124,13 +125,11 @@ defmodule Kith.Imports.Sources.MonicaApi do acc.errors ++ ref_errors ++ notes_errors ++ - photo_errors ++ - merge_result.errors ++ - extra_data_errors + merge_result.errors error_count = - acc.error_count + length(ref_errors) + length(notes_errors) + length(photo_errors) + - length(merge_result.errors) + length(extra_data_errors) + acc.error_count + length(ref_errors) + length(notes_errors) + + length(merge_result.errors) {:ok, %{ @@ -140,7 +139,8 @@ defmodule Kith.Imports.Sources.MonicaApi do skipped: acc.skipped, merged: merge_result.merged, error_count: error_count, - errors: Enum.take(all_errors, 50) + errors: Enum.take(all_errors, 50), + misc_data_plan: Enum.reverse(deferred.misc_data) }} catch :cancelled -> @@ -163,7 +163,12 @@ defmodule Kith.Imports.Sources.MonicaApi do page: 1, total: nil, acc: %{contacts: 0, notes: 0, skipped: 0, error_count: 0, errors: []}, - deferred: %{first_met_through: [], relationships: [], extra_notes: []}, + deferred: %{ + first_met_through: [], + relationships: [], + extra_notes: [], + misc_data: [] + }, ref_data: nil, global_idx: 0 } @@ -381,7 +386,7 @@ defmodule Kith.Imports.Sources.MonicaApi do defp import_api_contact_children(ctx, contact, api_contact, source_id, ref_data, acc, deferred) do # Contact fields (embedded with ?with=contactfields) - import_api_contact_fields(contact, api_contact, ref_data, ctx.import_job) + import_api_contact_fields(contact, api_contact, ref_data, ctx) # Addresses (embedded directly) import_api_addresses(contact, api_contact, 
ctx.import_job) @@ -393,34 +398,52 @@ defmodule Kith.Imports.Sources.MonicaApi do import_api_tags(contact, api_contact, ref_data) # Collect deferred data - deferred = collect_deferred_data(api_contact, source_id, deferred) + deferred = collect_deferred_data(api_contact, source_id, contact.id, deferred, ctx.opts) acc = %{acc | contacts: acc.contacts + 1, notes: acc.notes + n} {acc, deferred} end - defp import_api_contact_fields(contact, api_contact, ref_data, import_job) do + defp import_api_contact_fields(contact, api_contact, ref_data, ctx) do fields = api_contact["contactFields"] || [] Enum.each(fields, fn field -> - import_single_contact_field(contact, field, ref_data, import_job) + import_single_contact_field(contact, field, ref_data, ctx) end) end - defp import_single_contact_field(contact, field, ref_data, import_job) do + defp import_single_contact_field(contact, field, ref_data, ctx) do cft_name = get_in(field, ["contact_field_type", "name"]) cft_id = if cft_name, do: Map.get(ref_data.contact_field_types, cft_name) - value = field["content"] + raw_value = field["content"] + value = normalize_field_value(raw_value, cft_id, ref_data, ctx) if cft_id && value && !contact_field_duplicate?(contact.id, cft_id, value) do - create_contact_field(contact, field, cft_id, value, import_job) + create_contact_field(contact, field, cft_id, value, ctx.import_job) + end + end + + # Normalize phone fields to E.164 at import time so detection and intra-contact + # dedup do simple equality. Other field types (email, social, etc) pass through. 
+ defp normalize_field_value(nil, _cft_id, _ref_data, _ctx), do: nil + + defp normalize_field_value(value, cft_id, ref_data, ctx) when is_binary(value) do + if cft_id && Map.has_key?(ref_data.phone_cft_ids, cft_id) do + region = parse_phone_region(ctx.opts["phone_default_region"]) + {:ok, normalized} = PhoneFormatter.normalize(value, region) + normalized || value + else + value end end + defp parse_phone_region(region) when region in [nil, ""], do: nil + defp parse_phone_region(region) when is_binary(region), do: region + defp create_contact_field(contact, field, cft_id, value, import_job) do attrs = %{"value" => value, "contact_field_type_id" => cft_id} - case Contacts.create_contact_field(contact, attrs) do + case Contacts.create_contact_field(contact, attrs, normalize: false) do {:ok, cf} -> maybe_record_entity(import_job, "contact_field", field["uuid"], "contact_field", cf.id) @@ -506,11 +529,56 @@ defmodule Kith.Imports.Sources.MonicaApi do end) end - defp collect_deferred_data(api_contact, source_id, deferred) do + defp collect_deferred_data(api_contact, source_id, local_id, deferred, opts) do deferred |> collect_first_met_through(api_contact, source_id) |> collect_relationships(api_contact, source_id) |> collect_extra_notes(api_contact, source_id) + |> collect_misc_data(api_contact, source_id, local_id, opts) + end + + @misc_endpoints [ + {:calls, "number_of_calls"}, + {:activities, "number_of_activities"}, + {:gifts, "number_of_gifts"}, + {:debts, "number_of_debts"}, + {:tasks, "number_of_tasks"}, + {:reminders, "number_of_reminders"}, + {:conversations, "number_of_conversations"} + ] + + # Build a plan entry for a contact's per-contact extra-data endpoints. + # An endpoint is included only if (a) the wizard opt for that data type is + # not explicitly false AND (b) Monica's `statistics.number_of_X` reports + # > 0 (or the stat field is missing — safer to fetch than to silently + # skip when Monica's payload shape is unfamiliar). 
+ # + # `:pets` has no statistics field in Monica's contact payload, so it is + # included whenever the wizard opt is on. The redundant fetch for pet-free + # contacts is the documented cost. + defp collect_misc_data(deferred, api_contact, source_id, local_id, opts) do + stats = api_contact["statistics"] || %{} + + endpoints = + @misc_endpoints + |> Enum.filter(fn {key, stat_field} -> + opts[Atom.to_string(key)] != false and (stats[stat_field] || 1) > 0 + end) + |> Enum.map(&elem(&1, 0)) + + endpoints = if opts["pets"] != false, do: [:pets | endpoints], else: endpoints + + if endpoints == [] do + deferred + else + entry = %{ + source_id: to_string(source_id), + local_id: local_id, + endpoints: Enum.map(endpoints, &Atom.to_string/1) + } + + %{deferred | misc_data: [entry | deferred.misc_data]} + end end defp collect_first_met_through(deferred, api_contact, source_id) do @@ -581,21 +649,22 @@ defmodule Kith.Imports.Sources.MonicaApi do ) ) - # Load contacts with contact fields + # Load contacts with contact fields and addresses (addresses participate + # in the broadened definite-duplicate predicate). contacts = Repo.all( from(c in Contacts.Contact, where: c.id in ^import_records and is_nil(c.deleted_at), - preload: [contact_fields: :contact_field_type] + preload: [:addresses, contact_fields: :contact_field_type] ) ) - # Group by normalized name + # Group by trimmed-and-collapsed normalized name. CardDAV-style duplicates + # (monicahq/monica#6175) often differ only in trailing whitespace or + # double-space artifacts, so the trim is load-bearing here. 
name_groups = contacts - |> Enum.group_by(fn c -> - {String.downcase(c.first_name || ""), String.downcase(c.last_name || "")} - end) + |> Enum.group_by(&name_key/1) |> Enum.filter(fn {_key, group} -> length(group) >= 2 end) merged_ids = MapSet.new() @@ -657,29 +726,99 @@ defmodule Kith.Imports.Sources.MonicaApi do end end + # "Definite duplicate" predicate evaluated only after callers have already + # grouped contacts by normalized {first_name, last_name}. The trimmed-name + # equality is itself a strong identity signal, so within a group a single + # shared {email, phone, address} suffices — this catches CardDAV-shaped + # duplicates (monicahq/monica#6175) where every field is identical, the + # "triple duplicates" case (same name + same email × N), and the original + # narrow case (same name + shared phone or email). defp definite_duplicate?(contact_a, contact_b) do - emails_a = extract_values_by_protocol(contact_a, "mailto") - emails_b = extract_values_by_protocol(contact_b, "mailto") + shared_emails?(contact_a, contact_b) or + shared_phones?(contact_a, contact_b) or + shared_addresses?(contact_a, contact_b) + end - phones_a = extract_values_by_protocol(contact_a, "tel") - phones_b = extract_values_by_protocol(contact_b, "tel") + defp shared_emails?(a, b) do + set_a = extract_values_by_protocol(a, "mailto", &normalize_email_value/1) + set_b = extract_values_by_protocol(b, "mailto", &normalize_email_value/1) + intersects?(set_a, set_b) + end - shared_email? = not MapSet.disjoint?(emails_a, emails_b) - shared_phone? = not MapSet.disjoint?(phones_a, phones_b) + defp shared_phones?(a, b) do + # Phone values are E.164-canonical at this point (PhoneFormatter.normalize/2 + # ran during import for `tel`-protocol fields). Comparison is strict equality + # after stripping non-digit characters as a safety net for any pre-existing + # rows that bypassed normalization. 
+ set_a = extract_values_by_protocol(a, "tel", &normalize_phone_digits/1) + set_b = extract_values_by_protocol(b, "tel", &normalize_phone_digits/1) + intersects?(set_a, set_b) + end - shared_email? or shared_phone? + defp shared_addresses?(a, b) do + set_a = address_keys(a) + set_b = address_keys(b) + intersects?(set_a, set_b) end - defp extract_values_by_protocol(contact, protocol_prefix) do + defp extract_values_by_protocol(contact, protocol_prefix, normalizer) do contact.contact_fields |> Enum.filter(fn cf -> cf.contact_field_type && String.starts_with?(cf.contact_field_type.protocol || "", protocol_prefix) end) - |> Enum.map(fn cf -> String.downcase(cf.value || "") end) + |> Enum.map(fn cf -> normalizer.(cf.value) end) + |> Enum.reject(&(&1 in [nil, ""])) + |> MapSet.new() + end + + defp normalize_email_value(nil), do: nil + defp normalize_email_value(v) when is_binary(v), do: v |> String.trim() |> String.downcase() + + defp normalize_phone_digits(nil), do: nil + + defp normalize_phone_digits(v) when is_binary(v) do + case String.replace(v, ~r/[^0-9]/, "") do + "" -> nil + digits -> digits + end + end + + defp address_keys(contact) do + contact.addresses + |> Enum.map(fn a -> + line1 = normalize_address_part(a.line1) + postal = normalize_address_part(a.postal_code) + if line1 != "" and postal != "", do: {line1, postal}, else: nil + end) + |> Enum.reject(&is_nil/1) |> MapSet.new() end + defp normalize_address_part(nil), do: "" + + defp normalize_address_part(v) when is_binary(v) do + v |> String.trim() |> String.downcase() |> String.replace(~r/\s+/, " ") + end + + defp intersects?(a, b) do + if MapSet.size(a) == 0 or MapSet.size(b) == 0 do + false + else + not MapSet.disjoint?(a, b) + end + end + + defp name_key(contact) do + {normalize_name_part(contact.first_name), normalize_name_part(contact.last_name)} + end + + defp normalize_name_part(nil), do: "" + + defp normalize_name_part(v) when is_binary(v) do + v |> String.trim() |> String.downcase() |> 
String.replace(~r/\s+/, " ") + end + defp update_import_records_after_merge(account_id, import_job, old_contact_id, new_contact_id) do from(ir in Imports.ImportRecord, where: @@ -862,161 +1001,19 @@ defmodule Kith.Imports.Sources.MonicaApi do end) end - # ── Phase 4: Photo crawl ──────────────────────────────────────────── - - defp crawl_all_photos(credential, account_id, import_job) do - crawl_photos_loop(credential, account_id, import_job, _page = 1, _errors = []) - end - - defp crawl_photos_loop(credential, account_id, import_job, page, errors) do - url = "#{credential.url}/api/photos" - - case api_get_json(credential, url, limit: @page_limit, page: page) do - {:ok, %{"data" => photos, "meta" => meta}} when is_list(photos) -> - last_page = meta["last_page"] || 1 - - errors = - Enum.reduce(photos, errors, fn photo, errs -> - import_api_photo(photo, account_id, import_job, errs) - end) - - if page < last_page do - crawl_photos_loop(credential, account_id, import_job, page + 1, errors) - else - errors - end - - {:error, :rate_limited} -> - errors ++ ["Rate limited fetching photos"] - - {:error, reason} -> - errors ++ ["Failed to fetch photos page #{page}: #{inspect(reason)}"] - - _ -> - errors - end - end - - defp import_api_photo(photo, account_id, import_job, errors) do - contact_id = get_in(photo, ["contact", "id"]) - source_id = to_string(contact_id) - - contact_rec = Imports.find_import_record(account_id, "monica_api", "contact", source_id) - - if contact_rec do - contact = Repo.get(Contacts.Contact, contact_rec.local_entity_id) - - if contact do - do_import_photo(contact, photo, import_job, errors) - else - errors - end - else - Logger.debug("[MonicaApi] Skipping photo for unknown contact #{source_id}") - errors - end - end - - defp do_import_photo(contact, photo, import_job, errors) do - file_name = photo["original_filename"] || "photo.jpg" - - case decode_photo_data(photo) do - {:ok, binary} -> - store_and_create_photo(contact, photo, binary, file_name, 
import_job, errors) - - :no_data -> - errors - - :error -> - errors ++ ["Failed to decode photo data for #{contact.first_name}"] - end - end - - defp store_and_create_photo(contact, photo, binary, file_name, import_job, errors) do - content_hash = :crypto.hash(:sha256, binary) |> Base.encode16(case: :lower) - - if Contacts.photo_exists_by_hash?(contact.id, content_hash) do - Logger.debug("[MonicaApi] Skipping duplicate photo for #{contact.first_name}") - errors - else - upload_and_record_photo(contact, photo, binary, file_name, content_hash, import_job, errors) - end - end - - defp upload_and_record_photo( - contact, - photo, - binary, - file_name, - content_hash, - import_job, - errors - ) do - key = Kith.Storage.generate_key(contact.account_id, "photos", file_name) - - case Kith.Storage.upload_binary(binary, key) do - {:ok, _} -> - attrs = %{ - "file_name" => file_name, - "storage_key" => key, - "file_size" => byte_size(binary), - "content_type" => photo["mime_type"] || "image/jpeg", - "content_hash" => content_hash - } - - create_photo_and_set_avatar(contact, photo, attrs, import_job, errors) - - {:error, reason} -> - errors ++ ["Failed to store photo for #{contact.first_name}: #{inspect(reason)}"] - end - end - - defp create_photo_and_set_avatar(contact, photo, attrs, import_job, errors) do - case Contacts.create_photo(contact, attrs) do - {:ok, photo_record} -> - maybe_record_entity(import_job, "photo", photo["uuid"], "photo", photo_record.id) - - if is_nil(contact.avatar) do - contact |> Ecto.Changeset.change(avatar: attrs["storage_key"]) |> Repo.update!() - end - - errors - - {:error, reason} -> - Logger.warning("[MonicaApi] Photo for #{contact.first_name}: #{inspect(reason)}") - errors - end - end - - defp decode_photo_data(%{"dataUrl" => "data:" <> _ = data_url}) do - case String.split(data_url, ",", parts: 2) do - [_meta, encoded] -> {:ok, Base.decode64!(encoded)} - _ -> :error - end - rescue - _ -> :error - end - - defp decode_photo_data(%{"link" => 
link}) when is_binary(link) and link != "" do - case Req.get(link, receive_timeout: 30_000) do - {:ok, %{status: 200, body: body}} when is_binary(body) -> {:ok, body} - _ -> :error - end - end - - defp decode_photo_data(_), do: :no_data - # ── Reference data building ────────────────────────────────────────── defp build_or_update_ref_data(account_id, contacts, nil) do genders = collect_api_genders(contacts) tags = collect_api_tags(contacts) cfts = collect_api_contact_field_types(contacts) + cft_map = find_or_create_contact_field_types(account_id, cfts) %{ genders: find_or_create_genders(account_id, genders), tags: find_or_create_tags(account_id, tags), - contact_field_types: find_or_create_contact_field_types(account_id, cfts) + contact_field_types: cft_map, + phone_cft_ids: phone_cft_id_map(account_id, Map.values(cft_map)) } end @@ -1036,14 +1033,23 @@ defmodule Kith.Imports.Sources.MonicaApi do |> collect_api_contact_field_types() |> Enum.reject(&Map.has_key?(ref_data.contact_field_types, &1)) + added_cfts = find_or_create_contact_field_types(account_id, new_cfts) + + phone_cft_ids = + if new_cfts == [] do + ref_data.phone_cft_ids + else + Map.merge( + ref_data.phone_cft_ids, + phone_cft_id_map(account_id, Map.values(added_cfts)) + ) + end + %{ genders: Map.merge(ref_data.genders, find_or_create_genders(account_id, new_genders)), tags: Map.merge(ref_data.tags, find_or_create_tags(account_id, new_tags)), - contact_field_types: - Map.merge( - ref_data.contact_field_types, - find_or_create_contact_field_types(account_id, new_cfts) - ) + contact_field_types: Map.merge(ref_data.contact_field_types, added_cfts), + phone_cft_ids: phone_cft_ids } end @@ -1119,6 +1125,22 @@ defmodule Kith.Imports.Sources.MonicaApi do end) end + # O(1)-lookup map of phone-protocol contact_field_type IDs. A plain map + # (`%{id => true}`) is used rather than a MapSet to keep dialyzer happy + # with the ref_data shape inference. 
+ defp phone_cft_id_map(_account_id, []), do: %{} + + defp phone_cft_id_map(account_id, cft_ids) when is_list(cft_ids) do + Repo.all( + from t in Contacts.ContactFieldType, + where: t.id in ^cft_ids, + where: is_nil(t.account_id) or t.account_id == ^account_id, + where: fragment("? LIKE 'tel%'", t.protocol), + select: t.id + ) + |> Map.new(&{&1, true}) + end + defp find_or_create_relationship_type(_account_id, nil, _reverse), do: nil defp find_or_create_relationship_type(account_id, name, reverse_name) do @@ -1140,40 +1162,28 @@ defmodule Kith.Imports.Sources.MonicaApi do # ── HTTP helpers ───────────────────────────────────────────────────── defp api_get(credential, url, params \\ []) do + RateLimiter.wait!(credential.url) + headers = [{"Authorization", "Bearer #{credential.api_key}"}, {"Accept", "application/json"}] req_options = Map.get(credential, :req_options, []) - options = [headers: headers, params: params] ++ req_options + + options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ req_options Req.get(url, options) end defp api_get_json(credential, url, params) do - api_get_json_with_retry(credential, url, params, 0) - end - - defp api_get_json_with_retry(_credential, _url, _params, retries) - when retries >= @max_rate_limit_retries do - {:error, :rate_limited} - end - - defp api_get_json_with_retry(credential, url, params, retries) do case api_get(credential, url, params) do - {:ok, %{status: 200, body: body}} when is_map(body) -> - {:ok, body} - - {:ok, %{status: 429}} -> - Logger.info( - "[MonicaApi] Rate limited, sleeping #{@rate_limit_sleep_ms}ms (retry #{retries + 1})" - ) - - Process.sleep(@rate_limit_sleep_ms) - api_get_json_with_retry(credential, url, params, retries + 1) - - {:ok, %{status: status}} -> - {:error, "Unexpected status: #{status}"} - - {:error, reason} -> - {:error, reason} + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, 
:rate_limited} + {:ok, %{status: status}} -> {:error, "Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} end end @@ -1311,738 +1321,6 @@ defmodule Kith.Imports.Sources.MonicaApi do end end - # ── Phases 5-12: Additional per-contact data types ───────────────── - - defp import_extra_data_types(credential, account_id, user_id, import_job, opts) do - # Get all imported contact IDs for this job - contact_records = - Repo.all( - from(ir in Imports.ImportRecord, - where: - ir.import_id == ^import_job.id and - ir.source_entity_type == "contact", - select: {ir.source_entity_id, ir.local_entity_id} - ) - ) - - errors = - Enum.flat_map(contact_records, fn {source_id, local_id} -> - contact = - Repo.get(Contacts.Contact, local_id) - - if contact && is_nil(contact.deleted_at) do - import_per_contact_data( - credential, - account_id, - user_id, - contact, - source_id, - import_job, - opts - ) - else - [] - end - end) - - errors - end - - defp import_per_contact_data( - credential, - account_id, - user_id, - contact, - source_id, - import_job, - opts - ) do - errors = [] - base_url = credential.url - - # Phase 5: Pets - errors = - if opts["pets"] do - errors ++ - import_contact_pets(credential, base_url, account_id, contact, source_id, import_job) - else - errors - end - - # Phase 6: Calls - errors = - if opts["calls"] do - errors ++ - import_contact_calls( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 7: Activities - errors = - if opts["activities"] do - errors ++ - import_contact_activities( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 8: Gifts - errors = - if opts["gifts"] do - errors ++ - import_contact_gifts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 9: Debts - errors = - if opts["debts"] do - errors ++ - 
import_contact_debts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 10: Tasks - errors = - if opts["tasks"] do - errors ++ - import_contact_tasks( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 11: Reminders - errors = - if opts["reminders"] do - errors ++ - import_contact_reminders( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - # Phase 12: Conversations - errors = - if opts["conversations"] do - errors ++ - import_contact_conversations( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) - else - errors - end - - errors - end - - # ── Phase 5: Pets ────────────────────────────────────────────────── - - defp import_contact_pets(credential, base_url, account_id, contact, source_id, import_job) do - url = "#{base_url}/api/contacts/#{source_id}/pets" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => pets}} when is_list(pets) -> - Enum.flat_map(pets, fn pet -> - import_single_pet(account_id, contact, pet, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch pets for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_pet(account_id, contact, pet_data, import_job) do - name = pet_data["name"] - species = normalize_pet_species(pet_data["pet_category"] || pet_data["species"]) - - if pet_duplicate?(contact.id, name, species) do - [] - else - attrs = %{ - "contact_id" => contact.id, - "name" => name || "Unknown", - "species" => species, - "breed" => non_empty_string(pet_data["breed"]), - "notes" => non_empty_string(pet_data["notes"]) - } - - case Kith.Pets.create_pet(account_id, attrs) do - {:ok, pet} -> - maybe_record_entity(import_job, "pet", pet_data["id"], "pet", pet.id) - [] - - {:error, reason} -> - ["Pet import error: 
#{inspect_errors(reason)}"] - end - end - end - - defp normalize_pet_species(nil), do: "other" - - defp normalize_pet_species(species) when is_map(species) do - normalize_pet_species(species["name"]) - end - - defp normalize_pet_species(species) when is_binary(species) do - normalized = String.downcase(species) - - if normalized in ~w(dog cat bird fish reptile rabbit hamster) do - normalized - else - "other" - end - end - - defp normalize_pet_species(_), do: "other" - - defp pet_duplicate?(contact_id, name, species) do - Repo.exists?( - from(p in Kith.Contacts.Pet, - where: - p.contact_id == ^contact_id and - fragment("lower(coalesce(?, ''))", p.name) == - fragment("lower(coalesce(?, ''))", ^(name || "")) and - p.species == ^species - ) - ) - end - - # ── Phase 6: Calls ───────────────────────────────────────────────── - - defp import_contact_calls( - credential, - base_url, - account_id, - _user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/calls" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => calls}} when is_list(calls) -> - Enum.flat_map(calls, fn call -> - import_single_call(account_id, contact, call, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch calls for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_call(account_id, contact, call_data, import_job) do - occurred_at = parse_datetime(call_data["called_at"]) - - if is_nil(occurred_at) do - [] - else - attrs = %{ - "occurred_at" => occurred_at, - "notes" => non_empty_string(call_data["content"]), - "duration_mins" => call_data["duration"] - } - - case Kith.Activities.create_call( - %{account_id: account_id, id: contact.id}, - attrs - ) do - {:ok, call} -> - maybe_record_entity(import_job, "call", call_data["id"], "call", call.id) - [] - - {:error, reason} -> - ["Call import error: #{inspect_errors(reason)}"] - end - end - end - - # ── Phase 7: Activities 
──────────────────────────────────────────── - - defp import_contact_activities( - credential, - base_url, - account_id, - _user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/activities" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => activities}} when is_list(activities) -> - Enum.flat_map(activities, fn activity -> - import_single_activity(account_id, contact, activity, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch activities for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_activity(account_id, contact, activity_data, import_job) do - occurred_at = - parse_datetime(activity_data["happened_at"] || activity_data["date_it_happened"]) - - attrs = %{ - "title" => activity_data["summary"] || activity_data["title"] || "Imported activity", - "description" => non_empty_string(activity_data["description"]), - "occurred_at" => occurred_at || DateTime.utc_now() - } - - case Kith.Activities.create_activity(account_id, attrs, [contact.id]) do - {:ok, activity} -> - maybe_record_entity( - import_job, - "activity", - activity_data["id"], - "activity", - activity.id - ) - - [] - - {:error, reason} -> - ["Activity import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 8: Gifts ───────────────────────────────────────────────── - - defp import_contact_gifts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/gifts" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => gifts}} when is_list(gifts) -> - Enum.flat_map(gifts, fn gift -> - import_single_gift(account_id, user_id, contact, gift, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch gifts for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_gift(account_id, user_id, contact, gift_data, import_job) do - direction = - 
case gift_data["is_for"] do - "contact" -> "given" - _ -> "received" - end - - attrs = %{ - "contact_id" => contact.id, - "name" => gift_data["name"] || "Imported gift", - "description" => non_empty_string(gift_data["comment"]), - "direction" => direction, - "status" => - cond do - gift_data["has_been_offered"] -> "given" - gift_data["has_been_received"] -> "received" - true -> "idea" - end, - "amount" => gift_data["amount"], - "date" => parse_date_string(gift_data["date"]) - } - - case Kith.Gifts.create_gift(account_id, user_id, attrs) do - {:ok, gift} -> - maybe_record_entity(import_job, "gift", gift_data["id"], "gift", gift.id) - [] - - {:error, reason} -> - ["Gift import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 9: Debts ───────────────────────────────────────────────── - - defp import_contact_debts( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/debts" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => debts}} when is_list(debts) -> - Enum.flat_map(debts, fn debt -> - import_single_debt(account_id, user_id, contact, debt, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch debts for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_debt(account_id, user_id, contact, debt_data, import_job) do - direction = - case debt_data["in_debt"] do - "yes" -> "owed_by_me" - _ -> "owed_to_me" - end - - attrs = %{ - "contact_id" => contact.id, - "title" => debt_data["reason"] || "Imported debt", - "amount" => debt_data["amount"] || "0", - "direction" => direction, - "status" => if(debt_data["status"] == "complete", do: "settled", else: "active") - } - - case Kith.Debts.create_debt(account_id, user_id, attrs) do - {:ok, debt} -> - maybe_record_entity(import_job, "debt", debt_data["id"], "debt", debt.id) - [] - - {:error, reason} -> - ["Debt import error: #{inspect_errors(reason)}"] - end - 
end - - # ── Phase 10: Tasks ──────────────────────────────────────────────── - - defp import_contact_tasks( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/tasks" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => tasks}} when is_list(tasks) -> - Enum.flat_map(tasks, fn task -> - import_single_task(account_id, user_id, contact, task, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch tasks for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_task(account_id, user_id, contact, task_data, import_job) do - status = if task_data["completed"], do: "completed", else: "pending" - - attrs = %{ - "contact_id" => contact.id, - "title" => task_data["title"] || "Imported task", - "description" => non_empty_string(task_data["description"]), - "status" => status - } - - case Kith.Tasks.create_task(account_id, user_id, attrs) do - {:ok, task} -> - maybe_record_entity(import_job, "task", task_data["id"], "task", task.id) - [] - - {:error, reason} -> - ["Task import error: #{inspect_errors(reason)}"] - end - end - - # ── Phase 11: Reminders ──────────────────────────────────────────── - - defp import_contact_reminders( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/reminders" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => reminders}} when is_list(reminders) -> - Enum.flat_map(reminders, fn reminder -> - import_single_reminder(account_id, user_id, contact, reminder, import_job) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch reminders for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_reminder(account_id, user_id, contact, reminder_data, import_job) do - {type, frequency} = map_monica_reminder_frequency(reminder_data["frequency_type"]) - - 
next_date = - parse_date_string(reminder_data["next_expected_date"]) || - Date.utc_today() - - attrs = %{ - "contact_id" => contact.id, - "type" => type, - "title" => reminder_data["title"] || "Imported reminder", - "frequency" => frequency, - "next_reminder_date" => next_date - } - - case Kith.Reminders.create_reminder(account_id, user_id, attrs) do - {:ok, reminder} -> - maybe_record_entity( - import_job, - "reminder", - reminder_data["id"], - "reminder", - reminder.id - ) - - [] - - {:error, reason} -> - ["Reminder import error: #{inspect_errors(reason)}"] - end - end - - defp map_monica_reminder_frequency("one_time"), do: {"one_time", nil} - defp map_monica_reminder_frequency("week"), do: {"recurring", "weekly"} - defp map_monica_reminder_frequency("month"), do: {"recurring", "monthly"} - defp map_monica_reminder_frequency("year"), do: {"recurring", "annually"} - defp map_monica_reminder_frequency(_), do: {"one_time", nil} - - # ── Phase 12: Conversations ──────────────────────────────────────── - - defp import_contact_conversations( - credential, - base_url, - account_id, - user_id, - contact, - source_id, - import_job - ) do - url = "#{base_url}/api/contacts/#{source_id}/conversations" - - case api_get_json(credential, url, []) do - {:ok, %{"data" => convos}} when is_list(convos) -> - Enum.flat_map(convos, fn convo -> - import_single_conversation( - credential, - base_url, - account_id, - user_id, - contact, - convo, - import_job - ) - end) - - {:ok, _} -> - [] - - {:error, reason} -> - ["Failed to fetch conversations for contact #{source_id}: #{inspect(reason)}"] - end - end - - defp import_single_conversation( - credential, - base_url, - account_id, - user_id, - contact, - convo_data, - import_job - ) do - platform = - case convo_data["contact_field_type"] do - %{"name" => name} -> normalize_conversation_platform(name) - _ -> "other" - end - - attrs = %{ - "contact_id" => contact.id, - "platform" => platform, - "subject" => 
non_empty_string(convo_data["subject"]) - } - - case Kith.Conversations.create_conversation(account_id, user_id, attrs) do - {:ok, conversation} -> - maybe_record_entity( - import_job, - "conversation", - convo_data["id"], - "conversation", - conversation.id - ) - - # Import messages for this conversation - import_conversation_messages( - credential, - base_url, - conversation, - convo_data, - import_job - ) - - {:error, reason} -> - ["Conversation import error: #{inspect_errors(reason)}"] - end - end - - defp import_conversation_messages(_credential, _base_url, conversation, convo_data, import_job) do - messages = convo_data["messages"] || [] - - Enum.flat_map(messages, fn msg -> - attrs = %{ - "body" => msg["content"] || msg["written_by_me_body"] || "", - "direction" => if(msg["written_by_me"], do: "sent", else: "received"), - "sent_at" => parse_datetime(msg["written_at"]) || DateTime.utc_now() - } - - case Kith.Conversations.add_message(conversation, attrs) do - {:ok, message} -> - maybe_record_entity(import_job, "message", msg["id"], "message", message.id) - [] - - {:error, reason} -> - ["Message import error: #{inspect_errors(reason)}"] - end - end) - end - - @platform_keywords [ - {"sms", "sms"}, - {"text", "sms"}, - {"whatsapp", "whatsapp"}, - {"telegram", "telegram"}, - {"email", "email"}, - {"instagram", "instagram"}, - {"messenger", "messenger"}, - {"facebook", "messenger"}, - {"signal", "signal"} - ] - - defp normalize_conversation_platform(name) when is_binary(name) do - normalized = String.downcase(name) - - Enum.find_value(@platform_keywords, "other", fn {keyword, platform} -> - if String.contains?(normalized, keyword), do: platform - end) - end - - defp normalize_conversation_platform(_), do: "other" - - # ── Additional date/time helpers ─────────────────────────────────── - - defp parse_datetime(nil), do: nil - - defp parse_datetime(str) when is_binary(str) do - case DateTime.from_iso8601(str) do - {:ok, dt, _offset} -> dt - _ -> nil - end - end - - 
defp parse_datetime(_), do: nil - - defp parse_date_string(nil), do: nil - - defp parse_date_string(str) when is_binary(str) do - case parse_date_or_datetime(str) do - {:ok, date} -> date - _ -> nil - end - end - - defp parse_date_string(_), do: nil - # ── Phase 13: Document import (async) ────────────────────────────── defp enqueue_document_imports(credential, account_id, user_id, import_job) do diff --git a/lib/kith/imports/sources/monica_api/rate_limiter.ex b/lib/kith/imports/sources/monica_api/rate_limiter.ex new file mode 100644 index 0000000..a9d75cc --- /dev/null +++ b/lib/kith/imports/sources/monica_api/rate_limiter.ex @@ -0,0 +1,48 @@ +defmodule Kith.Imports.Sources.MonicaApi.RateLimiter do + @moduledoc """ + Per-host token bucket for outbound Monica API calls. + + Configured at five tokens below Monica's documented default of 60 requests + per minute (55), leaving a five-call safety margin so a small clock-skew or + burst on Monica's side does not push us into the 429 window. + + Configurable via: + + config :kith, :monica_rate_limit, + + per-test overrides via `Application.put_env/3`. + + Hammer (already a dep) supplies the underlying token bucket; we use a + bucket key per Monica host so independent Monica instances do not share + a quota. Calls block the caller process via `Process.sleep/1` until a + token is available, then return `:ok`.
+ """ + + @default_scale_ms 60_000 + @default_limit 55 + @default_retry_sleep_ms 1_100 + + @spec wait!(String.t()) :: :ok + def wait!(url_or_host) when is_binary(url_or_host) do + bucket = bucket_key(url_or_host) + limit = Application.get_env(:kith, :monica_rate_limit) || @default_limit + scale_ms = Application.get_env(:kith, :monica_rate_limit_scale_ms) || @default_scale_ms + + retry_sleep_ms = + Application.get_env(:kith, :monica_rate_limit_retry_sleep_ms) || @default_retry_sleep_ms + + case Hammer.check_rate(bucket, scale_ms, limit) do + {:allow, _count} -> + :ok + + {:deny, _retry_after_ms} -> + Process.sleep(retry_sleep_ms) + wait!(url_or_host) + end + end + + defp bucket_key(url_or_host) do + host = URI.parse(url_or_host).host || url_or_host + "monica_api:#{host}" + end +end diff --git a/lib/kith/journal/cleanup.ex b/lib/kith/journal/cleanup.ex new file mode 100644 index 0000000..efe2a41 --- /dev/null +++ b/lib/kith/journal/cleanup.ex @@ -0,0 +1,20 @@ +defmodule Kith.Journal.Cleanup do + @moduledoc """ + Wipes all journal entries for a single account. + """ + + alias Kith.Journal.Entry + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(e in Entry, where: e.account_id == ^account_id)) + + Logger.info("[Journal.Cleanup] wiped #{count} journal entr(ies) for account #{account_id}") + :ok + end +end diff --git a/lib/kith/policy.ex b/lib/kith/policy.ex index be22a0a..1b48ef5 100644 --- a/lib/kith/policy.ex +++ b/lib/kith/policy.ex @@ -46,6 +46,7 @@ defmodule Kith.Policy do | :journal | :duplicate_candidate | :reference_data + | :oban @doc """ Returns true if the user is authorized to perform the given action on the resource. 
@@ -58,6 +59,9 @@ defmodule Kith.Policy do # ── Admin: full access ─────────────────────────────────────────────── defp authorized?("admin", _action, _resource), do: true + # ── Admin-only resources: deny for editor/viewer ───────────────────── + defp authorized?(role, _action, :oban) when role in ["editor", "viewer"], do: false + # ── Editor: CRUD on contacts and content, no account/user management ─ defp authorized?("editor", :read, resource) when resource in [:account, :audit_log], do: true diff --git a/lib/kith/reminders/cleanup.ex b/lib/kith/reminders/cleanup.ex new file mode 100644 index 0000000..03a92b0 --- /dev/null +++ b/lib/kith/reminders/cleanup.ex @@ -0,0 +1,41 @@ +defmodule Kith.Reminders.Cleanup do + @moduledoc """ + Cancels all Oban jobs tracked on the account's reminders, then deletes + the reminders. FK CASCADE removes `reminder_instances`. + + Note: `reminder_rules` is intentionally NOT wiped — rules are account-level + pre-notification configuration (3 defaults seeded per account, toggleable + but not deletable per the schema) and are treated as reference data. 
+ """ + + alias Kith.Reminders.Reminder + alias Kith.Repo + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + cancel_oban_jobs_for_account(account_id) + + {count, _} = + Repo.delete_all(from(r in Reminder, where: r.account_id == ^account_id)) + + Logger.info("[Reminders.Cleanup] wiped #{count} reminder(s) for account #{account_id}") + :ok + end + + defp cancel_oban_jobs_for_account(account_id) do + job_ids = + Repo.all( + from(r in Reminder, + where: r.account_id == ^account_id, + select: r.enqueued_oban_job_ids + ) + ) + |> List.flatten() + |> Enum.reject(&is_nil/1) + + Enum.each(job_ids, &Oban.cancel_job/1) + end +end diff --git a/lib/kith/storage/account_cleanup.ex b/lib/kith/storage/account_cleanup.ex new file mode 100644 index 0000000..0ad5e4e --- /dev/null +++ b/lib/kith/storage/account_cleanup.ex @@ -0,0 +1,87 @@ +defmodule Kith.Storage.AccountCleanup do + @moduledoc """ + Deletes physical storage objects (photos, documents, import upload files) + for a single account. + + Storage failures (S3 already-deleted, network blip) are logged at `:warning` + but never raise — they must not abort the surrounding account reset. + Storage objects are recoverable separately (S3 lifecycle, manual sweep) + and don't affect data integrity. + + Must run BEFORE `Kith.Contacts.Cleanup` — once contacts are hard-deleted, + the `photos` and `documents` rows are CASCADE-deleted and we can no longer + iterate their `storage_key` values. 
+ """ + + alias Kith.Contacts.{Contact, Document, Photo} + alias Kith.Imports.Import + alias Kith.{Repo, Storage} + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + photo_count = delete_keys(photo_keys(account_id)) + document_count = delete_keys(document_keys(account_id)) + upload_count = delete_keys(import_upload_keys(account_id)) + + Logger.info( + "[Storage.AccountCleanup] deleted #{photo_count} photo file(s) + " <> + "#{document_count} document file(s) + #{upload_count} import upload(s) " <> + "for account #{account_id}" + ) + + :ok + end + + defp photo_keys(account_id) do + Repo.all( + from(p in Photo, + join: c in Contact, + on: p.contact_id == c.id, + where: c.account_id == ^account_id, + select: p.storage_key + ) + ) + end + + defp document_keys(account_id) do + Repo.all( + from(d in Document, + join: c in Contact, + on: d.contact_id == c.id, + where: c.account_id == ^account_id, + select: d.storage_key + ) + ) + end + + defp import_upload_keys(account_id) do + Repo.all( + from(i in Import, + where: i.account_id == ^account_id, + where: not is_nil(i.file_storage_key), + select: i.file_storage_key + ) + ) + end + + defp delete_keys(keys) do + Enum.each(keys, &safe_delete/1) + length(keys) + end + + defp safe_delete(nil), do: :ok + + defp safe_delete(key) do + case Storage.delete(key) do + :ok -> + :ok + + {:error, reason} -> + Logger.warning("[Storage.AccountCleanup] failed to delete #{key}: #{inspect(reason)}") + :ok + end + end +end diff --git a/lib/kith/tasks/cleanup.ex b/lib/kith/tasks/cleanup.ex new file mode 100644 index 0000000..64161ef --- /dev/null +++ b/lib/kith/tasks/cleanup.ex @@ -0,0 +1,20 @@ +defmodule Kith.Tasks.Cleanup do + @moduledoc """ + Wipes all tasks for a single account. 
+ """ + + alias Kith.Repo + alias Kith.Tasks.Task + + import Ecto.Query + require Logger + + @spec wipe_for_account(account_id :: integer()) :: :ok + def wipe_for_account(account_id) do + {count, _} = + Repo.delete_all(from(t in Task, where: t.account_id == ^account_id)) + + Logger.info("[Tasks.Cleanup] wiped #{count} task(s) for account #{account_id}") + :ok + end +end diff --git a/lib/kith/workers/account_reset_worker.ex b/lib/kith/workers/account_reset_worker.ex index 49630f0..97a1364 100644 --- a/lib/kith/workers/account_reset_worker.ex +++ b/lib/kith/workers/account_reset_worker.ex @@ -1,9 +1,13 @@ defmodule Kith.Workers.AccountResetWorker do @moduledoc """ - Oban worker that resets account data — deletes all contacts and - sub-entities while preserving users, account settings, and reference data. + Resets a single account's data by orchestrating per-domain cleanup modules. - Processes in batches to avoid long-running transactions on large accounts. + Wipes everything the account owns except reference data (genders, + relationship_types, contact_field_types, etc.) and account_invitations. + Operations are scoped to the target account; no other account is affected. + + Each `@cleaners` module exposes `wipe_for_account(account_id) :: :ok`. + Order is load-bearing — see `docs/superpowers/specs/2026-05-15-account-reset-completeness-design.md`. 
""" use Oban.Worker, @@ -11,115 +15,54 @@ defmodule Kith.Workers.AccountResetWorker do max_attempts: 3, unique: [period: 300, fields: [:args], keys: [:account_id]] - import Ecto.Query - alias Kith.Repo + alias Kith.{ + Activities, + AuditLogs, + Contacts, + Conversations, + Imports, + Journal, + Reminders, + Storage, + Tasks + } require Logger - @batch_size 200 + @cleaners [ + Imports.JobCancellation, + Storage.AccountCleanup, + Contacts.Cleanup, + Imports.Cleanup, + Conversations.Cleanup, + Reminders.Cleanup, + Tasks.Cleanup, + Journal.Cleanup, + Activities.Cleanup, + AuditLogs.Cleanup + ] @impl Oban.Worker def perform(%Oban.Job{args: %{"account_id" => account_id}}) do - Logger.info("AccountResetWorker: starting reset for account #{account_id}") + Logger.metadata(account_id: account_id, worker: "AccountReset") + Logger.info("[AccountReset] starting reset for account #{account_id}") + write_initiated_audit_log(account_id) + + Enum.each(@cleaners, fn cleaner -> + Logger.info("[AccountReset] running #{inspect(cleaner)}") + :ok = cleaner.wipe_for_account(account_id) + end) + + Logger.info("[AccountReset] completed reset for account #{account_id}") + :ok + end - Kith.AuditLogs.create_audit_log(account_id, %{ + defp write_initiated_audit_log(account_id) do + AuditLogs.create_audit_log(account_id, %{ user_id: nil, user_name: "system", event: "account_data_reset", metadata: %{reason: "Account data reset initiated"} }) - - # 1. Cancel all Oban reminder jobs for the account - cancel_reminder_jobs(account_id) - - # 2. Delete stored files (photos, documents) - delete_stored_files(account_id) - - # 3. Delete all contacts and sub-entities in batches - delete_contacts_in_batches(account_id) - - # 4. 
Delete orphaned data: tags, activities without contacts - delete_tags(account_id) - delete_activities(account_id) - delete_audit_logs(account_id) - - Logger.info("AccountResetWorker: completed reset for account #{account_id}") - :ok - end - - defp cancel_reminder_jobs(account_id) do - job_ids = - from(r in Kith.Reminders.Reminder, - where: r.account_id == ^account_id, - select: r.enqueued_oban_job_ids - ) - |> Repo.all() - |> List.flatten() - - Enum.each(job_ids, &Oban.cancel_job/1) - end - - defp delete_stored_files(account_id) do - # Delete photo files - from(p in Kith.Contacts.Photo, - join: c in Kith.Contacts.Contact, - on: p.contact_id == c.id, - where: c.account_id == ^account_id, - select: p.storage_key - ) - |> Repo.all() - |> Enum.each(&safe_delete_file/1) - - # Delete document files - from(d in Kith.Contacts.Document, - join: c in Kith.Contacts.Contact, - on: d.contact_id == c.id, - where: c.account_id == ^account_id, - select: d.storage_key - ) - |> Repo.all() - |> Enum.each(&safe_delete_file/1) - end - - defp safe_delete_file(nil), do: :ok - - defp safe_delete_file(key) do - case Kith.Storage.delete(key) do - :ok -> :ok - {:error, reason} -> Logger.warning("Failed to delete file #{key}: #{inspect(reason)}") - end - end - - defp delete_contacts_in_batches(account_id) do - # Hard-delete contacts (bypassing soft-delete) — CASCADE handles sub-entities - contact_ids = - from(c in Kith.Contacts.Contact, - where: c.account_id == ^account_id, - select: c.id, - limit: @batch_size - ) - |> Repo.all() - - if contact_ids != [] do - from(c in Kith.Contacts.Contact, where: c.id in ^contact_ids) - |> Repo.delete_all() - - delete_contacts_in_batches(account_id) - end - end - - defp delete_tags(account_id) do - from(t in Kith.Contacts.Tag, where: t.account_id == ^account_id) - |> Repo.delete_all() - end - - defp delete_activities(account_id) do - from(a in Kith.Activities.Activity, where: a.account_id == ^account_id) - |> Repo.delete_all() - end - - defp 
delete_audit_logs(account_id) do - from(al in Kith.AuditLogs.AuditLog, where: al.account_id == ^account_id) - |> Repo.delete_all() end end diff --git a/lib/kith/workers/duplicate_detection_worker.ex b/lib/kith/workers/duplicate_detection_worker.ex index 8e67e29..fbbc0bd 100644 --- a/lib/kith/workers/duplicate_detection_worker.ex +++ b/lib/kith/workers/duplicate_detection_worker.ex @@ -5,9 +5,18 @@ defmodule Kith.Workers.DuplicateDetectionWorker do Detection algorithm: 1. Name similarity via pg_trgm similarity() on display_name (threshold: 0.5) - 2. Exact email match across contact_fields - 3. Exact phone match across contact_fields - 4. Weighted score: name(0.4) + email(0.35) + phone(0.25) + 2. Case-insensitive email match across contact_fields + 3. Normalized phone match across contact_fields (digits only) + 4. Address match on line1 + postal_code + + Scoring (max-signal + bonus): + Each signal has an independent base score: + - email_match: 0.85 + - phone_match: 0.75 + - address_match: 0.60 + - name_match: the raw pg_trgm similarity (> 0.5) + Final score = max(base scores) + 0.05 per additional signal, capped at 1.0 + Threshold: >= 0.5 """ use Oban.Worker, @@ -15,7 +24,7 @@ defmodule Kith.Workers.DuplicateDetectionWorker do max_attempts: 3 import Ecto.Query - alias Kith.Contacts.{Contact, ContactField, DuplicateCandidate} + alias Kith.Contacts.{Address, Contact, ContactField, ContactFieldType, DuplicateCandidate} alias Kith.Repo @impl Oban.Worker @@ -36,33 +45,25 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp detect_duplicates(account_id) do - # Get active contacts for this account - contacts = + contact_count = Contact |> where([c], c.account_id == ^account_id) |> where([c], is_nil(c.deleted_at)) - |> select([c], %{id: c.id, display_name: c.display_name}) - |> Repo.all() + |> Repo.aggregate(:count) - if length(contacts) < 2, do: :ok, else: find_duplicates(account_id, contacts) + if contact_count >= 2, do: find_duplicates(account_id) end - defp 
find_duplicates(account_id, _contacts) do - # Find name-based duplicates using pg_trgm + defp find_duplicates(account_id) do name_matches = find_name_matches(account_id) - - # Find email-based duplicates email_matches = find_email_matches(account_id) - - # Find phone-based duplicates phone_matches = find_phone_matches(account_id) + address_matches = find_address_matches(account_id) - # Merge and score all matches all_pairs = - merge_matches(name_matches, email_matches, phone_matches) - |> Enum.filter(fn {_pair, score, _reasons} -> score >= 0.4 end) + merge_matches(name_matches, email_matches, phone_matches, address_matches) + |> Enum.filter(fn {_pair, score, _reasons} -> score >= 0.5 end) - # Get existing pending/dismissed candidates to avoid re-inserting existing = DuplicateCandidate |> where([d], d.account_id == ^account_id) @@ -73,9 +74,7 @@ defmodule Kith.Workers.DuplicateDetectionWorker do now = DateTime.utc_now() |> DateTime.truncate(:second) - # Insert new candidates Enum.each(all_pairs, fn {{id1, id2}, score, reasons} -> - # Canonicalize: smaller id first {contact_id, dup_id} = if id1 < id2, do: {id1, id2}, else: {id2, id1} unless MapSet.member?(existing, {contact_id, dup_id}) do @@ -93,7 +92,6 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp find_name_matches(account_id) do - # Use pg_trgm similarity for fuzzy name matching query = """ SELECT c1.id AS id1, c2.id AS id2, similarity(c1.display_name, c2.display_name) AS sim FROM contacts c1 @@ -102,6 +100,8 @@ defmodule Kith.Workers.DuplicateDetectionWorker do WHERE c1.account_id = $1 AND c1.deleted_at IS NULL AND c2.deleted_at IS NULL + AND c1.display_name IS NOT NULL AND c1.display_name != '' + AND c2.display_name IS NOT NULL AND c2.display_name != '' AND similarity(c1.display_name, c2.display_name) > 0.5 ORDER BY sim DESC LIMIT 500 @@ -119,55 +119,100 @@ defmodule Kith.Workers.DuplicateDetectionWorker do end defp find_email_matches(account_id) do - # Find contacts that share an exact email 
address + # Case-insensitive email match on TRIMmed values. Trim is required because + # CardDAV-style imports occasionally leak trailing whitespace; the != '' + # checks on the trimmed form prevent whitespace-only values from forming a + # cartesian product across all such rows. query = from cf1 in ContactField, join: cf2 in ContactField, - on: cf1.value == cf2.value and cf1.id < cf2.id, - join: cft in assoc(cf1, :contact_field_type), + on: + fragment("LOWER(TRIM(?))", cf1.value) == fragment("LOWER(TRIM(?))", cf2.value) and + cf1.id < cf2.id, + join: cft1 in ContactFieldType, + on: cf1.contact_field_type_id == cft1.id, + join: cft2 in ContactFieldType, + on: cf2.contact_field_type_id == cft2.id, where: cf1.account_id == ^account_id, where: cf2.account_id == ^account_id, - where: cft.protocol == "mailto:", + where: fragment("? LIKE 'mailto%'", cft1.protocol), + where: fragment("? LIKE 'mailto%'", cft2.protocol), where: cf1.contact_id != cf2.contact_id, + where: not is_nil(cf1.value) and fragment("TRIM(?) <> ''", cf1.value), + where: not is_nil(cf2.value) and fragment("TRIM(?) <> ''", cf2.value), select: {cf1.contact_id, cf2.contact_id} query |> Repo.all() - |> Enum.uniq() |> Enum.map(fn {id1, id2} -> - {id1, id2} = if id1 < id2, do: {id1, id2}, else: {id2, id1} - {{id1, id2}, 1.0, ["email_match"]} + if id1 < id2, do: {id1, id2}, else: {id2, id1} end) - |> Enum.uniq_by(fn {pair, _, _} -> pair end) + |> Enum.uniq() + |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["email_match"]} end) end defp find_phone_matches(account_id) do - # Find contacts that share an exact phone number (normalized: digits only) + # Phone values are normalized to E.164 on import (see + # `Kith.Contacts.PhoneFormatter.normalize/2`), so this becomes a plain + # equality join. The previous in-query regex normalization combined with a + # raw-value `!= ""` filter let formatting-only inputs (`+`, `()`, `-`) + # collapse to an empty string and cartesian-explode (see Bug A). 
query = from cf1 in ContactField, join: cf2 in ContactField, - on: - fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf1.value) == - fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf2.value) and cf1.id < cf2.id, - join: cft in assoc(cf1, :contact_field_type), + on: cf1.value == cf2.value and cf1.id < cf2.id, + join: cft1 in ContactFieldType, + on: cf1.contact_field_type_id == cft1.id, + join: cft2 in ContactFieldType, + on: cf2.contact_field_type_id == cft2.id, where: cf1.account_id == ^account_id, where: cf2.account_id == ^account_id, - where: cft.protocol == "tel:", + where: fragment("? LIKE 'tel%'", cft1.protocol), + where: fragment("? LIKE 'tel%'", cft2.protocol), where: cf1.contact_id != cf2.contact_id, + where: not is_nil(cf1.value) and fragment("TRIM(?) <> ''", cf1.value), + where: not is_nil(cf2.value) and fragment("TRIM(?) <> ''", cf2.value), select: {cf1.contact_id, cf2.contact_id} query |> Repo.all() + |> Enum.map(fn {id1, id2} -> + if id1 < id2, do: {id1, id2}, else: {id2, id1} + end) |> Enum.uniq() + |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["phone_match"]} end) + end + + defp find_address_matches(account_id) do + # Match on normalized line1 + postal_code + query = + from a1 in Address, + join: a2 in Address, + on: + fragment("LOWER(TRIM(?))", a1.line1) == fragment("LOWER(TRIM(?))", a2.line1) and + fragment("LOWER(TRIM(?))", a1.postal_code) == + fragment("LOWER(TRIM(?))", a2.postal_code) and + a1.id < a2.id, + where: a1.account_id == ^account_id, + where: a2.account_id == ^account_id, + where: a1.contact_id != a2.contact_id, + where: a1.line1 != "" and not is_nil(a1.line1), + where: a1.postal_code != "" and not is_nil(a1.postal_code), + where: a2.line1 != "" and not is_nil(a2.line1), + where: a2.postal_code != "" and not is_nil(a2.postal_code), + select: {a1.contact_id, a2.contact_id} + + query + |> Repo.all() |> Enum.map(fn {id1, id2} -> - {id1, id2} = if id1 < id2, do: {id1, id2}, else: {id2, id1} - {{id1, id2}, 1.0, ["phone_match"]} + if 
id1 < id2, do: {id1, id2}, else: {id2, id1} end) - |> Enum.uniq_by(fn {pair, _, _} -> pair end) + |> Enum.uniq() + |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["address_match"]} end) end - defp merge_matches(name_matches, email_matches, phone_matches) do - (name_matches ++ email_matches ++ phone_matches) + defp merge_matches(name_matches, email_matches, phone_matches, address_matches) do + (name_matches ++ email_matches ++ phone_matches ++ address_matches) |> Enum.group_by(fn {pair, _score, _reasons} -> pair end) |> Enum.map(&compute_merged_score/1) end @@ -176,9 +221,19 @@ defmodule Kith.Workers.DuplicateDetectionWorker do reasons = matches |> Enum.flat_map(fn {_, _, r} -> r end) |> Enum.uniq() name_sim = Enum.find_value(matches, 0.0, &extract_name_score/1) - email_weight = if "email_match" in reasons, do: 0.35, else: 0.0 - phone_weight = if "phone_match" in reasons, do: 0.25, else: 0.0 - score = min(name_sim * 0.4 + email_weight + phone_weight, 1.0) + # Base score for each signal type + base_scores = + [] + |> then(fn acc -> if "email_match" in reasons, do: [0.85 | acc], else: acc end) + |> then(fn acc -> if "phone_match" in reasons, do: [0.75 | acc], else: acc end) + |> then(fn acc -> if "address_match" in reasons, do: [0.60 | acc], else: acc end) + |> then(fn acc -> if name_sim > 0.0, do: [name_sim | acc], else: acc end) + + signal_count = length(base_scores) + max_score = Enum.max(base_scores, fn -> 0.0 end) + bonus = max(signal_count - 1, 0) * 0.05 + + score = min(max_score + bonus, 1.0) {pair, Float.round(score, 2), reasons} end diff --git a/lib/kith/workers/import_source_worker.ex b/lib/kith/workers/import_source_worker.ex index 6cdf883..d5feaa6 100644 --- a/lib/kith/workers/import_source_worker.ex +++ b/lib/kith/workers/import_source_worker.ex @@ -11,6 +11,7 @@ defmodule Kith.Workers.ImportSourceWorker do alias Kith.Imports alias Kith.Storage + alias Kith.Workers.DuplicateDetectionWorker @impl Oban.Worker def perform(%Oban.Job{args: %{"import_id" => 
import_id}}) do @@ -33,6 +34,9 @@ defmodule Kith.Workers.ImportSourceWorker do topic = "import:#{import.account_id}" Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import.account_id})) + Logger.info("Import #{import_id} completed: #{inspect(summary_map)}") :ok else diff --git a/lib/kith/workers/import_worker.ex b/lib/kith/workers/import_worker.ex index 790620e..68dde77 100644 --- a/lib/kith/workers/import_worker.ex +++ b/lib/kith/workers/import_worker.ex @@ -11,6 +11,7 @@ defmodule Kith.Workers.ImportWorker do alias Kith.Contacts alias Kith.VCard.Parser + alias Kith.Workers.DuplicateDetectionWorker @impl Oban.Worker def perform(%Oban.Job{ @@ -42,6 +43,9 @@ defmodule Kith.Workers.ImportWorker do {:import_complete, results} ) + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: account_id})) + Logger.info( "vCard import complete for account #{account_id}: " <> "#{results.imported} imported, #{results.skipped} skipped" diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex index b5355ba..bf981e9 100644 --- a/lib/kith/workers/monica_api_crawl_worker.ex +++ b/lib/kith/workers/monica_api_crawl_worker.ex @@ -2,9 +2,11 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do @moduledoc """ Oban worker that crawls a Monica CRM API instance and imports all contacts. - Single long-running job that paginates through the contacts API, imports - contacts with all embedded data, resolves cross-references, and optionally - imports photos. + Paginates through the contacts API, imports contacts with all embedded data, + and resolves cross-references. 
When the user opts into photos via + `api_options["photos"]`, this worker enqueues `MonicaPhotoSyncWorker` after + the main crawl completes — photo import runs as a separate job so the main + import status reflects only contact work. Connection is validated in the import wizard before this job is enqueued. """ @@ -15,6 +17,9 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do alias Kith.Imports alias Kith.Imports.Sources.MonicaApi + alias Kith.Workers.DuplicateDetectionWorker + alias Kith.Workers.MonicaMiscDataWorker + alias Kith.Workers.MonicaPhotoSyncWorker @impl Oban.Worker def perform(%Oban.Job{args: %{"import_id" => import_id}}) do @@ -36,18 +41,28 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do ) do now = DateTime.utc_now() |> DateTime.truncate(:second) summary_map = ensure_map(summary) + {misc_plan, persisted_summary} = pop_misc_plan(summary_map) Imports.update_import_status(import_job, "completed", %{ - summary: summary_map, + summary: persisted_summary, completed_at: now }) + # Enqueue misc worker BEFORE wiping the API key — it needs the + # still-encrypted key in its job args (same pattern as photo sync). 
+ maybe_enqueue_misc_data_worker(import_job, misc_plan) Imports.wipe_api_key(import_job) topic = "import:#{import_job.account_id}" - Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map}) + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, persisted_summary}) - Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}") + # Trigger duplicate detection for newly imported contacts + Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id})) + + # Enqueue photo sync (separate job) if the user opted in + maybe_enqueue_photo_sync(import_job) + + Logger.info("MonicaApi import #{import_id} completed: #{inspect(persisted_summary)}") :ok else {:error, reason} -> @@ -70,17 +85,63 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do defp build_credential(import_job) do %{ url: import_job.api_url, - api_key: import_job.api_key_encrypted + api_key: import_job.api_key_encrypted, + req_options: Application.get_env(:kith, :monica_req_options, []) } end - defp build_opts(import_job) do + @doc false + # Public for testing — the wizard → source-module flag handoff is the + # contract that Bug C silently violated, so we want a regression test that + # binds against this directly. + def build_opts(import_job) do options = import_job.api_options || %{} + # Forward every wizard-saved option so the source module is the single + # source of truth for which keys it reads. Normalize only the legacy + # extra_notes default-on semantic. + options + |> Map.put_new("extra_notes", true) + |> Map.update!("extra_notes", &(&1 != false)) + end + + defp maybe_enqueue_photo_sync(import_job) do + if get_in(import_job.api_options || %{}, ["photos"]) do + # api_key is wiped from the DB immediately after this worker completes, + # so the photo sync worker receives its own copy via job args + # (same pattern as MonicaDocumentImportWorker). 
+ %{ + "import_id" => import_job.id, + "credential_url" => import_job.api_url, + "credential_api_key" => import_job.api_key_encrypted + } + |> MonicaPhotoSyncWorker.new() + |> Oban.insert() + end + end + + defp maybe_enqueue_misc_data_worker(_import_job, []), do: :ok + + defp maybe_enqueue_misc_data_worker(import_job, plan) do %{ - "photos" => options["photos"] || false, - "extra_notes" => options["extra_notes"] != false + "import_id" => import_job.id, + "credential_url" => import_job.api_url, + "credential_api_key" => import_job.api_key_encrypted, + "plan" => plan } + |> MonicaMiscDataWorker.new() + |> Oban.insert() + end + + # The misc-data plan is built by MonicaApi.crawl/5 and returned in the + # summary under either an atom or string key (the map round-trips through + # ensure_map/1). Pop it out before persisting so the plan is not stored + # in the DB summary. + defp pop_misc_plan(summary) do + {plan_atom, rest_atom} = Map.pop(summary, :misc_data_plan, []) + {plan_str, rest} = Map.pop(rest_atom, "misc_data_plan", []) + plan = if plan_atom == [], do: plan_str, else: plan_atom + {plan, rest} end defp ensure_map(m) when is_map(m), do: m diff --git a/lib/kith/workers/monica_misc_data_worker.ex b/lib/kith/workers/monica_misc_data_worker.ex new file mode 100644 index 0000000..298711b --- /dev/null +++ b/lib/kith/workers/monica_misc_data_worker.ex @@ -0,0 +1,692 @@ +defmodule Kith.Workers.MonicaMiscDataWorker do + @moduledoc """ + Oban worker that imports the per-contact "miscellaneous" data types + (pets, calls, activities, gifts, debts, tasks, reminders, conversations) + for an already-completed Monica API crawl. + + Enqueued by `Kith.Workers.MonicaApiCrawlWorker` on successful completion, + carrying: + + * `"import_id"` — the Import row this job belongs to. + * `"credential_url"`, `"credential_api_key"` — the credential needed to + keep calling Monica after the main crawl wipes `api_key_encrypted`. + Same pattern as `MonicaPhotoSyncWorker`. 
+ * `"plan"` — list of `%{"source_id", "local_id", "endpoints"}` maps + pre-filtered during the main crawl using Monica's `statistics.*` + fields, so we only fire the endpoints with data. + + Throttled through `Kith.Imports.Sources.MonicaApi.RateLimiter` (same + per-host bucket as the main crawler). + + Exits early if the import has been cancelled. Contacts that were + soft-deleted between main-crawl completion and this job's dispatch are + silently skipped. + """ + + use Oban.Worker, queue: :imports, max_attempts: 3 + + require Logger + + import Ecto.Query, warn: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Imports.Sources.MonicaApi.RateLimiter + alias Kith.Repo + + @impl Oban.Worker + def timeout(_job), do: :timer.minutes(30) + + @impl Oban.Worker + def perform(%Oban.Job{args: args}) do + import_job = Imports.get_import!(args["import_id"]) + + if import_job.status in ["cancelled", "failed"] do + :ok + else + credential = build_credential(args) + plan = args["plan"] || [] + + counts = process_plan(plan, credential, import_job) + + summary = Map.put(import_job.summary || %{}, "misc", counts) + + Imports.update_import_status(import_job, import_job.status, %{summary: summary}) + + topic = "import:#{import_job.account_id}" + Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_misc_complete, counts}) + + :ok + end + end + + defp build_credential(args) do + %{ + url: args["credential_url"], + api_key: args["credential_api_key"], + req_options: Application.get_env(:kith, :monica_req_options, []) + } + end + + defp process_plan(plan, credential, import_job) do + initial = %{ + "pets" => 0, + "calls" => 0, + "activities" => 0, + "gifts" => 0, + "debts" => 0, + "tasks" => 0, + "reminders" => 0, + "conversations" => 0 + } + + user_id = import_job.user_id + + Enum.reduce(plan, initial, fn entry, counts -> + process_entry(entry, credential, user_id, import_job, counts) + end) + end + + defp process_entry(entry, credential, user_id, import_job, counts) do + 
contact = Contacts.get_contact_for_misc(entry["local_id"]) + + if contact == nil or not is_nil(contact.deleted_at) do + counts + else + Enum.reduce(entry["endpoints"] || [], counts, fn endpoint, counts -> + n = fire_endpoint(endpoint, credential, user_id, contact, entry["source_id"], import_job) + Map.update(counts, endpoint, n, &(&1 + n)) + end) + end + end + + defp fire_endpoint("pets", c, _u, contact, src, ij), + do: import_contact_pets(c, contact, src, ij) + + defp fire_endpoint("calls", c, _u, contact, src, ij), + do: import_contact_calls(c, contact, src, ij) + + defp fire_endpoint("activities", c, _u, contact, src, ij), + do: import_contact_activities(c, contact, src, ij) + + defp fire_endpoint("gifts", c, u, contact, src, ij), + do: import_contact_gifts(c, u, contact, src, ij) + + defp fire_endpoint("debts", c, u, contact, src, ij), + do: import_contact_debts(c, u, contact, src, ij) + + defp fire_endpoint("tasks", c, u, contact, src, ij), + do: import_contact_tasks(c, u, contact, src, ij) + + defp fire_endpoint("reminders", c, u, contact, src, ij), + do: import_contact_reminders(c, u, contact, src, ij) + + defp fire_endpoint("conversations", c, u, contact, src, ij), + do: import_contact_conversations(c, u, contact, src, ij) + + defp fire_endpoint(other, _, _, _, _, _) do + Logger.warning("[MonicaMiscData] unknown endpoint #{inspect(other)}; skipping") + 0 + end + + # ── HTTP wrapper ────────────────────────────────────────────────────── + + defp api_get_json(credential, url, params) do + RateLimiter.wait!(credential.url) + + headers = [ + {"Authorization", "Bearer #{credential.api_key}"}, + {"Accept", "application/json"} + ] + + options = + [ + headers: headers, + params: params, + max_retries: 5, + retry_log_level: :warn + ] ++ Map.get(credential, :req_options, []) + + case Req.get(url, options) do + {:ok, %{status: 200, body: body}} when is_map(body) -> {:ok, body} + {:ok, %{status: 429}} -> {:error, :rate_limited} + {:ok, %{status: status}} -> {:error, 
"Unexpected status: #{status}"} + {:error, reason} -> {:error, reason} + end + end + + # ── Per-contact endpoint helpers ───────────────────────────────────── + # + # Each top-level helper returns the count of items successfully imported + # for the per-endpoint summary aggregate. Per-item helpers return either + # `:ok` (success) or `{:error, _}` (skipped/failed). + + defp import_contact_pets(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/pets" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => pets}} when is_list(pets) -> + Enum.count(pets, fn pet -> + match?(:ok, import_single_pet(contact.account_id, contact, pet, import_job)) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch pets for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_pet(account_id, contact, pet_data, import_job) do + name = pet_data["name"] + species = normalize_pet_species(pet_data["pet_category"] || pet_data["species"]) + + if pet_duplicate?(contact.id, name, species) do + {:error, :duplicate} + else + attrs = %{ + "contact_id" => contact.id, + "name" => name || "Unknown", + "species" => species, + "breed" => non_empty_string(pet_data["breed"]), + "notes" => non_empty_string(pet_data["notes"]) + } + + case Kith.Pets.create_pet(account_id, attrs) do + {:ok, pet} -> + maybe_record_entity(import_job, "pet", pet_data["id"], "pet", pet.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] pet error: #{inspect_errors(reason)}") + {:error, reason} + end + end + end + + defp import_contact_calls(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/calls" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => calls}} when is_list(calls) -> + Enum.count(calls, fn call -> + match?(:ok, import_single_call(contact.account_id, contact, call, import_job)) + end) + + {:ok, _} -> 
+ 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch calls for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_call(account_id, contact, call_data, import_job) do + occurred_at = parse_datetime(call_data["called_at"]) + + if is_nil(occurred_at) do + {:error, :no_timestamp} + else + attrs = %{ + "occurred_at" => occurred_at, + "notes" => non_empty_string(call_data["content"]), + "duration_mins" => call_data["duration"] + } + + case Kith.Activities.create_call(%{account_id: account_id, id: contact.id}, attrs) do + {:ok, call} -> + maybe_record_entity(import_job, "call", call_data["id"], "call", call.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] call error: #{inspect_errors(reason)}") + {:error, reason} + end + end + end + + defp import_contact_activities(credential, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/activities" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => activities}} when is_list(activities) -> + Enum.count(activities, fn activity -> + match?(:ok, import_single_activity(contact.account_id, contact, activity, import_job)) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch activities for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_activity(account_id, contact, activity_data, import_job) do + occurred_at = + parse_datetime(activity_data["happened_at"] || activity_data["date_it_happened"]) + + attrs = %{ + "title" => activity_data["summary"] || activity_data["title"] || "Imported activity", + "description" => non_empty_string(activity_data["description"]), + "occurred_at" => occurred_at || DateTime.utc_now() + } + + case Kith.Activities.create_activity(account_id, attrs, [contact.id]) do + {:ok, activity} -> + maybe_record_entity(import_job, "activity", activity_data["id"], "activity", activity.id) + :ok + + 
{:error, reason} -> + Logger.warning("[MonicaMiscData] activity error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_gifts(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/gifts" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => gifts}} when is_list(gifts) -> + Enum.count(gifts, fn gift -> + match?( + :ok, + import_single_gift(contact.account_id, user_id, contact, gift, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch gifts for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_gift(account_id, user_id, contact, gift_data, import_job) do + direction = + case gift_data["is_for"] do + "contact" -> "given" + _ -> "received" + end + + attrs = %{ + "contact_id" => contact.id, + "name" => gift_data["name"] || "Imported gift", + "description" => non_empty_string(gift_data["comment"]), + "direction" => direction, + "status" => + cond do + gift_data["has_been_offered"] -> "given" + gift_data["has_been_received"] -> "received" + true -> "idea" + end, + "amount" => gift_data["amount"], + "date" => parse_date_string(gift_data["date"]) + } + + case Kith.Gifts.create_gift(account_id, user_id, attrs) do + {:ok, gift} -> + maybe_record_entity(import_job, "gift", gift_data["id"], "gift", gift.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] gift error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_debts(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/debts" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => debts}} when is_list(debts) -> + Enum.count(debts, fn debt -> + match?( + :ok, + import_single_debt(contact.account_id, user_id, contact, debt, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + 
"[MonicaMiscData] failed to fetch debts for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_debt(account_id, user_id, contact, debt_data, import_job) do + direction = + case debt_data["in_debt"] do + "yes" -> "owed_by_me" + _ -> "owed_to_me" + end + + attrs = %{ + "contact_id" => contact.id, + "title" => debt_data["reason"] || "Imported debt", + "amount" => debt_data["amount"] || "0", + "direction" => direction, + "status" => if(debt_data["status"] == "complete", do: "settled", else: "active") + } + + case Kith.Debts.create_debt(account_id, user_id, attrs) do + {:ok, debt} -> + maybe_record_entity(import_job, "debt", debt_data["id"], "debt", debt.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] debt error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_tasks(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/tasks" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => tasks}} when is_list(tasks) -> + Enum.count(tasks, fn task -> + match?( + :ok, + import_single_task(contact.account_id, user_id, contact, task, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch tasks for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_task(account_id, user_id, contact, task_data, import_job) do + status = if task_data["completed"], do: "completed", else: "pending" + + attrs = %{ + "contact_id" => contact.id, + "title" => task_data["title"] || "Imported task", + "description" => non_empty_string(task_data["description"]), + "status" => status + } + + case Kith.Tasks.create_task(account_id, user_id, attrs) do + {:ok, task} -> + maybe_record_entity(import_job, "task", task_data["id"], "task", task.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] task error: #{inspect_errors(reason)}") + {:error, 
reason} + end + end + + defp import_contact_reminders(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/reminders" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => reminders}} when is_list(reminders) -> + Enum.count(reminders, fn reminder -> + match?( + :ok, + import_single_reminder(contact.account_id, user_id, contact, reminder, import_job) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch reminders for contact #{source_id}: #{inspect(reason)}" + ) + + 0 + end + end + + defp import_single_reminder(account_id, user_id, contact, reminder_data, import_job) do + {type, frequency} = map_monica_reminder_frequency(reminder_data["frequency_type"]) + + next_date = + parse_date_string(reminder_data["next_expected_date"]) || + Date.utc_today() + + attrs = %{ + "contact_id" => contact.id, + "type" => type, + "title" => reminder_data["title"] || "Imported reminder", + "frequency" => frequency, + "next_reminder_date" => next_date + } + + case Kith.Reminders.create_reminder(account_id, user_id, attrs) do + {:ok, reminder} -> + maybe_record_entity(import_job, "reminder", reminder_data["id"], "reminder", reminder.id) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] reminder error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_contact_conversations(credential, user_id, contact, source_id, import_job) do + url = "#{credential.url}/api/contacts/#{source_id}/conversations" + + case api_get_json(credential, url, []) do + {:ok, %{"data" => convos}} when is_list(convos) -> + Enum.count(convos, fn convo -> + match?( + :ok, + import_single_conversation( + contact.account_id, + user_id, + contact, + convo, + import_job + ) + ) + end) + + {:ok, _} -> + 0 + + {:error, reason} -> + Logger.warning( + "[MonicaMiscData] failed to fetch conversations for contact #{source_id}: " <> + inspect(reason) + ) + + 0 + end + end 
+ + defp import_single_conversation(account_id, user_id, contact, convo_data, import_job) do + platform = + case convo_data["contact_field_type"] do + %{"name" => name} -> normalize_conversation_platform(name) + _ -> "other" + end + + attrs = %{ + "contact_id" => contact.id, + "platform" => platform, + "subject" => non_empty_string(convo_data["subject"]) + } + + case Kith.Conversations.create_conversation(account_id, user_id, attrs) do + {:ok, conversation} -> + maybe_record_entity( + import_job, + "conversation", + convo_data["id"], + "conversation", + conversation.id + ) + + import_conversation_messages(conversation, convo_data, import_job) + :ok + + {:error, reason} -> + Logger.warning("[MonicaMiscData] conversation error: #{inspect_errors(reason)}") + {:error, reason} + end + end + + defp import_conversation_messages(conversation, convo_data, import_job) do + messages = convo_data["messages"] || [] + + Enum.each(messages, fn msg -> + attrs = %{ + "body" => msg["content"] || msg["written_by_me_body"] || "", + "direction" => if(msg["written_by_me"], do: "sent", else: "received"), + "sent_at" => parse_datetime(msg["written_at"]) || DateTime.utc_now() + } + + case Kith.Conversations.add_message(conversation, attrs) do + {:ok, message} -> + maybe_record_entity(import_job, "message", msg["id"], "message", message.id) + + {:error, reason} -> + Logger.warning("[MonicaMiscData] message error: #{inspect_errors(reason)}") + end + end) + end + + # ── Local helpers (copied from MonicaApi) ──────────────────────────── + + defp normalize_pet_species(nil), do: "other" + + defp normalize_pet_species(species) when is_map(species), + do: normalize_pet_species(species["name"]) + + defp normalize_pet_species(species) when is_binary(species) do + normalized = String.downcase(species) + + if normalized in ~w(dog cat bird fish reptile rabbit hamster) do + normalized + else + "other" + end + end + + defp normalize_pet_species(_), do: "other" + + defp pet_duplicate?(contact_id, name, 
species) do + Repo.exists?( + from p in Kith.Contacts.Pet, + where: + p.contact_id == ^contact_id and + fragment("lower(coalesce(?, ''))", p.name) == + fragment("lower(coalesce(?, ''))", ^(name || "")) and + p.species == ^species + ) + end + + defp map_monica_reminder_frequency("one_time"), do: {"one_time", nil} + defp map_monica_reminder_frequency("week"), do: {"recurring", "weekly"} + defp map_monica_reminder_frequency("month"), do: {"recurring", "monthly"} + defp map_monica_reminder_frequency("year"), do: {"recurring", "annually"} + defp map_monica_reminder_frequency(_), do: {"one_time", nil} + + @platform_keywords [ + {"sms", "sms"}, + {"text", "sms"}, + {"whatsapp", "whatsapp"}, + {"telegram", "telegram"}, + {"email", "email"}, + {"instagram", "instagram"}, + {"messenger", "messenger"}, + {"facebook", "messenger"}, + {"signal", "signal"} + ] + + defp normalize_conversation_platform(name) when is_binary(name) do + normalized = String.downcase(name) + + Enum.find_value(@platform_keywords, "other", fn {keyword, platform} -> + if String.contains?(normalized, keyword), do: platform + end) + end + + defp normalize_conversation_platform(_), do: "other" + + defp non_empty_string(nil), do: nil + defp non_empty_string(""), do: nil + defp non_empty_string(s) when is_binary(s), do: s + defp non_empty_string(_), do: nil + + defp parse_datetime(nil), do: nil + + defp parse_datetime(str) when is_binary(str) do + case DateTime.from_iso8601(str) do + {:ok, dt, _offset} -> dt + _ -> nil + end + end + + defp parse_datetime(_), do: nil + + defp parse_date_string(nil), do: nil + + defp parse_date_string(str) when is_binary(str) do + case Date.from_iso8601(str) do + {:ok, date} -> + date + + {:error, _} -> + case DateTime.from_iso8601(str) do + {:ok, dt, _offset} -> DateTime.to_date(dt) + _ -> nil + end + end + end + + defp parse_date_string(_), do: nil + + defp inspect_errors(%Ecto.Changeset{} = changeset) do + Ecto.Changeset.traverse_errors(changeset, fn {msg, opts} -> + 
Regex.replace(~r"%{(\w+)}", msg, fn _, key -> + opts |> Keyword.get(String.to_existing_atom(key), key) |> to_string() + end) + end) + |> inspect() + end + + defp inspect_errors(other), do: inspect(other) + + defp maybe_record_entity(nil, _type, _id, _local_type, _local_id), do: :ok + defp maybe_record_entity(_import, _type, nil, _local_type, _local_id), do: :ok + + defp maybe_record_entity(import_job, type, source_id, local_type, local_id) do + Imports.record_imported_entity(import_job, type, to_string(source_id), local_type, local_id) + end +end diff --git a/lib/kith/workers/monica_photo_sync_worker.ex b/lib/kith/workers/monica_photo_sync_worker.ex new file mode 100644 index 0000000..8554741 --- /dev/null +++ b/lib/kith/workers/monica_photo_sync_worker.ex @@ -0,0 +1,336 @@ +defmodule Kith.Workers.MonicaPhotoSyncWorker do + @moduledoc """ + Imports photos for a Monica API import after the main contact crawl completes. + + Enqueued by `MonicaApiCrawlWorker` when `api_options["photos"]` is true. + Paginates `GET /api/photos`, decodes each photo's inline `dataUrl`, dedups + by SHA-256 content hash, persists to storage and the `photos` table, and + sets the owning contact's avatar if not already set. + + Writes incremental progress to `import.sync_summary` after each page so the + import-history UI shows live counts and a per-photo table. 
+ """ + + use Oban.Worker, queue: :imports, max_attempts: 3 + + require Logger + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Repo + alias Kith.Storage + + @page_limit 100 + @max_rate_limit_retries 3 + @rate_limit_sleep_ms :timer.seconds(65) + @max_photos_in_summary 500 + @log_prefix "[MonicaPhotoSync]" + + @impl Oban.Worker + def perform(%Oban.Job{ + args: %{ + "import_id" => import_id, + "credential_url" => credential_url, + "credential_api_key" => credential_api_key + } + }) do + import_job = Imports.get_import!(import_id) + Logger.metadata(import_id: import_id, worker: "MonicaPhotoSync") + Logger.info("#{@log_prefix} Starting photo sync for import #{import_id}") + + credential = %{ + url: credential_url, + api_key: credential_api_key, + req_options: Application.get_env(:kith, :monica_req_options, []) + } + + initial = empty_summary() + Imports.update_sync_summary(import_job, initial) + + case crawl_pages(credential, import_job, 1, initial) do + {:ok, final} -> + Imports.update_sync_summary(import_job, final) + + Logger.info( + "#{@log_prefix} Photo sync complete: " <> + "#{final["synced"]}/#{final["total"]} synced, " <> + "#{final["failed"]} failed, #{final["not_found"]} not_found" + ) + + :ok + + {:error, reason} -> + Logger.error("#{@log_prefix} Photo sync failed: #{inspect(reason)}") + {:error, reason} + end + end + + @impl Oban.Worker + def timeout(_job), do: :timer.minutes(30) + + # ── Page loop ─────────────────────────────────────────────────────────── + + defp crawl_pages(credential, import_job, page, summary) do + url = "#{credential.url}/api/photos" + + case api_get_json(credential, url, limit: @page_limit, page: page) do + {:ok, %{"data" => photos, "meta" => meta}} when is_list(photos) -> + last_page = meta["last_page"] || 1 + + Logger.info( + "#{@log_prefix} page #{page}: #{length(photos)} photos (#{last_page} pages total)" + ) + + page_summary = + Enum.reduce(photos, summary, fn photo, acc -> + import_one_photo(photo, import_job, acc) + 
end) + + Logger.info( + "#{@log_prefix} page #{page} done (running: " <> + "#{page_summary["synced"]}/#{page_summary["total"]} synced, " <> + "#{page_summary["failed"]} failed, #{page_summary["not_found"]} not_found)" + ) + + Imports.update_sync_summary(import_job, page_summary) + + if page < last_page do + crawl_pages(credential, import_job, page + 1, page_summary) + else + {:ok, page_summary} + end + + {:error, reason} -> + Logger.warning("#{@log_prefix} Failed to fetch photos page #{page}: #{inspect(reason)}") + {:error, reason} + + other -> + Logger.warning("#{@log_prefix} Unexpected response on page #{page}: #{inspect(other)}") + {:ok, summary} + end + end + + # ── Per-photo flow ───────────────────────────────────────────────────── + + defp import_one_photo(photo, import_job, summary) do + summary = bump(summary, "total") + uuid = photo["uuid"] + monica_contact_id = get_in(photo, ["contact", "id"]) + + case resolve_contact(import_job.account_id, monica_contact_id) do + {:ok, contact} -> + handle_decode(contact, photo, import_job, summary, uuid) + + {:not_found, reason} -> + Logger.info("#{@log_prefix} photo #{uuid}: #{reason}") + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => monica_contact_id, + "status" => "not_found", + "reason" => reason + }) + |> bump("not_found") + end + end + + defp resolve_contact(_account_id, nil), + do: {:not_found, "missing contact id in /api/photos response"} + + defp resolve_contact(account_id, monica_contact_id) do + source_id = to_string(monica_contact_id) + + case Imports.find_import_record(account_id, "monica_api", "contact", source_id) do + nil -> + {:not_found, "contact #{source_id} not in import_records"} + + %{local_entity_id: local_id} -> + case Repo.get(Contacts.Contact, local_id) do + nil -> + {:not_found, "local contact #{local_id} not found"} + + %{deleted_at: deleted_at} when not is_nil(deleted_at) -> + {:not_found, "local contact #{local_id} is soft-deleted"} + + contact -> + {:ok, contact} + 
end + end + end + + defp handle_decode(contact, photo, import_job, summary, uuid) do + case decode_photo_data(photo) do + {:ok, binary} -> + handle_dedup(contact, photo, binary, import_job, summary, uuid) + + {:error, reason} -> + Logger.warning("#{@log_prefix} photo #{uuid}: failed (#{reason})") + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "failed", + "reason" => to_string(reason) + }) + |> bump("failed") + end + end + + defp handle_dedup(contact, photo, binary, import_job, summary, uuid) do + content_hash = :crypto.hash(:sha256, binary) |> Base.encode16(case: :lower) + + if Contacts.photo_exists_by_hash?(contact.id, content_hash) do + Logger.debug( + "#{@log_prefix} photo #{uuid}: duplicate hash for contact #{contact.id}, skipping" + ) + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "synced", + "reason" => "duplicate" + }) + |> bump("synced") + else + do_upload(contact, photo, binary, content_hash, import_job, summary, uuid) + end + end + + defp do_upload(contact, photo, binary, content_hash, import_job, summary, uuid) do + file_name = photo["original_filename"] || "photo.jpg" + key = Storage.generate_key(contact.account_id, "photos", file_name) + + attrs = %{ + "file_name" => file_name, + "storage_key" => key, + "file_size" => byte_size(binary), + "content_type" => photo["mime_type"] || "image/jpeg", + "content_hash" => content_hash + } + + with {:ok, _} <- Storage.upload_binary(binary, key), + {:ok, photo_record} <- Contacts.create_photo(contact, attrs) do + maybe_record_entity(import_job, uuid, photo_record.id) + maybe_set_avatar(contact, key) + + Logger.debug( + "#{@log_prefix} photo #{uuid} → contact #{contact.id}: synced " <> + "(hash #{String.slice(content_hash, 0, 8)})" + ) + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "synced" + }) + |> bump("synced") + else + {:error, reason} -> + reason_str = inspect(reason) 
+ Logger.warning("#{@log_prefix} photo #{uuid}: failed (#{reason_str})") + + summary + |> record_photo(%{ + "uuid" => uuid, + "contact_id" => contact.id, + "status" => "failed", + "reason" => reason_str + }) + |> bump("failed") + end + end + + defp maybe_set_avatar(%{avatar: nil} = contact, key) do + contact + |> Ecto.Changeset.change(avatar: key) + |> Repo.update!() + end + + defp maybe_set_avatar(_contact, _key), do: :ok + + defp maybe_record_entity(_import_job, nil, _local_id), do: :ok + + defp maybe_record_entity(import_job, uuid, local_id), + do: Imports.record_imported_entity(import_job, "photo", uuid, "photo", local_id) + + # ── Summary helpers ──────────────────────────────────────────────────── + + defp empty_summary do + %{ + "total" => 0, + "synced" => 0, + "failed" => 0, + "not_found" => 0, + "photos" => [] + } + end + + defp bump(summary, key), do: Map.update!(summary, key, &(&1 + 1)) + + defp record_photo(summary, entry) do + Map.update!(summary, "photos", fn list -> + [entry | Enum.take(list, @max_photos_in_summary - 1)] + end) + end + + # ── Decoding ─────────────────────────────────────────────────────────── + + defp decode_photo_data(%{"dataUrl" => "data:" <> _ = data_url}) do + case String.split(data_url, ",", parts: 2) do + [_meta, encoded] -> + case Base.decode64(encoded) do + {:ok, binary} -> {:ok, binary} + :error -> {:error, :base64_decode_failed} + end + + _ -> + {:error, :malformed_data_url} + end + end + + defp decode_photo_data(_), do: {:error, :no_data_url} + + # ── HTTP helpers ─────────────────────────────────────────────────────── + + defp api_get_json(credential, url, params), + do: api_get_json_with_retry(credential, url, params, 0) + + defp api_get_json_with_retry(_credential, _url, _params, retries) + when retries >= @max_rate_limit_retries, + do: {:error, :rate_limited} + + defp api_get_json_with_retry(credential, url, params, retries) do + case api_get(credential, url, params) do + {:ok, %{status: 200, body: body}} when 
is_map(body) -> + {:ok, body} + + {:ok, %{status: 429}} -> + Logger.warning( + "#{@log_prefix} rate limited, sleeping #{@rate_limit_sleep_ms}ms (retry #{retries + 1})" + ) + + Process.sleep(@rate_limit_sleep_ms) + api_get_json_with_retry(credential, url, params, retries + 1) + + {:ok, %{status: status}} -> + {:error, "Unexpected status: #{status}"} + + {:error, reason} -> + {:error, reason} + end + end + + defp api_get(credential, url, params) do + headers = [ + {"Authorization", "Bearer #{credential.api_key}"}, + {"Accept", "application/json"} + ] + + options = [headers: headers, params: params] ++ Map.get(credential, :req_options, []) + Req.get(url, options) + end +end diff --git a/lib/kith/workers/phone_renormalize_worker.ex b/lib/kith/workers/phone_renormalize_worker.ex new file mode 100644 index 0000000..66297b5 --- /dev/null +++ b/lib/kith/workers/phone_renormalize_worker.ex @@ -0,0 +1,117 @@ +defmodule Kith.Workers.PhoneRenormalizeWorker do + @moduledoc """ + One-shot Oban worker that re-normalizes existing phone-protocol contact_fields + to E.164 using each account's locale-derived region. + + Run once after the libphonenumber-backed `PhoneFormatter.normalize/2` lands, + to migrate values written under the previous heuristic (e.g. "5551234567" + stored without a country code) into canonical E.164 form so the detection + worker can match by plain equality. + + Args: + * `"account_id"` (optional) — scope to a single account; omit to process all. + + Idempotent: rows whose normalized form already equals the stored value are + skipped, so re-running is safe. 
+ """ + + use Oban.Worker, queue: :default, max_attempts: 3 + + import Ecto.Query + + alias Kith.Accounts.Account + alias Kith.Contacts.{ContactField, ContactFieldType, PhoneFormatter} + alias Kith.Repo + + require Logger + + @batch_size 500 + + @impl Oban.Worker + def perform(%Oban.Job{args: %{"account_id" => account_id}}) do + renormalize_account(account_id) + :ok + end + + @impl Oban.Worker + def perform(%Oban.Job{args: _args}) do + account_ids = Repo.all(from(a in Account, select: a.id)) + Enum.each(account_ids, &renormalize_account/1) + :ok + end + + defp renormalize_account(account_id) do + region = + Repo.one(from(a in Account, where: a.id == ^account_id, select: a.locale)) + |> PhoneFormatter.region_for_locale() + + # ContactFieldTypes can be either system-wide (`account_id IS NULL`) or + # account-specific. Match both — the detection worker uses the same + # protocol-only filter so this mirrors that. + phone_cft_ids = + from(t in ContactFieldType, + where: is_nil(t.account_id) or t.account_id == ^account_id, + where: fragment("? 
LIKE 'tel%'", t.protocol), + select: t.id + ) + |> Repo.all() + + if phone_cft_ids != [] do + renormalize_batch(account_id, phone_cft_ids, region, 0, 0) + end + end + + defp renormalize_batch(account_id, cft_ids, region, offset, updated_count) do + rows = + from(cf in ContactField, + where: cf.account_id == ^account_id, + where: cf.contact_field_type_id in ^cft_ids, + order_by: [asc: cf.id], + offset: ^offset, + limit: @batch_size, + select: {cf.id, cf.value} + ) + |> Repo.all() + + if rows == [] do + Logger.info("[PhoneRenormalizeWorker] account=#{account_id} done, updated=#{updated_count}") + + :ok + else + batch_updated = count_updates(rows, region) + + renormalize_batch( + account_id, + cft_ids, + region, + offset + @batch_size, + updated_count + batch_updated + ) + end + end + + defp count_updates(rows, region) do + Enum.reduce(rows, 0, fn {id, value}, acc -> + acc + update_to_int(maybe_update(id, value, region)) + end) + end + + defp update_to_int(:updated), do: 1 + defp update_to_int(:unchanged), do: 0 + + defp maybe_update(_id, nil, _region), do: :unchanged + defp maybe_update(_id, "", _region), do: :unchanged + + defp maybe_update(id, value, region) do + {:ok, normalized} = PhoneFormatter.normalize(value, region) + + if normalized && normalized != value do + from(cf in ContactField, where: cf.id == ^id) + |> Repo.update_all(set: [value: normalized]) + + :updated + else + :unchanged + end + end +end diff --git a/lib/kith_web/components/layouts.ex b/lib/kith_web/components/layouts.ex index 373a562..0b6a92d 100644 --- a/lib/kith_web/components/layouts.ex +++ b/lib/kith_web/components/layouts.ex @@ -97,6 +97,7 @@ defmodule KithWeb.Layouts do match_prefix="/settings" /> <.sidebar_link + :if={Kith.Policy.can?(@current_scope.user, :manage, :oban)} path={~p"/admin/oban"} current_path={@current_path} icon="hero-queue-list" @@ -233,6 +234,7 @@ defmodule KithWeb.Layouts do match_prefix="/settings" /> <.mobile_nav_link + :if={Kith.Policy.can?(@current_scope.user, 
:manage, :oban)} path={~p"/admin/oban"} current_path={@current_path} icon="hero-queue-list" diff --git a/lib/kith_web/live/admin_live/oban_dashboard.ex b/lib/kith_web/live/admin_live/oban_dashboard.ex deleted file mode 100644 index 1ecc41b..0000000 --- a/lib/kith_web/live/admin_live/oban_dashboard.ex +++ /dev/null @@ -1,412 +0,0 @@ -defmodule KithWeb.AdminLive.ObanDashboard do - @moduledoc """ - Minimal Oban dashboard for free Oban (no Oban Pro/Web). - - Queries the oban_jobs table directly. Admin-only access. - """ - - use KithWeb, :live_view - - import Ecto.Query - - alias Kith.Repo - - @refresh_interval 5_000 - - @impl true - def mount(_params, _session, socket) do - user = socket.assigns.current_scope.user - - if Kith.Policy.can?(user, :manage, :account) do - if connected?(socket), do: Process.send_after(self(), :refresh, @refresh_interval) - {:ok, socket |> assign(:page_title, "Oban Dashboard") |> load_data()} - else - {:ok, - socket - |> put_flash(:error, "Admin access required.") - |> redirect(to: ~p"/dashboard")} - end - end - - @impl true - def handle_info(:refresh, socket) do - Process.send_after(self(), :refresh, @refresh_interval) - {:noreply, load_data(socket)} - end - - @impl true - def handle_event("retry-job", %{"id" => id}, socket) do - job_id = String.to_integer(id) - Oban.retry_job(job_id) - {:noreply, socket |> put_flash(:info, "Job #{job_id} retried.") |> load_data()} - end - - def handle_event("discard-job", %{"id" => id}, socket) do - job_id = String.to_integer(id) - Oban.cancel_job(job_id) - {:noreply, socket |> put_flash(:info, "Job #{job_id} discarded.") |> load_data()} - end - - defp load_data(socket) do - socket - |> assign(:queue_stats, fetch_queue_stats()) - |> assign(:recent_failures, fetch_recent_failures()) - |> assign(:recent_jobs, fetch_recent_jobs()) - |> assign(:photo_sync_jobs, fetch_photo_sync_jobs()) - end - - defp fetch_queue_stats do - from(j in "oban_jobs", - group_by: [j.queue, j.state], - select: %{ - queue: j.queue, - 
state: j.state, - count: count(j.id) - } - ) - |> Repo.all() - |> Enum.group_by(& &1.queue) - end - - defp fetch_recent_failures do - from(j in "oban_jobs", - where: j.state in ["retryable", "discarded"], - order_by: [desc: j.attempted_at], - limit: 20, - select: %{ - id: j.id, - worker: j.worker, - queue: j.queue, - state: j.state, - attempt: j.attempt, - max_attempts: j.max_attempts, - attempted_at: j.attempted_at, - errors: j.errors - } - ) - |> Repo.all() - end - - defp fetch_photo_sync_jobs do - from(j in "oban_jobs", - where: j.worker == "Kith.Workers.PhotoBatchSyncWorker", - order_by: [desc: j.inserted_at], - limit: 10, - select: %{ - id: j.id, - state: j.state, - attempt: j.attempt, - max_attempts: j.max_attempts, - args: j.args, - inserted_at: j.inserted_at, - attempted_at: j.attempted_at - } - ) - |> Repo.all() - |> Enum.map(fn job -> - import_id = get_in(job.args, ["import_id"]) - import_record = import_id && Kith.Imports.get_import(import_id) - - Map.merge(job, %{ - import_id: import_id, - import_source: import_record && import_record.source, - sync_summary: import_record && import_record.sync_summary - }) - end) - end - - defp fetch_recent_jobs do - from(j in "oban_jobs", - order_by: [desc: j.inserted_at], - limit: 30, - select: %{ - id: j.id, - worker: j.worker, - queue: j.queue, - state: j.state, - attempt: j.attempt, - max_attempts: j.max_attempts, - inserted_at: j.inserted_at, - attempted_at: j.attempted_at - } - ) - |> Repo.all() - end - - @impl true - def render(assigns) do - ~H""" - - - Oban Dashboard - <:subtitle>Background job monitoring (refreshes every 5s) - - - <%!-- Queue Overview --%> -
-
-

{queue}

-
-
- {s.state} - {s.count} -
-
-
-
- - <%!-- Recent Failures --%> -
-

Recent Failures

- <%= if @recent_failures == [] do %> -

No recent failures.

- <% else %> -
- - - - - - - - - - - - - - - - - - - - - - - -
- ID - - Worker - - Queue - - State - - Attempt - - Last Run - - Actions -
- {job.id} - - {short_worker(job.worker)} - {job.queue} - {job.state} - - {job.attempt}/{job.max_attempts} - - {format_time(job.attempted_at)} - - - -
-
- <% end %> -
- - <%!-- Photo Sync Jobs --%> -
-

Photo Sync Jobs

- <%= if @photo_sync_jobs == [] do %> -

No photo sync jobs.

- <% else %> -
- - - - - - - - - - - - - - - - - - - - - -
- ID - - Import - - State - - Progress - - Attempt - - Created -
- {job.id} - - <%= if job.import_id do %> - <.link - navigate={~p"/settings/imports/#{job.import_id}"} - class="text-[var(--color-accent)] hover:underline" - > - #{job.import_id} ({job.import_source || "?"}) - - <% else %> - — - <% end %> - - {job.state} - - {sync_progress(job.sync_summary)} - - {job.attempt}/{job.max_attempts} - - {format_time(job.inserted_at)} -
-
- <% end %> -
- - <%!-- Recent Jobs --%> -
-

Recent Jobs

-
- - - - - - - - - - - - - - - - - - - - - -
- ID - - Worker - - Queue - - State - - Attempt - - Inserted -
- {job.id} - - {short_worker(job.worker)} - {job.queue} - {job.state} - - {job.attempt}/{job.max_attempts} - - {format_time(job.inserted_at)} -
-
-
-
- """ - end - - defp short_worker(worker) do - worker - |> String.split(".") - |> List.last() - end - - defp state_color("completed"), do: "text-[var(--color-success)]" - defp state_color("available"), do: "text-[var(--color-info)]" - defp state_color("scheduled"), do: "text-[var(--color-text-tertiary)]" - defp state_color("executing"), do: "text-[var(--color-warning)]" - defp state_color("retryable"), do: "text-[var(--color-error)]" - defp state_color("discarded"), do: "text-[var(--color-error)]/50" - defp state_color(_), do: "text-[var(--color-text-primary)]" - - defp state_variant("completed"), do: "success" - defp state_variant("available"), do: "info" - defp state_variant("scheduled"), do: "default" - defp state_variant("executing"), do: "warning" - defp state_variant("retryable"), do: "error" - defp state_variant("discarded"), do: "error" - defp state_variant(_), do: "default" - - defp sync_progress(%{"synced" => synced, "total" => total}), do: "#{synced}/#{total} synced" - defp sync_progress(_), do: "—" - - defp format_time(nil), do: "-" - - defp format_time(%NaiveDateTime{} = ndt) do - case Kith.Cldr.DateTime.to_string(ndt, format: :medium) do - {:ok, str} -> str - _ -> to_string(ndt) - end - end - - defp format_time(%DateTime{} = dt) do - case Kith.Cldr.DateTime.to_string(dt, format: :medium) do - {:ok, str} -> str - _ -> to_string(dt) - end - end - - defp format_time(_), do: "-" -end diff --git a/lib/kith_web/live/contact_live/duplicates.ex b/lib/kith_web/live/contact_live/duplicates.ex index 1dad4f5..36c951b 100644 --- a/lib/kith_web/live/contact_live/duplicates.ex +++ b/lib/kith_web/live/contact_live/duplicates.ex @@ -7,12 +7,16 @@ defmodule KithWeb.ContactLive.Duplicates do alias Kith.Policy alias Kith.Workers.DuplicateDetectionWorker + @page_size 20 + @impl true def mount(_params, _session, socket) do {:ok, socket |> assign(:page_title, "Duplicate Contacts") - |> assign(:candidates, [])} + |> assign(:candidates, []) + |> assign(:has_more, false) + |> 
assign(:total_count, 0)} end @impl true @@ -20,12 +24,15 @@ defmodule KithWeb.ContactLive.Duplicates do scope = socket.assigns.current_scope account_id = scope.account.id - candidates = DuplicateDetection.list_candidates(account_id) + candidates = DuplicateDetection.list_candidates(account_id, limit: @page_size) + total_count = DuplicateDetection.pending_count(account_id) {:noreply, socket |> assign(:account_id, account_id) - |> assign(:candidates, candidates)} + |> assign(:candidates, candidates) + |> assign(:total_count, total_count) + |> assign(:has_more, length(candidates) >= @page_size)} end @impl true @@ -35,15 +42,32 @@ defmodule KithWeb.ContactLive.Duplicates do {:ok, _} = DuplicateDetection.dismiss_candidate(candidate) - candidates = DuplicateDetection.list_candidates(socket.assigns.account_id) + candidates = Enum.reject(socket.assigns.candidates, &(&1.id == candidate.id)) + total_count = socket.assigns.total_count - 1 {:noreply, socket |> assign(:candidates, candidates) - |> assign(:pending_duplicates_count, length(candidates)) + |> assign(:total_count, total_count) + |> assign(:pending_duplicates_count, total_count) |> put_flash(:info, "Duplicate dismissed.")} end + def handle_event("load_more", _params, socket) do + offset = length(socket.assigns.candidates) + + more = + DuplicateDetection.list_candidates(socket.assigns.account_id, + limit: @page_size, + offset: offset + ) + + {:noreply, + socket + |> assign(:candidates, socket.assigns.candidates ++ more) + |> assign(:has_more, length(more) >= @page_size)} + end + def handle_event("scan", _params, socket) do user = socket.assigns.current_scope.user @@ -79,7 +103,7 @@ defmodule KithWeb.ContactLive.Duplicates do

Duplicate Contacts

- {length(@candidates)} potential duplicate{if length(@candidates) != 1, do: "s"} found + {@total_count} potential duplicate{if @total_count != 1, do: "s"} found

+ {candidate.duplicate_contact.display_name} +
- <% end %> + +
+ <.link + navigate={ + ~p"/contacts/#{candidate.contact.id}/merge?with=#{candidate.duplicate_contact.id}&candidate_id=#{candidate.id}" + } + class="inline-flex items-center gap-1.5 rounded-[var(--radius-md)] bg-[var(--color-accent)] text-[var(--color-accent-foreground)] px-3 py-1.5 text-xs font-medium hover:bg-[var(--color-accent-hover)] transition-colors" + > + <.icon name="hero-arrows-right-left" class="size-4" /> Merge + + +
+ + +
+ +
<% end %> diff --git a/lib/kith_web/live/contact_live/index.ex b/lib/kith_web/live/contact_live/index.ex index c1f3b55..491b9b4 100644 --- a/lib/kith_web/live/contact_live/index.ex +++ b/lib/kith_web/live/contact_live/index.ex @@ -33,6 +33,8 @@ defmodule KithWeb.ContactLive.Index do |> assign(:meta, nil) |> assign(:tags, Contacts.list_tags(account_id)) |> assign(:candidates, []) + |> assign(:duplicates_total, 0) + |> assign(:duplicates_has_more, false) |> assign(:trashed_contacts, [])} end @@ -55,12 +57,18 @@ defmodule KithWeb.ContactLive.Index do |> load_contacts() end + @duplicates_page_size 20 + defp apply_action(socket, :duplicates, _params) do - candidates = DuplicateDetection.list_candidates(socket.assigns.account_id) + account_id = socket.assigns.account_id + candidates = DuplicateDetection.list_candidates(account_id, limit: @duplicates_page_size) + total_count = DuplicateDetection.pending_count(account_id) socket |> assign(:page_title, "Duplicate Contacts") |> assign(:candidates, candidates) + |> assign(:duplicates_total, total_count) + |> assign(:duplicates_has_more, length(candidates) >= @duplicates_page_size) end defp apply_action(socket, :trash, _params) do @@ -245,15 +253,32 @@ defmodule KithWeb.ContactLive.Index do {:ok, _} = DuplicateDetection.dismiss_candidate(candidate) - candidates = DuplicateDetection.list_candidates(socket.assigns.account_id) + candidates = Enum.reject(socket.assigns.candidates, &(&1.id == candidate.id)) + total = socket.assigns.duplicates_total - 1 {:noreply, socket |> assign(:candidates, candidates) - |> assign(:pending_duplicates_count, length(candidates)) + |> assign(:duplicates_total, total) + |> assign(:pending_duplicates_count, total) |> put_flash(:info, "Duplicate dismissed.")} end + def handle_event("load_more_duplicates", _params, socket) do + offset = length(socket.assigns.candidates) + + more = + DuplicateDetection.list_candidates(socket.assigns.account_id, + limit: @duplicates_page_size, + offset: offset + ) + + 
{:noreply, + socket + |> assign(:candidates, socket.assigns.candidates ++ more) + |> assign(:duplicates_has_more, length(more) >= @duplicates_page_size)} + end + def handle_event("scan", _params, socket) do user = socket.assigns.current_scope.user diff --git a/lib/kith_web/live/contact_live/index.html.heex b/lib/kith_web/live/contact_live/index.html.heex index 7e08df4..3db183f 100644 --- a/lib/kith_web/live/contact_live/index.html.heex +++ b/lib/kith_web/live/contact_live/index.html.heex @@ -381,7 +381,7 @@

- {length(@candidates)} potential duplicate{if length(@candidates) != 1, do: "s"} found + {@duplicates_total} potential duplicate{if @duplicates_total != 1, do: "s"} found

<% end %> + + <%= if @duplicates_has_more do %> +
+ +
+ <% end %> <% end %> <% end %> diff --git a/lib/kith_web/live/contact_live/merge.ex b/lib/kith_web/live/contact_live/merge.ex index 6bdad13..f45dbbc 100644 --- a/lib/kith_web/live/contact_live/merge.ex +++ b/lib/kith_web/live/contact_live/merge.ex @@ -150,13 +150,7 @@ defmodule KithWeb.ContactLive.Merge do end end - # ── Step 3: Preview ──────────────────────────────────────────────────── - - def handle_event("confirm-merge", _params, socket) do - {:noreply, assign(socket, :step, 4)} - end - - # ── Step 4: Execute ──────────────────────────────────────────────────── + # ── Step 3: Preview & Execute ─────────────────────────────────────────── def handle_event("execute-merge", _params, socket) do contact_a = socket.assigns.contact_a @@ -233,14 +227,14 @@ defmodule KithWeb.ContactLive.Merge do Merge Contacts - Step {@step} of 4 — {step_label(@step)} + Step {@step} of 3 — {step_label(@step)} <%!-- Step indicator (horizontal stepper) --%>
- Back - Confirm Merge -
- -
- - <%!-- Step 4: Confirm & Execute --%> -
- -
-

- Final Confirmation -

-

- This action cannot be easily undone. Are you sure you want to merge - - {@contact_b.display_name} - - into {@contact_a.display_name}? -

- -
- - Go Back - - - {if @merging, do: "Merging...", else: "Merge Contacts"} - -
+ Back + + {if @merging, do: "Merging...", else: "Merge Contacts"} +
@@ -463,8 +432,7 @@ defmodule KithWeb.ContactLive.Merge do defp step_label(1), do: "Select contact" defp step_label(2), do: "Choose fields" - defp step_label(3), do: "Preview" - defp step_label(4), do: "Confirm" + defp step_label(3), do: "Review & merge" defp default_field_choices do @mergeable_fields diff --git a/lib/kith_web/live/import_wizard_live.ex b/lib/kith_web/live/import_wizard_live.ex index 5c4205e..6b16465 100644 --- a/lib/kith_web/live/import_wizard_live.ex +++ b/lib/kith_web/live/import_wizard_live.ex @@ -11,6 +11,7 @@ defmodule KithWeb.ImportWizardLive do use KithWeb, :live_view + alias Kith.Contacts.PhoneFormatter alias Kith.Imports alias Kith.Imports.Sources.MonicaApi alias Kith.Policy @@ -23,6 +24,8 @@ defmodule KithWeb.ImportWizardLive do @impl true def mount(_params, _session, socket) do + locale = account_locale(socket) + {:ok, socket |> assign(:page_title, "Import Contacts") @@ -30,18 +33,8 @@ defmodule KithWeb.ImportWizardLive do |> assign(:source, "vcard") |> assign(:api_url, "") |> assign(:api_key, "") - |> assign(:api_options, %{ - "photos" => false, - "auto_merge_duplicates" => false, - "pets" => true, - "calls" => true, - "activities" => true, - "gifts" => true, - "debts" => true, - "tasks" => true, - "reminders" => true, - "conversations" => true - }) + |> assign(:api_options, default_api_options(socket)) + |> assign(:phone_regions, build_phone_regions(locale)) |> assign(:api_testing, false) |> assign(:current_import, nil) |> assign(:progress, nil) @@ -54,6 +47,28 @@ defmodule KithWeb.ImportWizardLive do )} end + defp default_api_options(socket) do + %{ + "photos" => false, + "auto_merge_duplicates" => true, + "phone_default_region" => account_default_region(socket), + "pets" => true, + "calls" => true, + "activities" => true, + "gifts" => true, + "debts" => true, + "tasks" => true, + "reminders" => true, + "conversations" => true + } + end + + defp account_default_region(%{assigns: %{current_scope: %{account: %{locale: locale}}}}) + 
when is_binary(locale), + do: PhoneFormatter.region_for_locale(locale) || "" + + defp account_default_region(_socket), do: "" + @impl true def handle_params(_params, _uri, socket) do scope = socket.assigns.current_scope @@ -146,18 +161,7 @@ defmodule KithWeb.ImportWizardLive do |> assign(:source, "vcard") |> assign(:api_url, "") |> assign(:api_key, "") - |> assign(:api_options, %{ - "photos" => false, - "auto_merge_duplicates" => false, - "pets" => true, - "calls" => true, - "activities" => true, - "gifts" => true, - "debts" => true, - "tasks" => true, - "reminders" => true, - "conversations" => true - }) + |> assign(:api_options, default_api_options(socket)) |> assign(:api_testing, false) |> assign(:current_import, nil) |> assign(:progress, nil) @@ -165,6 +169,11 @@ defmodule KithWeb.ImportWizardLive do |> assign(:error, nil)} end + def handle_event("set_phone_region", %{"region" => region}, socket) do + options = Map.put(socket.assigns.api_options, "phone_default_region", region) + {:noreply, assign(socket, :api_options, options)} + end + # ── PubSub handlers ──────────────────────────────────────────────────────── @impl true @@ -344,11 +353,28 @@ defmodule KithWeb.ImportWizardLive do end defp build_api_options(socket) do + # Pass options through unchanged so non-boolean settings (phone_default_region) + # survive. MonicaApi reads each key directly and treats falsy as off. 
socket.assigns.api_options - |> Enum.filter(fn {_k, v} -> v end) - |> Enum.into(%{}, fn {k, _v} -> {k, true} end) end + defp build_phone_regions(locale) do + [{"", phone_off_label(locale)} | PhoneFormatter.supported_regions(locale)] + end + + defp account_locale(%{assigns: %{current_scope: %{account: %{locale: locale}}}}) + when is_binary(locale), + do: locale + + defp account_locale(_), do: "en" + + defp phone_off_label("en"), do: "Don't normalize bare numbers" + defp phone_off_label("fr"), do: "Ne pas normaliser les numéros sans indicatif" + defp phone_off_label("de"), do: "Nackte Nummern nicht normalisieren" + defp phone_off_label("es"), do: "No normalizar números sin prefijo" + defp phone_off_label("pt"), do: "Não normalizar números sem prefixo" + defp phone_off_label(_), do: "Don't normalize bare numbers" + # ── Render ────────────────────────────────────────────────────────────────── @impl true @@ -554,10 +580,36 @@ defmodule KithWeb.ImportWizardLive do Auto-merge definite duplicates

- Merge contacts with identical name + email or name + phone + Merge contacts that share 2+ strong signals (email, phone, address) or share an email/phone and an address

+
+ +
diff --git a/lib/kith_web/router.ex b/lib/kith_web/router.ex index 4665321..79f81b1 100644 --- a/lib/kith_web/router.ex +++ b/lib/kith_web/router.ex @@ -371,13 +371,26 @@ defmodule KithWeb.Router do live "/contacts/:id/immich-review", ContactLive.ImmichReview, :index # Admin pages - live "/admin/oban", AdminLive.ObanDashboard, :index live "/settings/audit-log", SettingsLive.AuditLog, :index end post "/users/update-password", UserSessionController, :update_password end + # Admin-only Oban Web dashboard. + # The oban_dashboard macro defines its own internal live_session, so it must + # be placed outside any other live_session block. + scope "/admin" do + pipe_through [:browser, :require_authenticated_user, :require_confirmed_user] + + import Oban.Web.Router + + oban_dashboard("/oban", + on_mount: [{KithWeb.UserAuth, :require_admin}], + csp_nonce_assign_key: :csp_nonce + ) + end + # WebAuthn registration (authenticated, JSON over session) scope "/auth/webauthn", KithWeb do pipe_through [:browser_json, :require_authenticated_user] diff --git a/lib/kith_web/user_auth.ex b/lib/kith_web/user_auth.ex index e66ef93..06df9ca 100644 --- a/lib/kith_web/user_auth.ex +++ b/lib/kith_web/user_auth.ex @@ -243,6 +243,25 @@ defmodule KithWeb.UserAuth do end end + def on_mount(:require_admin, params, session, socket) do + case on_mount(:require_authenticated, params, session, socket) do + {:cont, socket} -> + if Kith.Policy.can?(socket.assigns.current_scope.user, :manage, :oban) do + {:cont, socket} + else + socket = + socket + |> Phoenix.LiveView.put_flash(:error, "Admin access required.") + |> Phoenix.LiveView.redirect(to: ~p"/dashboard") + + {:halt, socket} + end + + {:halt, _} = halt -> + halt + end + end + def on_mount(:require_sudo_mode, _params, session, socket) do socket = mount_current_scope(socket, session) diff --git a/mix.exs b/mix.exs index 9c90417..e97f5e2 100644 --- a/mix.exs +++ b/mix.exs @@ -36,7 +36,8 @@ defmodule Kith.MixProject do [ plt_file: {:no_warn, 
"priv/plts/dialyzer.plt"}, plt_add_apps: [:mix, :ex_unit], - flags: [:error_handling, :underspecs, :unknown] + flags: [:error_handling, :underspecs, :unknown], + ignore_warnings: ".dialyzer_ignore.exs" ] end @@ -61,7 +62,7 @@ defmodule Kith.MixProject do {:telemetry_poller, "~> 1.0"}, {:gettext, "~> 1.0"}, {:jason, "~> 1.2"}, - {:dns_cluster, "~> 0.2.0"}, + {:libcluster, "~> 3.4"}, {:bandit, "~> 1.5"}, {:heroicons, github: "tailwindlabs/heroicons", @@ -73,6 +74,7 @@ defmodule Kith.MixProject do # Background Jobs {:oban, "~> 2.18"}, + {:oban_web, "~> 2.11"}, # Email {:swoosh, "~> 1.17"}, @@ -106,6 +108,7 @@ defmodule Kith.MixProject do {:ex_cldr, "~> 2.40"}, {:ex_cldr_dates_times, "~> 2.20"}, {:ex_cldr_numbers, "~> 2.33"}, + {:ex_cldr_territories, "~> 2.9"}, # Logging & Observability {:logger_json, "~> 6.0"}, @@ -129,6 +132,9 @@ defmodule Kith.MixProject do # HTML Sanitization (rich text from Trix editor) {:html_sanitize_ex, "~> 1.4"}, + # Phone number parsing & E.164 normalization (libphonenumber port) + {:ex_phone_number, "~> 0.4"}, + # Server-side sorting, filtering, and pagination {:flop, "~> 0.26"}, {:flop_phoenix, "~> 0.23"}, diff --git a/mix.lock b/mix.lock index a2ef26a..e228409 100644 --- a/mix.lock +++ b/mix.lock @@ -19,7 +19,6 @@ "decimal": {:hex, :decimal, "2.3.0", "3ad6255aa77b4a3c4f818171b12d237500e63525c2fd056699967a3e7ea20f62", [:mix], [], "hexpm", "a4d66355cb29cb47c3cf30e71329e58361cfcb37c34235ef3bf1d7bf3773aeac"}, "dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"}, "digital_token": {:hex, :digital_token, "1.0.0", "454a4444061943f7349a51ef74b7fb1ebd19e6a94f43ef711f7dae88c09347df", [:mix], [{:cldr_utils, "~> 2.17", [hex: :cldr_utils, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", 
"8ed6f5a8c2fa7b07147b9963db506a1b4c7475d9afca6492136535b064c9e9e6"}, - "dns_cluster": {:hex, :dns_cluster, "0.2.0", "aa8eb46e3bd0326bd67b84790c561733b25c5ba2fe3c7e36f28e88f384ebcb33", [:mix], [], "hexpm", "ba6f1893411c69c01b9e8e8f772062535a4cf70f3f35bcc964a324078d8c8240"}, "ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"}, "ecto_sql": {:hex, :ecto_sql, "3.13.5", "2f8282b2ad97bf0f0d3217ea0a6fff320ead9e2f8770f810141189d182dc304e", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aa36751f4e6a2b56ae79efb0e088042e010ff4935fc8684e74c23b1f49e25fdc"}, "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"}, @@ -34,8 +33,10 @@ "ex_cldr_currencies": {:hex, :ex_cldr_currencies, "2.17.1", "89947c7102ff1b46fc46095624239a1c3d72499b19ed650597630771d9e4a662", [:mix], [{:ex_cldr, "~> 2.38", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "e266a0a61f4c7d83608154d49b59e4d7485b2aaa7ba1d0e17b3c55910595de51"}, "ex_cldr_dates_times": {:hex, :ex_cldr_dates_times, "2.25.6", 
"6db974ab2b430b5733994c2bfbe98a69e25eeb076b876a929791ff521f8fdd96", [:mix], [{:calendar_interval, "~> 0.2", [hex: :calendar_interval, repo: "hexpm", optional: true]}, {:ex_cldr_calendars, "~> 2.4", [hex: :ex_cldr_calendars, repo: "hexpm", optional: false]}, {:ex_cldr_units, "~> 3.20", [hex: :ex_cldr_units, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:tz, "~> 0.26", [hex: :tz, repo: "hexpm", optional: true]}], "hexpm", "926ff5662b849f86088832ee66b61a96aab0fa5a54d5e14240e08ad3030663e2"}, "ex_cldr_numbers": {:hex, :ex_cldr_numbers, "2.38.1", "e5124e288a8e672831e10d39530ecb5329bc9af2169709ebfbadc814cae7d4fb", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:digital_token, "~> 0.3 or ~> 1.0", [hex: :digital_token, repo: "hexpm", optional: false]}, {:ex_cldr, "~> 2.45", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:ex_cldr_currencies, "~> 2.17", [hex: :ex_cldr_currencies, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "4f95738f1dc4e821485e52226666f7691c9276bf6eba49cba8d23c8a2db05e84"}, + "ex_cldr_territories": {:hex, :ex_cldr_territories, "2.12.0", "3c69917e67256a29e7d4eff9a12b9340186f6c5cea36ebef83fb67e5452064c9", [:mix], [{:ex_cldr, "~> 2.47", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "d12bdd3dcc1debaed3268deed6a0d8f53409f540e6a3b1410ede6cf3a6a1f768"}, "ex_hash_ring": {:hex, :ex_hash_ring, "6.0.4", "bef9d2d796afbbe25ab5b5a7ed746e06b99c76604f558113c273466d52fa6d6b", [:mix], [], "hexpm", "89adabf31f7d3dfaa36802ce598ce918e9b5b33bae8909ac1a4d052e1e567d18"}, "ex_machina": {:hex, :ex_machina, "2.8.0", "a0e847b5712065055ec3255840e2c78ef9366634d62390839d4880483be38abe", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], 
"hexpm", "79fe1a9c64c0c1c1fab6c4fa5d871682cb90de5885320c187d117004627a7729"}, + "ex_phone_number": {:hex, :ex_phone_number, "0.4.11", "89f3f96f4b4c1404ae89b8a2f24397fd353a1d0d4b7dd39b2a633a23a4cf82b5", [:mix], [{:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: false]}], "hexpm", "cefa61b4fd4f946a1813f19fcfce1370907d31261716fb7e7d04da775ad5d9c6"}, "expo": {:hex, :expo, "1.1.1", "4202e1d2ca6e2b3b63e02f69cfe0a404f77702b041d02b58597c00992b601db5", [:mix], [], "hexpm", "5fb308b9cb359ae200b7e23d37c76978673aa1b06e2b3075d814ce12c5811640"}, "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, "finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"}, @@ -57,6 +58,7 @@ "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "jumper": {:hex, :jumper, "1.0.2", "68cdcd84472a00ac596b4e6459a41b3062d4427cbd4f1e8c8793c5b54f1406a7", [:mix], [], "hexpm", "9b7782409021e01ab3c08270e26f36eb62976a38c1aa64b2eaf6348422f165e1"}, "lazy_html": {:hex, :lazy_html, "0.1.10", "ffe42a0b4e70859cf21a33e12a251e0c76c1dff76391609bd56702a0ef5bc429", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: 
:cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.9.0", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1.0", [hex: :fine, repo: "hexpm", optional: false]}], "hexpm", "50f67e5faa09d45a99c1ddf3fac004f051997877dc8974c5797bb5ccd8e27058"}, + "libcluster": {:hex, :libcluster, "3.5.0", "5ee4cfde4bdf32b2fef271e33ce3241e89509f4344f6c6a8d4069937484866ba", [:mix], [{:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.3", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ebf6561fcedd765a4cd43b4b8c04b1c87f4177b5fb3cbdfe40a780499d72f743"}, "logger_json": {:hex, :logger_json, "6.2.1", "a1db30e1164e6057f2328a1e4d6b632b9583c015574fdf6c38cf73721128edcb", [:mix], [{:decimal, ">= 0.0.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:ecto, "~> 3.11", [hex: :ecto, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:plug, "~> 1.15", [hex: :plug, repo: "hexpm", optional: true]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "34acd0bfd419d5fcf08c4108a8a4b59b695fcc60409dc1dd1a868b70c42e1d1f"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, @@ -69,6 +71,8 @@ "nimble_ownership": {:hex, :nimble_ownership, "1.0.2", "fa8a6f2d8c592ad4d79b2ca617473c6aefd5869abfa02563a77682038bf916cf", [:mix], [], "hexpm", "098af64e1f6f8609c6672127cfe9e9590a5d3fcdd82bc17a377b8692fd81a879"}, "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "oban": 
{:hex, :oban, "2.20.3", "e4d27336941955886cc7113420c32c63b70b64f10b27e08e3cf2b001153953cd", [:mix], [{:ecto_sql, "~> 3.10", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:ecto_sqlite3, "~> 0.9", [hex: :ecto_sqlite3, repo: "hexpm", optional: true]}, {:igniter, "~> 0.5", [hex: :igniter, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.20", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 1.3", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "075ffbf1279a96bec495bc63d647b08929837d70bcc0427249ffe4d1dddaec33"}, + "oban_met": {:hex, :oban_met, "1.0.6", "2a5500aff496b7ac4b830b0b03b08e920625a051bb6890981fbb53b15f1cbdc0", [:mix], [{:oban, "~> 2.19", [hex: :oban, repo: "hexpm", optional: false]}], "hexpm", "15ea3303de76225878a8e6c25a9d62bd1e2e9dd1c46ac8487d873b9f99e8dcee"}, + "oban_web": {:hex, :oban_web, "2.11.8", "be6521b5b1eb6d4182f40f5acc948ea65d243451b94c26f06a7329575748f695", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}, {:oban, "~> 2.19", [hex: :oban, repo: "hexpm", optional: false]}, {:oban_met, "~> 1.0", [hex: :oban_met, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.7", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}], "hexpm", "d0c04a836d929ef037e96be142285238275aabbafe62543bbdcc3f541d29ec30"}, "octo_fetch": {:hex, :octo_fetch, "0.4.0", "074b5ecbc08be10b05b27e9db08bc20a3060142769436242702931c418695b19", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", 
"cf8be6f40cd519d7000bb4e84adcf661c32e59369ca2827c4e20042eda7a7fc6"}, "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, "pbkdf2_elixir": {:hex, :pbkdf2_elixir, "2.3.1", "073866b593887365d0ff50bb806d860a50f454bcda49b5b6f4658c9173c53889", [:mix], [{:comeonin, "~> 5.3", [hex: :comeonin, repo: "hexpm", optional: false]}], "hexpm", "ab4da7db8aeb2db20e02a1d416cbb46d0690658aafb4396878acef8748c9c319"}, diff --git a/test/kith/activities/cleanup_test.exs b/test/kith/activities/cleanup_test.exs new file mode 100644 index 0000000..7524434 --- /dev/null +++ b/test/kith/activities/cleanup_test.exs @@ -0,0 +1,47 @@ +defmodule Kith.Activities.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Activities.{Activity, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes activities for target account only", ctx do + Repo.insert!(%Activity{ + account_id: ctx.target_account, + title: "target activity", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Activity{ + account_id: ctx.other_account, + title: "other activity", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Activity, ctx.target_account) == 0 + assert count_for(Activity, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git 
a/test/kith/audit_logs/cleanup_test.exs b/test/kith/audit_logs/cleanup_test.exs new file mode 100644 index 0000000..d1952da --- /dev/null +++ b/test/kith/audit_logs/cleanup_test.exs @@ -0,0 +1,52 @@ +defmodule Kith.AuditLogs.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.AuditLogs + alias Kith.AuditLogs.{AuditLog, Cleanup} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + end + + test "wipes audit logs for target account only", ctx do + {:ok, _} = + AuditLogs.create_audit_log(ctx.target_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + {:ok, _} = + AuditLogs.create_audit_log(ctx.other_account, %{ + user_id: nil, + user_name: "system", + event: "account_data_reset", + metadata: %{} + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(AuditLog, ctx.target_account) == 0 + assert count_for(AuditLog, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/contacts/cleanup_test.exs b/test/kith/contacts/cleanup_test.exs new file mode 100644 index 0000000..e113159 --- /dev/null +++ b/test/kith/contacts/cleanup_test.exs @@ -0,0 +1,60 @@ +defmodule Kith.Contacts.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Contacts.{Cleanup, Contact, Tag} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + other_account: other.account_id + } + 
end + + test "hard-deletes contacts + tags for target account; leaves other account untouched", ctx do + contact_fixture(ctx.target_account) + contact_fixture(ctx.target_account) + contact_fixture(ctx.other_account) + + Repo.insert!(%Tag{account_id: ctx.target_account, name: "target-tag"}) + Repo.insert!(%Tag{account_id: ctx.other_account, name: "other-tag"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Contact, ctx.target_account) == 0 + assert count_for(Tag, ctx.target_account) == 0 + + assert count_for(Contact, ctx.other_account) == 1 + assert count_for(Tag, ctx.other_account) == 1 + end + + test "ignores soft-deleted vs not — hard-deletes both", ctx do + active = contact_fixture(ctx.target_account) + soft = contact_fixture(ctx.target_account) + + soft + |> Ecto.Changeset.change(deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)) + |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + refute Repo.get(Contact, active.id) + refute Repo.get(Contact, soft.id) + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/contacts/phone_formatter_test.exs b/test/kith/contacts/phone_formatter_test.exs index 5dfd878..0b35009 100644 --- a/test/kith/contacts/phone_formatter_test.exs +++ b/test/kith/contacts/phone_formatter_test.exs @@ -3,7 +3,7 @@ defmodule Kith.Contacts.PhoneFormatterTest do alias Kith.Contacts.PhoneFormatter - describe "normalize/1" do + describe "normalize/1 (no region — opt-in normalization)" do test "returns nil for nil" do assert {:ok, nil} = PhoneFormatter.normalize(nil) end @@ -12,40 +12,124 @@ defmodule Kith.Contacts.PhoneFormatterTest do assert {:ok, nil} = PhoneFormatter.normalize("") end - test 
"preserves E.164 input" do - assert {:ok, "+12345678901"} = PhoneFormatter.normalize("+12345678901") + test "preserves E.164 input untouched" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize("+12025550100") end - test "preserves bare 10-digit number without adding country code" do - assert {:ok, "2345678901"} = PhoneFormatter.normalize("2345678901") + test "parses +prefixed number with formatting to E.164" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize("+1 202 555-0100") end - test "strips formatting from 10-digit number" do - assert {:ok, "2345678901"} = PhoneFormatter.normalize("(234) 567-8901") + test "parses international +prefixed number" do + assert {:ok, "+442079460958"} = PhoneFormatter.normalize("+44 20 7946 0958") end - test "does not assume country code for 10-digit numbers" do - assert {:ok, "9876543210"} = PhoneFormatter.normalize("987-654-3210") + test "leaves bare number unchanged (no region context)" do + # Without a default region, we can't safely interpret what country this + # 10-digit number belongs to. Returned trimmed-only. + assert {:ok, "2025550100"} = PhoneFormatter.normalize("2025550100") end - test "handles 11-digit number starting with 1" do - assert {:ok, "+12345678901"} = PhoneFormatter.normalize("12345678901") + test "leaves formatted bare number trimmed-but-otherwise-unchanged" do + assert {:ok, "(202) 555-0100"} = PhoneFormatter.normalize("(202) 555-0100") end - test "handles international number with +" do - assert {:ok, "+442079460958"} = PhoneFormatter.normalize("+44 20 7946 0958") + test "trims whitespace around E.164 input" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize(" +1 202 555 0100 ") + end + + test "returns unparseable +prefixed input as-is" do + # +0 is not a valid country code; libphonenumber rejects it. 
+ assert {:ok, "+0"} = PhoneFormatter.normalize("+0") + end + end + + describe "normalize/2 (with default region)" do + test "parses bare US number to E.164 with US region" do + assert {:ok, "+12025550100"} = PhoneFormatter.normalize("(202) 555-0100", "US") + end + + test "parses bare UK number to E.164 with GB region" do + assert {:ok, "+442079460958"} = PhoneFormatter.normalize("020 7946 0958", "GB") + end + + test "parses bare French number to E.164 with FR region" do + assert {:ok, "+33612345678"} = PhoneFormatter.normalize("06 12 34 56 78", "FR") + end + + test "+prefixed number ignores the default region argument" do + # The number is unambiguously German; passing "US" must not override. + assert {:ok, "+4915155555555"} = PhoneFormatter.normalize("+49 151 5555 5555", "US") + end + + test "explicit nil region is equivalent to normalize/1" do + assert PhoneFormatter.normalize("(202) 555-0100") == + PhoneFormatter.normalize("(202) 555-0100", nil) + end + + test "returns original on unparseable input with region" do + assert {:ok, "garbage"} = PhoneFormatter.normalize("garbage", "US") + end + + test "returns nil for nil regardless of region" do + assert {:ok, nil} = PhoneFormatter.normalize(nil, "FR") + end + end + + describe "region_for_locale/1" do + test "maps common locales to regions" do + assert "US" = PhoneFormatter.region_for_locale("en") + assert "FR" = PhoneFormatter.region_for_locale("fr") + assert "DE" = PhoneFormatter.region_for_locale("de") + assert "JP" = PhoneFormatter.region_for_locale("ja") + end + + test "strips locale subtag" do + assert "US" = PhoneFormatter.region_for_locale("en-GB") + assert "FR" = PhoneFormatter.region_for_locale("fr_CA") end - test "adds + to 7+ digit numbers without it" do - assert {:ok, "+1234567"} = PhoneFormatter.normalize("1234567") + test "returns nil for unknown locales" do + assert is_nil(PhoneFormatter.region_for_locale("xx")) + assert is_nil(PhoneFormatter.region_for_locale("")) + assert 
is_nil(PhoneFormatter.region_for_locale(nil)) end + end + + describe "supported_regions/1" do + test "returns parser-supported regions with localized labels and calling codes" do + regions = PhoneFormatter.supported_regions("en") + + # libphonenumber supports ~250 regions; we intersect with CLDR + # country_codes so continents/aggregates are excluded. + assert length(regions) > 200 + + assert Enum.all?(regions, fn {code, label} -> + is_binary(code) and byte_size(code) == 2 and + is_binary(label) and String.contains?(label, "(+") + end) + + # Spot-check known entries + assert Enum.find(regions, fn {code, _} -> code == "US" end) == + {"US", "United States (+1)"} + + assert {_code, label} = Enum.find(regions, fn {code, _} -> code == "GB" end) + assert label =~ "United Kingdom" + assert label =~ "+44" + end + + test "returns localized names for non-English locales" do + en = PhoneFormatter.supported_regions("en") |> Map.new() + fr = PhoneFormatter.supported_regions("fr") |> Map.new() - test "preserves short numbers as-is" do - assert {:ok, "12345"} = PhoneFormatter.normalize("12345") + refute en["US"] == fr["US"] + assert fr["US"] =~ "(+1)" end - test "handles whitespace" do - assert {:ok, "+12345678901"} = PhoneFormatter.normalize(" +1 234 567 8901 ") + test "is sorted by label" do + regions = PhoneFormatter.supported_regions("en") + labels = Enum.map(regions, &elem(&1, 1)) + assert labels == Enum.sort(labels) end end diff --git a/test/kith/contacts_sub_entities_test.exs b/test/kith/contacts_sub_entities_test.exs index be7823a..27ae4d7 100644 --- a/test/kith/contacts_sub_entities_test.exs +++ b/test/kith/contacts_sub_entities_test.exs @@ -130,6 +130,34 @@ defmodule Kith.ContactsSubEntitiesTest do {:ok, _} = Contacts.delete_contact_field(field) assert Contacts.list_contact_fields(contact.id) == [] end + + test "create_contact_field/3 with normalize: false skips phone normalization", + %{contact: contact, account_id: account_id} do + phone_type = + 
Enum.find(Contacts.list_contact_field_types(account_id), fn t -> + t.protocol in ["tel", "tel:"] + end) + + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = + Contacts.create_contact_field(contact, attrs, normalize: false) + + assert field.value == "+1 (202) 555-0100" + end + + test "create_contact_field/3 with normalize: true (default) normalizes phone", + %{contact: contact, account_id: account_id} do + phone_type = + Enum.find(Contacts.list_contact_field_types(account_id), fn t -> + t.protocol in ["tel", "tel:"] + end) + + attrs = %{"contact_field_type_id" => phone_type.id, "value" => "+1 (202) 555-0100"} + + assert {:ok, field} = Contacts.create_contact_field(contact, attrs) + assert field.value == "+12025550100" + end end ## Relationships diff --git a/test/kith/conversations/cleanup_test.exs b/test/kith/conversations/cleanup_test.exs new file mode 100644 index 0000000..32a3a9f --- /dev/null +++ b/test/kith/conversations/cleanup_test.exs @@ -0,0 +1,72 @@ +defmodule Kith.Conversations.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Conversations.{Cleanup, Conversation, Message} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes conversations (CASCADE messages) for target; leaves other untouched", ctx do + target_conv = insert_conversation!(ctx.target_account, ctx.target_user, ctx.target_contact.id) + other_conv = insert_conversation!(ctx.other_account, ctx.other_user, ctx.other_contact.id) + + insert_message!(target_conv.id, ctx.target_account) + 
insert_message!(other_conv.id, ctx.other_account) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Conversation, ctx.target_account) == 0 + assert count_for(Message, ctx.target_account) == 0 + + assert count_for(Conversation, ctx.other_account) == 1 + assert count_for(Message, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp insert_conversation!(account_id, user_id, contact_id) do + Repo.insert!(%Conversation{ + account_id: account_id, + creator_id: user_id, + contact_id: contact_id, + subject: "test", + platform: "other", + status: "active" + }) + end + + defp insert_message!(conversation_id, account_id) do + Repo.insert!(%Message{ + account_id: account_id, + conversation_id: conversation_id, + body: "hi", + direction: "sent", + sent_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/imports/cleanup_test.exs b/test/kith/imports/cleanup_test.exs new file mode 100644 index 0000000..8308beb --- /dev/null +++ b/test/kith/imports/cleanup_test.exs @@ -0,0 +1,60 @@ +defmodule Kith.Imports.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Imports + alias Kith.Imports.{Cleanup, Import, ImportRecord} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes imports + import_records for target account; leaves other account untouched", ctx do + target_import = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + 
api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(ctx.other_account, ctx.other_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + {:ok, _} = Imports.record_imported_entity(target_import, "contact", "1", "contact", 999) + {:ok, _} = Imports.record_imported_entity(other_import, "contact", "1", "contact", 999) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Import, ctx.target_account) == 0 + assert count_for(ImportRecord, ctx.target_account) == 0 + + # Control account untouched + assert count_for(Import, ctx.other_account) == 1 + assert count_for(ImportRecord, ctx.other_account) == 1 + end + + test "is idempotent on an account with no import data", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/imports/job_cancellation_test.exs b/test/kith/imports/job_cancellation_test.exs new file mode 100644 index 0000000..2878996 --- /dev/null +++ b/test/kith/imports/job_cancellation_test.exs @@ -0,0 +1,125 @@ +defmodule Kith.Imports.JobCancellationTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Imports.JobCancellation + alias Kith.Repo + alias Kith.Workers.{DuplicateDetectionWorker, ImportWorker, MonicaPhotoSyncWorker} + + import Kith.AccountsFixtures + import Kith.ImportsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + target_import = + import_fixture(target.account_id, target.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + other_import = + import_fixture(other.account_id, other.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + %{ + 
target_account: target.account_id, + target_import: target_import, + other_account: other.account_id, + other_import: other_import + } + end + + test "cancels target account's import jobs; leaves other account's jobs alone", ctx do + {:ok, target_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + {:ok, other_photo_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.other_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_photo_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_photo_job.id).state == "available" + end + + test "cancels DuplicateDetectionWorker jobs by account_id", ctx do + {:ok, target_dup_job} = + Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.target_account})) + + {:ok, other_dup_job} = + Oban.insert(DuplicateDetectionWorker.new(%{account_id: ctx.other_account})) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_dup_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_dup_job.id).state == "available" + end + + test "cancels ImportWorker jobs by account_id", ctx do + {:ok, target_job} = + Oban.insert( + ImportWorker.new(%{ + "account_id" => ctx.target_account, + "user_id" => 1, + "file_data" => "BEGIN:VCARD\nEND:VCARD\n" + }) + ) + + {:ok, other_job} = + Oban.insert( + ImportWorker.new(%{ + "account_id" => ctx.other_account, + "user_id" => 1, + "file_data" => "BEGIN:VCARD\nEND:VCARD\n" + }) + ) + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, target_job.id).state == "cancelled" + assert Repo.get!(Oban.Job, other_job.id).state == "available" + end + + test "is a no-op when account has no jobs", ctx do + assert :ok = 
JobCancellation.wipe_for_account(ctx.target_account) + end + + test "ignores jobs already in 'completed' state", ctx do + {:ok, completed_job} = + Oban.insert( + MonicaPhotoSyncWorker.new(%{ + "import_id" => ctx.target_import.id, + "credential_url" => "x", + "credential_api_key" => "y" + }) + ) + + # Manually mark as completed + completed_job + |> Ecto.Changeset.change(state: "completed", completed_at: DateTime.utc_now()) + |> Repo.update!() + + assert :ok = JobCancellation.wipe_for_account(ctx.target_account) + + # Completed jobs are NOT touched + assert Repo.get!(Oban.Job, completed_job.id).state == "completed" + end +end diff --git a/test/kith/imports/sources/monica_api/rate_limiter_test.exs b/test/kith/imports/sources/monica_api/rate_limiter_test.exs new file mode 100644 index 0000000..48eca8f --- /dev/null +++ b/test/kith/imports/sources/monica_api/rate_limiter_test.exs @@ -0,0 +1,72 @@ +defmodule Kith.Imports.Sources.MonicaApi.RateLimiterTest do + use ExUnit.Case, async: false + + alias Kith.Imports.Sources.MonicaApi.RateLimiter + + # Tests run with the real Hammer backend; we use a unique host per test + # so buckets do not collide between tests. We override the scale window + # and retry sleep to keep the suite fast — the production values live + # in config/config.exs. 
+ + setup do + prev_limit = Application.get_env(:kith, :monica_rate_limit) + prev_scale = Application.get_env(:kith, :monica_rate_limit_scale_ms) + prev_retry = Application.get_env(:kith, :monica_rate_limit_retry_sleep_ms) + + Application.put_env(:kith, :monica_rate_limit, 1) + Application.put_env(:kith, :monica_rate_limit_scale_ms, 300) + Application.put_env(:kith, :monica_rate_limit_retry_sleep_ms, 50) + + on_exit(fn -> + Application.put_env(:kith, :monica_rate_limit, prev_limit) + Application.put_env(:kith, :monica_rate_limit_scale_ms, prev_scale) + Application.put_env(:kith, :monica_rate_limit_retry_sleep_ms, prev_retry) + end) + + :ok + end + + defp unique_host, do: "test-#{System.unique_integer([:positive])}.example" + + describe "wait!/1" do + test "returns :ok immediately while under the per-window budget" do + host = unique_host() + + {us, _} = + :timer.tc(fn -> assert :ok = RateLimiter.wait!("https://#{host}") end) + + assert us < 30_000, "expected sub-30ms for one call under the budget, got #{us}us" + end + + test "sleeps once the budget is exhausted" do + host = unique_host() + :ok = RateLimiter.wait!("https://#{host}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host}") end) + + assert us >= 30_000, "expected ≥30ms wait when over budget, got #{us}us" + assert us < 1_000_000, "did not expect ≥1s wait; window should have rolled by now" + end + + test "per-host buckets do not share quota" do + host_a = unique_host() + host_b = unique_host() + + :ok = RateLimiter.wait!("https://#{host_a}") + + {us, _} = :timer.tc(fn -> RateLimiter.wait!("https://#{host_b}") end) + assert us < 30_000, "host_b should be in its own bucket" + end + + test "extracts the host portion of a URL for the bucket key" do + host = unique_host() + url1 = "https://#{host}/api/contacts" + url2 = "https://#{host}/api/me" + + :ok = RateLimiter.wait!(url1) + + {us, _} = :timer.tc(fn -> RateLimiter.wait!(url2) end) + assert us >= 30_000, "same host → same bucket → second call 
should wait" + end + end +end diff --git a/test/kith/imports/sources/monica_api_test.exs b/test/kith/imports/sources/monica_api_test.exs index f87e9b7..2d4e001 100644 --- a/test/kith/imports/sources/monica_api_test.exs +++ b/test/kith/imports/sources/monica_api_test.exs @@ -585,65 +585,6 @@ defmodule Kith.Imports.Sources.MonicaApiTest do end end - # ── crawl/5 — photo crawl ──────────────────────────────────────────── - - describe "crawl/5 — photo crawl" do - test "imports photos from paginated photos endpoint", %{user: user, account_id: account_id} do - # Small 1x1 JPEG encoded as data URL - pixel = Base.encode64(<<0xFF, 0xD8, 0xFF, 0xE0>>) - data_url = "data:image/jpeg;base64,#{pixel}" - - contacts = [contact_json(id: 1, first_name: "PhotoPerson")] - - photos = [ - photo_json( - id: 1, - data_url: data_url, - contact: contact_short_json(1, Ecto.UUID.generate(), "PhotoPerson", "Test") - ) - ] - - {:ok, agent} = Agent.start_link(fn -> 0 end) - - Req.Test.stub(@stub_name, fn conn -> - call = Agent.get_and_update(agent, fn n -> {n + 1, n + 1} end) - - if call == 1 do - Req.Test.json(conn, contacts_page_json(contacts)) - else - Req.Test.json(conn, photos_page_json(photos)) - end - end) - - import_job = api_import_fixture(account_id, user.id) - - assert {:ok, _} = - MonicaApi.crawl(account_id, user.id, credential(), import_job, %{"photos" => true}) - - # Verify photos endpoint was called - assert Agent.get(agent, & &1) == 2 - Agent.stop(agent) - end - - test "skips photos when opt-out", %{user: user, account_id: account_id} do - contacts = [contact_json(id: 1, first_name: "NoPhotos")] - - {:ok, agent} = Agent.start_link(fn -> 0 end) - - Req.Test.stub(@stub_name, fn conn -> - Agent.update(agent, &(&1 + 1)) - Req.Test.json(conn, contacts_page_json(contacts)) - end) - - import_job = api_import_fixture(account_id, user.id) - assert {:ok, _} = MonicaApi.crawl(account_id, user.id, credential(), import_job, %{}) - - # Only contacts page, no photos - assert Agent.get(agent, & 
&1) == 1 - Agent.stop(agent) - end - end - # ── crawl/5 — rate limiting ────────────────────────────────────────── describe "crawl/5 — rate limiting" do @@ -925,7 +866,202 @@ defmodule Kith.Imports.Sources.MonicaApiTest do |> Enum.map(& &1.value) |> Enum.sort() - assert fields == ["+5551234", "fieldy@test.com"] + # Without a `phone_default_region` in opts, bare numbers round-trip + # trimmed-but-unchanged — opt-in normalization preserves user input + # when the importer can't safely guess a country. + assert fields == ["555-1234", "fieldy@test.com"] + end + + test "normalizes phone fields to E.164 when phone_default_region is set", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 42, + first_name: "Regional", + contact_fields: [ + contact_field_json(content: "(202) 555-0100", type_name: "Phone"), + contact_field_json(content: "+44 20 7946 0958", type_name: "Phone") + ] + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, _} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "phone_default_region" => "US" + }) + + rec = Imports.find_import_record(account_id, "monica_api", "contact", "42") + + fields = + Repo.all(from cf in Contacts.ContactField, where: cf.contact_id == ^rec.local_entity_id) + |> Enum.map(& &1.value) + |> Enum.sort() + + # Bare US number normalized via region hint; +-prefixed UK number ignores + # the US hint and uses its own country code. 
+ assert "+12025550100" in fields + assert "+442079460958" in fields + end + + test "phone normalization happens exactly once during import", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 99, + first_name: "OnceOnly", + contact_fields: [ + contact_field_json(content: "(202) 555-0100", type_name: "Phone") + ] + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, _} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "phone_default_region" => "US" + }) + + rec = Imports.find_import_record(account_id, "monica_api", "contact", "99") + + values = + Repo.all(from cf in Contacts.ContactField, where: cf.contact_id == ^rec.local_entity_id) + |> Enum.map(& &1.value) + + assert "+12025550100" in values + end + end + + describe "crawl/5 — misc-data plan" do + test "includes a contact when statistics.number_of_calls > 0", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 1, + first_name: "Has", + last_name: "Calls", + statistics: %{"number_of_calls" => 3} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false + }) + + assert [%{source_id: "1", endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints + end + + test "excludes a contact when all opts are off", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 2, + first_name: "AllOff", + statistics: %{"number_of_calls" => 5, "number_of_gifts" => 5} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + 
assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => false, + "gifts" => false, + "pets" => false, + "activities" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert summary.misc_data_plan == [] + end + + test "includes :pets unconditionally when opt is on (no stat field)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 3, + first_name: "PetsOnly", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "pets" => true, + "calls" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: ["pets"]}] = summary.misc_data_plan + end + + test "missing statistic field is treated as >=1 (safe default)", + %{user: user, account_id: account_id} do + contacts = [ + contact_json( + id: 4, + first_name: "NoStats", + statistics: %{} + ) + ] + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, contacts_page_json(contacts, 1, 1, 1)) + end) + + import_job = api_import_fixture(account_id, user.id) + + assert {:ok, summary} = + MonicaApi.crawl(account_id, user.id, credential(), import_job, %{ + "calls" => true, + "pets" => false, + "activities" => false, + "gifts" => false, + "debts" => false, + "tasks" => false, + "reminders" => false, + "conversations" => false + }) + + assert [%{endpoints: endpoints}] = summary.misc_data_plan + assert "calls" in endpoints end end diff --git a/test/kith/journal/cleanup_test.exs b/test/kith/journal/cleanup_test.exs new file mode 100644 index 0000000..ba06ef1 --- /dev/null +++ b/test/kith/journal/cleanup_test.exs @@ -0,0 +1,50 @@ +defmodule 
Kith.Journal.CleanupTest do + use Kith.DataCase, async: true + + alias Kith.Journal + alias Kith.Journal.{Cleanup, Entry} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes journal entries for target account only", ctx do + {:ok, _} = + Journal.create_entry(ctx.target_account, ctx.target_user, %{ + "content" => "target", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + {:ok, _} = + Journal.create_entry(ctx.other_account, ctx.other_user, %{ + "content" => "other", + "occurred_at" => DateTime.utc_now() |> DateTime.truncate(:second) + }) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Entry, ctx.target_account) == 0 + assert count_for(Entry, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/policy_test.exs b/test/kith/policy_test.exs new file mode 100644 index 0000000..ba00a5b --- /dev/null +++ b/test/kith/policy_test.exs @@ -0,0 +1,27 @@ +defmodule Kith.PolicyTest do + use ExUnit.Case, async: true + + alias Kith.Accounts.User + alias Kith.Policy + + describe "can?/3 for :oban resource" do + test "admin can manage Oban" do + assert Policy.can?(%User{role: "admin"}, :manage, :oban) + assert Policy.can?(%User{role: "admin"}, :read, :oban) + end + + test "editor cannot access Oban" do + refute Policy.can?(%User{role: "editor"}, :manage, :oban) + refute Policy.can?(%User{role: "editor"}, :read, :oban) + end + + test "viewer cannot access Oban" do + refute 
Policy.can?(%User{role: "viewer"}, :manage, :oban) + refute Policy.can?(%User{role: "viewer"}, :read, :oban) + end + + test "unknown role cannot access Oban" do + refute Policy.can?(%User{role: "ghost"}, :manage, :oban) + end + end +end diff --git a/test/kith/reminders/cleanup_test.exs b/test/kith/reminders/cleanup_test.exs new file mode 100644 index 0000000..91cfc85 --- /dev/null +++ b/test/kith/reminders/cleanup_test.exs @@ -0,0 +1,73 @@ +defmodule Kith.Reminders.CleanupTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Reminders.{Cleanup, Reminder, ReminderInstance} + alias Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + other = user_fixture() + target_contact = contact_fixture(target.account_id) + other_contact = contact_fixture(other.account_id) + + %{ + target_account: target.account_id, + target_user: target.id, + target_contact: target_contact, + other_account: other.account_id, + other_user: other.id, + other_contact: other_contact + } + end + + test "wipes reminders + CASCADE rules/instances for target only", ctx do + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + other_reminder = reminder_fixture(ctx.other_account, ctx.other_contact.id, ctx.other_user) + + _target_instance = reminder_instance_fixture(target_reminder) + _other_instance = reminder_instance_fixture(other_reminder) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Reminder, ctx.target_account) == 0 + # ReminderRule is account-scoped (no reminder_id FK); verify it still exists for other account + # ReminderInstance has a reminder_id FK — CASCADE should remove it + assert count_orphans(ReminderInstance, [target_reminder.id]) == 0 + + assert count_for(Reminder, ctx.other_account) == 1 + end + + test "cancels Oban jobs tracked on the target's reminders", 
ctx do + {:ok, job} = + Oban.insert(Kith.Workers.ReminderNotificationWorker.new(%{"reminder_instance_id" => 0})) + + target_reminder = reminder_fixture(ctx.target_account, ctx.target_contact.id, ctx.target_user) + + target_reminder + |> Ecto.Changeset.change(enqueued_oban_job_ids: [job.id]) + |> Repo.update!() + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert Repo.get!(Oban.Job, job.id).state == "cancelled" + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end + + defp count_orphans(schema, reminder_ids) do + Repo.aggregate(from(s in schema, where: s.reminder_id in ^reminder_ids), :count) + end +end diff --git a/test/kith/storage/account_cleanup_test.exs b/test/kith/storage/account_cleanup_test.exs new file mode 100644 index 0000000..c72fb38 --- /dev/null +++ b/test/kith/storage/account_cleanup_test.exs @@ -0,0 +1,92 @@ +defmodule Kith.Storage.AccountCleanupTest do + use Kith.DataCase, async: false + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Storage + alias Kith.Storage.AccountCleanup + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "deletes target account's photo + import-upload files; leaves other account's files alone", + ctx do + {target_photo_key, _} = upload_and_attach_photo!(ctx.target_account) + {other_photo_key, _} = upload_and_attach_photo!(ctx.other_account) + + target_upload_key = upload_import_file!(ctx.target_account, ctx.target_user) + other_upload_key = upload_import_file!(ctx.other_account, ctx.other_user) + + # Ensure ALL files are 
cleaned up after the test, regardless of what wipe does. + # Files written via Storage.upload_binary are real disk I/O outside the Ecto sandbox. + on_exit(fn -> + Enum.each( + [target_photo_key, other_photo_key, target_upload_key, other_upload_key], + fn key -> _ = Storage.delete(key) end + ) + end) + + assert {:ok, _} = Storage.read(target_photo_key) + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(target_upload_key) + assert {:ok, _} = Storage.read(other_upload_key) + + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + + assert {:error, _} = Storage.read(target_photo_key) + assert {:error, _} = Storage.read(target_upload_key) + + # Control account untouched + assert {:ok, _} = Storage.read(other_photo_key) + assert {:ok, _} = Storage.read(other_upload_key) + end + + test "is a no-op when account has no files", ctx do + assert :ok = AccountCleanup.wipe_for_account(ctx.target_account) + end + + defp upload_and_attach_photo!(account_id) do + contact = contact_fixture(account_id) + binary = <<0xFF, 0xD8, 0xFF, 0xE0>> + key = Storage.generate_key(account_id, "photos", "test.jpg") + {:ok, _} = Storage.upload_binary(binary, key) + + {:ok, photo} = + Contacts.create_photo(contact, %{ + "file_name" => "test.jpg", + "storage_key" => key, + "file_size" => byte_size(binary), + "content_type" => "image/jpeg" + }) + + {key, photo} + end + + defp upload_import_file!(account_id, user_id) do + uuid = Ecto.UUID.generate() + key = "#{account_id}/imports/#{uuid}.vcf" + {:ok, _} = Storage.upload_binary("BEGIN:VCARD\nEND:VCARD\n", key) + + {:ok, _} = + Imports.create_import(account_id, user_id, %{ + source: "vcard", + file_name: "export.vcf", + file_size: 22, + file_storage_key: key + }) + + key + end +end diff --git a/test/kith/tasks/cleanup_test.exs b/test/kith/tasks/cleanup_test.exs new file mode 100644 index 0000000..3c35fef --- /dev/null +++ b/test/kith/tasks/cleanup_test.exs @@ -0,0 +1,41 @@ +defmodule Kith.Tasks.CleanupTest do 
+ use Kith.DataCase, async: true + + alias Kith.Repo + alias Kith.Tasks + alias Kith.Tasks.{Cleanup, Task} + + import Ecto.Query + import Kith.AccountsFixtures + + setup do + target = user_fixture() + other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + test "wipes tasks for target account only", ctx do + {:ok, _} = Tasks.create_task(ctx.target_account, ctx.target_user, %{"title" => "target task"}) + {:ok, _} = Tasks.create_task(ctx.other_account, ctx.other_user, %{"title" => "other task"}) + + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + + assert count_for(Task, ctx.target_account) == 0 + assert count_for(Task, ctx.other_account) == 1 + end + + test "is idempotent on empty account", ctx do + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + assert :ok = Cleanup.wipe_for_account(ctx.target_account) + end + + defp count_for(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/workers/account_reset_worker_test.exs b/test/kith/workers/account_reset_worker_test.exs new file mode 100644 index 0000000..1a1c5ff --- /dev/null +++ b/test/kith/workers/account_reset_worker_test.exs @@ -0,0 +1,190 @@ +defmodule Kith.Workers.AccountResetWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Activities.Activity + alias Kith.AuditLogs.AuditLog + alias Kith.Contacts.{Contact, Tag} + alias Kith.Conversations.Conversation + alias Kith.Imports + alias Kith.Imports.{Import, ImportRecord} + alias Kith.Journal.Entry + alias Kith.Reminders.Reminder + alias Kith.Repo + alias Kith.Tasks.Task, as: TaskSchema + alias Kith.Workers.AccountResetWorker + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + import Kith.RemindersFixtures + + setup do + target = user_fixture() + 
other = user_fixture() + + %{ + target_account: target.account_id, + target_user: target.id, + other_account: other.account_id, + other_user: other.id + } + end + + describe "perform/1 — regression: re-import after reset" do + test "re-import for same Monica contact id resolves to new local contact (no stale import_records)", + ctx do + # Initial import: contact + import_record for Monica id 964 + import_a = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + contact_a = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_a, "contact", "964", "contact", contact_a.id) + + # Run reset + assert :ok = perform_job(AccountResetWorker, %{account_id: ctx.target_account}) + + # Target account fully wiped + assert count(Contact, ctx.target_account) == 0 + assert count(Import, ctx.target_account) == 0 + assert count(ImportRecord, ctx.target_account) == 0 + + # Re-import: new contact + new import_record for the same Monica id + import_b = + import_fixture(ctx.target_account, ctx.target_user, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + contact_b = contact_fixture(ctx.target_account) + + {:ok, _} = + Imports.record_imported_entity(import_b, "contact", "964", "contact", contact_b.id) + + # The photo-sync lookup that previously found stale data now resolves correctly + assert %{local_entity_id: local_id} = + Imports.find_import_record(ctx.target_account, "monica_api", "contact", "964") + + assert local_id == contact_b.id + end + end + + describe "perform/1 — cross-account isolation" do + test "resetting account A does not touch any data in account B", ctx do + target_contact = populate_data!(ctx.target_account, ctx.target_user) + _other_contact = populate_data!(ctx.other_account, ctx.other_user) + + before_other = snapshot(ctx.other_account) + + assert :ok = perform_job(AccountResetWorker, 
%{account_id: ctx.target_account}) + + # Target wiped across every domain + assert empty?(ctx.target_account) + + # Other account is bit-identical to before + assert snapshot(ctx.other_account) == before_other + + # Sanity: target_contact is gone, other account still has its contact + refute Repo.get(Contact, target_contact.id) + end + end + + defp populate_data!(account_id, user_id) do + contact = contact_fixture(account_id) + + target_import = + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "k" + }) + + {:ok, _} = + Imports.record_imported_entity(target_import, "contact", "1", "contact", contact.id) + + Repo.insert!(%Tag{account_id: account_id, name: "t"}) + + Repo.insert!(%Activity{ + account_id: account_id, + title: "a", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%TaskSchema{ + account_id: account_id, + creator_id: user_id, + title: "x" + }) + + Repo.insert!(%Entry{ + account_id: account_id, + author_id: user_id, + content: "c", + occurred_at: DateTime.utc_now() |> DateTime.truncate(:second) + }) + + Repo.insert!(%Conversation{ + account_id: account_id, + creator_id: user_id, + contact_id: contact.id, + subject: "s", + platform: "other", + status: "active" + }) + + _reminder = reminder_fixture(account_id, contact.id, user_id) + + {:ok, _} = + Kith.AuditLogs.create_audit_log(account_id, %{ + user_id: nil, + user_name: "test", + event: "account_data_reset", + metadata: %{} + }) + + contact + end + + defp snapshot(account_id) do + %{ + contacts: count(Contact, account_id), + imports: count(Import, account_id), + import_records: count(ImportRecord, account_id), + conversations: count(Conversation, account_id), + tasks: count(TaskSchema, account_id), + journal_entries: count(Entry, account_id), + reminders: count(Reminder, account_id), + tags: count(Tag, account_id), + activities: count(Activity, account_id), + audit_logs: count(AuditLog, account_id) + 
} + end + + defp empty?(account_id) do + snapshot(account_id) == + %{ + contacts: 0, + imports: 0, + import_records: 0, + conversations: 0, + tasks: 0, + journal_entries: 0, + reminders: 0, + tags: 0, + activities: 0, + audit_logs: 0 + } + end + + defp count(schema, account_id) do + Repo.aggregate(from(s in schema, where: s.account_id == ^account_id), :count) + end +end diff --git a/test/kith/workers/duplicate_detection_worker_test.exs b/test/kith/workers/duplicate_detection_worker_test.exs new file mode 100644 index 0000000..adb53d9 --- /dev/null +++ b/test/kith/workers/duplicate_detection_worker_test.exs @@ -0,0 +1,677 @@ +defmodule Kith.Workers.DuplicateDetectionWorkerTest do + use Kith.DataCase, async: true + use Oban.Testing, repo: Kith.Repo + + import Kith.Factory + import Kith.ContactsFixtures + + alias Kith.Contacts.DuplicateCandidate + alias Kith.Workers.DuplicateDetectionWorker + + setup do + seed_reference_data!() + {account, _user} = setup_account() + + email_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "mailto:", + select: %{id: t.id}, + limit: 1 + ) + + phone_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "tel:", + select: %{id: t.id}, + limit: 1 + ) + + %{account: account, email_type_id: email_type.id, phone_type_id: phone_type.id} + end + + defp run_detection(account_id) do + perform_job(DuplicateDetectionWorker, %{account_id: account_id}) + end + + defp pending_candidates(account_id) do + DuplicateCandidate + |> where([d], d.account_id == ^account_id) + |> where([d], d.status == "pending") + |> order_by([d], desc: d.score) + |> Repo.all() + end + + describe "name matching" do + test "detects contacts with similar display names", %{account: account} do + insert(:contact, + account: account, + display_name: "John Smith", + first_name: "John", + last_name: "Smith" + ) + + insert(:contact, + account: account, + display_name: "John Smithe", + first_name: "John", + last_name: "Smithe" + ) + 
+ assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "name_match" in hd(candidates).reasons + assert hd(candidates).score >= 0.5 + end + + test "does not match dissimilar names", %{account: account} do + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + end + + describe "email matching" do + test "detects contacts sharing the same email", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "email_match" in hd(candidates).reasons + assert hd(candidates).score >= 0.8 + end + + test "email matching is case-insensitive", %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "SHARED@Example.COM"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account.id) + + candidates = 
pending_candidates(account.id) + assert length(candidates) == 1 + assert "email_match" in hd(candidates).reasons + end + + test "email matching trims surrounding whitespace", + %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Alice", + first_name: "Alice", + last_name: "" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob", + first_name: "Bob", + last_name: "" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => " Foo@BAR.com "}) + contact_field_fixture(c2, email_type_id, %{"value" => "foo@bar.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "email_match" in hd(candidates).reasons + end + + # Note: pure whitespace-only email values are rejected by the ContactField + # changeset's `validate_required`, so the cartesian-explosion case from + # Bug A can't actually manifest for emails through the normal write path. + # Coverage is provided by the "trims surrounding whitespace" case above, + # which exercises the same TRIM-in-JOIN code path on values that survive + # validation. 
+ + test "email-only match scores around 0.85", %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Completely Different", + first_name: "Completely", + last_name: "Different" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Totally Unique", + first_name: "Totally", + last_name: "Unique" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "same@email.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "same@email.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert hd(candidates).score == 0.85 + end + end + + describe "phone matching" do + test "detects contacts sharing the same phone number", %{ + account: account, + phone_type_id: phone_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, phone_type_id, %{"value" => "+1-555-1234"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "+1-555-1234"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "phone_match" in hd(candidates).reasons + assert hd(candidates).score >= 0.7 + end + + test "phone matching is strict equality on canonical E.164 values", + %{account: account, phone_type_id: phone_type_id} do + # Phone normalization runs at write-time (see PhoneFormatter.normalize/2), + # so the detection worker assumes both values are already canonical. 
+ c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, phone_type_id, %{"value" => "+12025550100"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "+12025550100"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "phone_match" in hd(candidates).reasons + end + + test "does not cartesian-explode across formatting-only phone values", + %{account: account, phone_type_id: phone_type_id} do + # Regression for Bug A: previously the in-query regex normalized any + # zero-digit value to "" and matched it against every other zero-digit + # value (C(N,2) false candidates). With strict equality on canonical + # values plus a TRIM-non-empty filter, distinct garbage strings produce + # no matches. + # Use deliberately dissimilar names so pg_trgm doesn't generate + # name-based false positives and contaminate the assertion. 
+ contacts_data = [ + {"Aaron Zephyr", "Aaron", "Zephyr", "+"}, + {"Quincy Bramble", "Quincy", "Bramble", "-"}, + {"Yolanda Khoury", "Yolanda", "Khoury", "()"}, + {"Vladimir Tcheng", "Vladimir", "Tcheng", "abc"}, + {"Saoirse Mwangi", "Saoirse", "Mwangi", "N/A"}, + {"Daiyu Olafsson", "Daiyu", "Olafsson", "x"} + ] + + for {display, first, last, garbage} <- contacts_data do + contact = + insert(:contact, + account: account, + display_name: display, + first_name: first, + last_name: last + ) + + contact_field_fixture(contact, phone_type_id, %{"value" => garbage}) + end + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + + assert candidates == [], + "expected no phone matches across distinct garbage values, got #{length(candidates)}" + end + + test "phone-only match scores 0.75", %{account: account, phone_type_id: phone_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Completely Different", + first_name: "Completely", + last_name: "Different" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Totally Unique", + first_name: "Totally", + last_name: "Unique" + ) + + contact_field_fixture(c1, phone_type_id, %{"value" => "5559876"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "5559876"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert hd(candidates).score == 0.75 + end + end + + describe "address matching" do + test "detects contacts sharing the same address", %{account: account} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + address_fixture(c1, %{"line1" => "123 Main St", "postal_code" => "90210"}) + address_fixture(c2, %{"line1" => "123 Main St", "postal_code" => "90210"}) + + 
assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "address_match" in hd(candidates).reasons + assert hd(candidates).score == 0.6 + end + + test "address matching is case-insensitive and trims whitespace", %{account: account} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + address_fixture(c1, %{"line1" => " 123 Main St ", "postal_code" => "90210"}) + address_fixture(c2, %{"line1" => "123 MAIN ST", "postal_code" => "90210"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + assert "address_match" in hd(candidates).reasons + end + + test "does not match on postal_code alone", %{account: account} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + address_fixture(c1, %{"line1" => "123 Main St", "postal_code" => "90210"}) + address_fixture(c2, %{"line1" => "456 Oak Ave", "postal_code" => "90210"}) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + end + + describe "combined signals" do + test "email + name match scores higher than email alone", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "John Smith", + first_name: "John", + last_name: "Smith" + ) + + c2 = + insert(:contact, + account: account, + display_name: "John Smithe", + first_name: "John", + last_name: "Smithe" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "john@example.com"}) + 
contact_field_fixture(c2, email_type_id, %{"value" => "john@example.com"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + candidate = hd(candidates) + assert "name_match" in candidate.reasons + assert "email_match" in candidate.reasons + # email base (0.85) + bonus for name signal (0.05) = 0.90 + assert candidate.score > 0.85 + end + + test "email + phone match boosts score", %{ + account: account, + email_type_id: email_type_id, + phone_type_id: phone_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Completely Different", + first_name: "Completely", + last_name: "Different" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Totally Unique", + first_name: "Totally", + last_name: "Unique" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "same@email.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "same@email.com"}) + contact_field_fixture(c1, phone_type_id, %{"value" => "5551234"}) + contact_field_fixture(c2, phone_type_id, %{"value" => "5551234"}) + + assert :ok = run_detection(account.id) + + candidates = pending_candidates(account.id) + assert length(candidates) == 1 + candidate = hd(candidates) + assert "email_match" in candidate.reasons + assert "phone_match" in candidate.reasons + # email base (0.85) + 1 bonus (0.05) = 0.90 + assert candidate.score == 0.9 + end + end + + describe "edge cases" do + test "skips soft-deleted contacts", %{account: account, email_type_id: email_type_id} do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams", + deleted_at: DateTime.utc_now(:second) + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" 
=> "shared@example.com"}) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + + test "does not re-insert existing pending candidates", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + # First run + assert :ok = run_detection(account.id) + assert length(pending_candidates(account.id)) == 1 + + # Second run should not create duplicates + assert :ok = run_detection(account.id) + assert length(pending_candidates(account.id)) == 1 + end + + test "does not re-insert dismissed candidates", %{ + account: account, + email_type_id: email_type_id + } do + c1 = + insert(:contact, + account: account, + display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + # First run, then dismiss + assert :ok = run_detection(account.id) + [candidate] = pending_candidates(account.id) + Kith.DuplicateDetection.dismiss_candidate(candidate) + + # Second run should not re-create dismissed candidate + assert :ok = run_detection(account.id) + assert pending_candidates(account.id) == [] + end + + test "account isolation — only detects within same account", %{email_type_id: email_type_id} do + {account1, _} = setup_account() + {account2, _} = setup_account() + + c1 = + insert(:contact, + account: account1, + 
display_name: "Alice Johnson", + first_name: "Alice", + last_name: "Johnson" + ) + + c2 = + insert(:contact, + account: account2, + display_name: "Bob Williams", + first_name: "Bob", + last_name: "Williams" + ) + + contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"}) + contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"}) + + assert :ok = run_detection(account1.id) + assert :ok = run_detection(account2.id) + + assert pending_candidates(account1.id) == [] + assert pending_candidates(account2.id) == [] + end + + test "handles fewer than 2 contacts gracefully", %{account: account} do + insert(:contact, + account: account, + display_name: "Only Contact", + first_name: "Only", + last_name: "Contact" + ) + + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + + test "handles zero contacts gracefully", %{account: account} do + assert :ok = run_detection(account.id) + + assert pending_candidates(account.id) == [] + end + end + + describe "cron mode" do + test "runs for all accounts when no account_id provided" do + {account1, _} = setup_account() + {account2, _} = setup_account() + + insert(:contact, + account: account1, + display_name: "John Smith", + first_name: "John", + last_name: "Smith" + ) + + insert(:contact, + account: account1, + display_name: "John Smithe", + first_name: "John", + last_name: "Smithe" + ) + + insert(:contact, + account: account2, + display_name: "Jane Doe", + first_name: "Jane", + last_name: "Doe" + ) + + insert(:contact, + account: account2, + display_name: "Jane Doee", + first_name: "Jane", + last_name: "Doee" + ) + + assert :ok = perform_job(DuplicateDetectionWorker, %{}) + + assert length(pending_candidates(account1.id)) == 1 + assert length(pending_candidates(account2.id)) == 1 + end + end +end diff --git a/test/kith/workers/monica_api_crawl_worker_test.exs b/test/kith/workers/monica_api_crawl_worker_test.exs index 2fa6f88..cc04eb1 100644 --- 
a/test/kith/workers/monica_api_crawl_worker_test.exs +++ b/test/kith/workers/monica_api_crawl_worker_test.exs @@ -4,6 +4,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorkerTest do alias Kith.Imports alias Kith.Workers.MonicaApiCrawlWorker + alias Kith.Workers.MonicaPhotoSyncWorker import Kith.AccountsFixtures import Kith.ContactsFixtures @@ -59,5 +60,148 @@ defmodule Kith.Workers.MonicaApiCrawlWorkerTest do assert import_job.api_options["photos"] == true assert import_job.api_options["extra_notes"] == false end + + test "build_opts forwards every wizard-saved option to the source module", + %{user: user, account_id: account_id} do + # Regression for Bug C: build_opts used to hand-curate a map containing + # only "extra_notes" — every other wizard option (auto_merge_duplicates, + # photos, pets, phone_default_region, …) was silently dropped before + # reaching MonicaApi.crawl/5. + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{ + "auto_merge_duplicates" => true, + "phone_default_region" => "US", + "photos" => true, + "pets" => true + } + }) + + opts = MonicaApiCrawlWorker.build_opts(import_job) + + assert opts["auto_merge_duplicates"] == true + assert opts["phone_default_region"] == "US" + assert opts["photos"] == true + assert opts["pets"] == true + # extra_notes defaults to true unless explicitly false + assert opts["extra_notes"] == true + end + + test "build_opts honors extra_notes=false explicitly", + %{user: user, account_id: account_id} do + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"extra_notes" => false} + }) + + assert MonicaApiCrawlWorker.build_opts(import_job)["extra_notes"] == false + end + + test "build_opts handles missing api_options map", %{user: user, account_id: account_id} do + import_job = + import_fixture(account_id, 
user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: nil + }) + + opts = MonicaApiCrawlWorker.build_opts(import_job) + assert opts["extra_notes"] == true + end + + test "enqueues MonicaPhotoSyncWorker when photos opt-in", %{ + user: user, + account_id: account_id + } do + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"photos" => true} + }) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + assert_enqueued( + worker: MonicaPhotoSyncWorker, + args: %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key" + } + ) + end + + test "does not enqueue MonicaPhotoSyncWorker when photos opt-out", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture_with_stub(account_id, user.id) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + refute_enqueued(worker: MonicaPhotoSyncWorker) + end + + test "enqueues MonicaMiscDataWorker with the plan from crawl summary", + %{user: user, account_id: account_id} do + # Boundary regression: the misc_data_plan key produced by + # MonicaApi.crawl/5 must reach MonicaMiscDataWorker.new/1 unmodified — + # the same wizard→crawl→worker contract that Bug C silently violated + # for auto_merge_duplicates in the previous PR. 
+ + stub_name = :monica_crawl_misc_stub + + Application.put_env( + :kith, + :monica_req_options, + plug: {Req.Test, stub_name}, + retry: false + ) + + on_exit(fn -> Application.delete_env(:kith, :monica_req_options) end) + + import_job = + import_fixture(account_id, user.id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"calls" => true, "pets" => false} + }) + + contacts = + Kith.MonicaApiFixtures.contacts_page_json( + [ + Kith.MonicaApiFixtures.contact_json( + id: 7, + first_name: "Plan", + last_name: "Test", + statistics: %{"number_of_calls" => 2} + ) + ], + 1, + 1, + 1 + ) + + Req.Test.stub(stub_name, fn conn -> + Req.Test.json(conn, contacts) + end) + + assert :ok = perform_job(MonicaApiCrawlWorker, %{import_id: import_job.id}) + + assert_enqueued( + worker: Kith.Workers.MonicaMiscDataWorker, + args: %{"import_id" => import_job.id} + ) + end end end diff --git a/test/kith/workers/monica_misc_data_worker_test.exs b/test/kith/workers/monica_misc_data_worker_test.exs new file mode 100644 index 0000000..da5ff7e --- /dev/null +++ b/test/kith/workers/monica_misc_data_worker_test.exs @@ -0,0 +1,161 @@ +defmodule Kith.Workers.MonicaMiscDataWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + import Ecto.Query + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + + alias Kith.Imports + alias Kith.Workers.MonicaMiscDataWorker + + @stub_name MonicaMiscDataReqStub + + setup do + seed_reference_data!() + user = user_fixture() + + Application.put_env( + :kith, + :monica_req_options, + plug: {Req.Test, @stub_name}, + retry: false + ) + + on_exit(fn -> Application.delete_env(:kith, :monica_req_options) end) + + %{user: user, account_id: user.account_id} + end + + defp build_args(import_job, plan) do + %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key", + "plan" => plan + } + 
end + + defp api_import(account_id, user_id, api_options \\ %{}) do + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: api_options, + status: "completed" + }) + end + + describe "perform/1" do + test "fires only the endpoints listed in the plan", + %{user: user, account_id: account_id} do + contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [ + %{ + "source_id" => "42", + "local_id" => contact.id, + "endpoints" => ["calls", "gifts"] + } + ] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + paths = collect_requests([]) + assert "/api/contacts/42/calls" in paths + assert "/api/contacts/42/gifts" in paths + refute "/api/contacts/42/pets" in paths + refute "/api/contacts/42/activities" in paths + end + + test "exits early when the import is cancelled", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + {:ok, _} = Imports.update_import_status(import_job, "cancelled", %{}) + + contact = contact_fixture(account_id) + pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "skips contacts whose local row has been soft-deleted", + %{user: user, account_id: account_id} do + import_job = api_import(account_id, user.id) + contact = contact_fixture(account_id) + + Repo.update_all( + from(c in Kith.Contacts.Contact, where: c.id == ^contact.id), + set: [deleted_at: DateTime.utc_now() |> DateTime.truncate(:second)] + ) + 
+ pid = self() + + Req.Test.stub(@stub_name, fn conn -> + send(pid, {:request, conn.request_path}) + Req.Test.json(conn, %{"data" => []}) + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + assert collect_requests([]) == [] + end + + test "writes per-endpoint counts to import_job.summary['misc']", + %{user: user, account_id: account_id} do + contact = contact_fixture(account_id) + import_job = api_import(account_id, user.id) + + Req.Test.stub(@stub_name, fn conn -> + case conn.request_path do + "/api/contacts/1/calls" -> + Req.Test.json(conn, %{ + "data" => [ + %{"id" => 1, "called_at" => "2025-01-01T10:00:00Z", "contact_called" => true}, + %{"id" => 2, "called_at" => "2025-01-02T10:00:00Z", "contact_called" => false} + ] + }) + + _ -> + Req.Test.json(conn, %{"data" => []}) + end + end) + + plan = [%{"source_id" => "1", "local_id" => contact.id, "endpoints" => ["calls"]}] + + assert :ok = perform_job(MonicaMiscDataWorker, build_args(import_job, plan)) + + updated = Imports.get_import!(import_job.id) + assert is_map(updated.summary["misc"]) + assert updated.summary["misc"]["calls"] >= 0 + end + end + + defp collect_requests(acc) do + receive do + {:request, path} -> collect_requests([path | acc]) + after + 0 -> Enum.reverse(acc) + end + end +end diff --git a/test/kith/workers/monica_photo_sync_worker_test.exs b/test/kith/workers/monica_photo_sync_worker_test.exs new file mode 100644 index 0000000..d95456a --- /dev/null +++ b/test/kith/workers/monica_photo_sync_worker_test.exs @@ -0,0 +1,247 @@ +defmodule Kith.Workers.MonicaPhotoSyncWorkerTest do + use Kith.DataCase, async: false + use Oban.Testing, repo: Kith.Repo + + alias Kith.Contacts + alias Kith.Imports + alias Kith.Repo + alias Kith.Workers.MonicaPhotoSyncWorker + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + import Kith.ImportsFixtures + import Kith.MonicaApiFixtures 
+ + @stub_name :monica_photo_sync_stub + @pixel_data_url "data:image/jpeg;base64,#{Base.encode64(<<0xFF, 0xD8, 0xFF, 0xE0>>)}" + @other_pixel_data_url "data:image/png;base64,#{Base.encode64(<<0x89, 0x50, 0x4E, 0x47>>)}" + + setup do + user = user_fixture() + seed_reference_data!() + + Application.put_env( + :kith, + :monica_req_options, + plug: {Req.Test, @stub_name}, + retry: false + ) + + on_exit(fn -> Application.delete_env(:kith, :monica_req_options) end) + + %{user: user, account_id: user.account_id} + end + + defp api_import_fixture(account_id, user_id) do + import_fixture(account_id, user_id, %{ + source: "monica_api", + api_url: "https://monica.test", + api_key_encrypted: "test-key", + api_options: %{"photos" => true} + }) + end + + defp job_args(import_job), + do: %{ + "import_id" => import_job.id, + "credential_url" => "https://monica.test", + "credential_api_key" => "test-key" + } + + defp register_imported_contact!(import_job, contact, monica_id) do + {:ok, _rec} = + Imports.record_imported_entity( + import_job, + "contact", + to_string(monica_id), + "contact", + contact.id + ) + end + + describe "perform/1 — happy path" do + test "imports photo with dataUrl, sets avatar, writes sync_summary", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture(account_id, user.id) + contact = contact_fixture(account_id, %{first_name: "PhotoPerson"}) + register_imported_contact!(import_job, contact, 964) + + photo = + photo_json( + id: 35, + data_url: @pixel_data_url, + contact: contact_short_json(964, Ecto.UUID.generate(), "PhotoPerson", "Test") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert [photo_row] = Contacts.list_photos(contact.id) + assert photo_row.contact_id == contact.id + assert photo_row.content_hash != nil + + reloaded_contact = Repo.get!(Contacts.Contact, contact.id) + assert 
reloaded_contact.avatar == photo_row.storage_key + + updated = Imports.get_import!(import_job.id) + assert updated.sync_summary["total"] == 1 + assert updated.sync_summary["synced"] == 1 + assert updated.sync_summary["failed"] == 0 + assert updated.sync_summary["not_found"] == 0 + assert [%{"status" => "synced", "contact_id" => cid}] = updated.sync_summary["photos"] + assert cid == contact.id + end + end + + describe "perform/1 — not_found" do + test "marks photo as not_found when contact has no import_record", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture(account_id, user.id) + + photo = + photo_json( + id: 100, + data_url: @pixel_data_url, + contact: contact_short_json(9999, Ecto.UUID.generate(), "Unknown", "Person") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert Repo.aggregate(Contacts.Photo, :count, :id) == 0 + + updated = Imports.get_import!(import_job.id) + assert updated.sync_summary["not_found"] == 1 + assert updated.sync_summary["synced"] == 0 + assert [%{"status" => "not_found", "reason" => reason}] = updated.sync_summary["photos"] + assert reason =~ "import_records" + end + end + + describe "perform/1 — failed" do + test "marks photo as failed when dataUrl is missing", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + contact = contact_fixture(account_id, %{first_name: "NoData"}) + register_imported_contact!(import_job, contact, 200) + + photo = + photo_json( + id: 200, + data_url: nil, + link: nil, + contact: contact_short_json(200, Ecto.UUID.generate(), "NoData", "Person") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert Contacts.list_photos(contact.id) == [] + + updated = 
Imports.get_import!(import_job.id) + assert updated.sync_summary["failed"] == 1 + assert [%{"status" => "failed", "reason" => "no_data_url"}] = updated.sync_summary["photos"] + end + end + + describe "perform/1 — dedup" do + test "dedups by content_hash on second run, still counts as synced", %{ + user: user, + account_id: account_id + } do + import_job = api_import_fixture(account_id, user.id) + contact = contact_fixture(account_id, %{first_name: "Dup"}) + register_imported_contact!(import_job, contact, 300) + + photo = + photo_json( + id: 300, + data_url: @pixel_data_url, + contact: contact_short_json(300, Ecto.UUID.generate(), "Dup", "Person") + ) + + Req.Test.stub(@stub_name, fn conn -> + Req.Test.json(conn, photos_page_json([photo])) + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert [_only_one] = Contacts.list_photos(contact.id) + + updated = Imports.get_import!(import_job.id) + assert updated.sync_summary["synced"] == 1 + assert updated.sync_summary["total"] == 1 + [entry] = updated.sync_summary["photos"] + assert entry["status"] == "synced" + assert entry["reason"] == "duplicate" + end + end + + describe "perform/1 — incremental progress" do + test "writes sync_summary after each page", %{user: user, account_id: account_id} do + import_job = api_import_fixture(account_id, user.id) + contact_a = contact_fixture(account_id, %{first_name: "PageA"}) + contact_b = contact_fixture(account_id, %{first_name: "PageB"}) + register_imported_contact!(import_job, contact_a, 401) + register_imported_contact!(import_job, contact_b, 402) + + page1_photo = + photo_json( + id: 401, + data_url: @pixel_data_url, + contact: contact_short_json(401, Ecto.UUID.generate(), "PageA", "Test") + ) + + page2_photo = + photo_json( + id: 402, + data_url: @other_pixel_data_url, + contact: contact_short_json(402, Ecto.UUID.generate(), "PageB", "Test") + ) + + test_pid = self() + 
+ Req.Test.stub(@stub_name, fn conn -> + page = conn.query_params["page"] || "1" + + case page do + "1" -> + # Mid-flight snapshot: by the time we serve page 2, page 1 must have been + # persisted to sync_summary. + send(test_pid, :page_1_requested) + Req.Test.json(conn, photos_page_json([page1_photo], 1, 2, 2)) + + "2" -> + updated = Imports.get_import!(import_job.id) + send(test_pid, {:mid_flight_summary, updated.sync_summary}) + Req.Test.json(conn, photos_page_json([page2_photo], 2, 2, 2)) + end + end) + + assert :ok = perform_job(MonicaPhotoSyncWorker, job_args(import_job)) + + assert_received :page_1_requested + assert_received {:mid_flight_summary, mid} + # After page 1 completes, exactly one photo should be recorded. + assert mid["total"] == 1 + assert mid["synced"] == 1 + + final = Imports.get_import!(import_job.id) + assert final.sync_summary["total"] == 2 + assert final.sync_summary["synced"] == 2 + end + end +end diff --git a/test/kith/workers/phone_renormalize_worker_test.exs b/test/kith/workers/phone_renormalize_worker_test.exs new file mode 100644 index 0000000..6441b54 --- /dev/null +++ b/test/kith/workers/phone_renormalize_worker_test.exs @@ -0,0 +1,132 @@ +defmodule Kith.Workers.PhoneRenormalizeWorkerTest do + use Kith.DataCase, async: true + use Oban.Testing, repo: Kith.Repo + + import Kith.AccountsFixtures + import Kith.ContactsFixtures + + alias Kith.Contacts.ContactField + alias Kith.Repo + alias Kith.Workers.PhoneRenormalizeWorker + + setup do + seed_reference_data!() + + # Default account locale is "en" — see Account schema, which maps to "US" + # via PhoneFormatter.region_for_locale/1. 
+ user = user_fixture() + account_id = user.account_id + + phone_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "tel:", + select: %{id: t.id}, + limit: 1 + ) + + email_type = + Repo.one!( + from t in "contact_field_types", + where: t.protocol == "mailto:", + select: %{id: t.id}, + limit: 1 + ) + + %{ + account_id: account_id, + phone_type_id: phone_type.id, + email_type_id: email_type.id + } + end + + defp insert_phone_raw(account_id, contact_id, phone_type_id, value) do + # Bypass changeset normalization so we can stash heuristic-era values that + # the new PhoneFormatter.normalize/2 would reject going forward. + now = DateTime.utc_now() |> DateTime.truncate(:second) + + {1, [%{id: id}]} = + Repo.insert_all( + "contact_fields", + [ + %{ + account_id: account_id, + contact_id: contact_id, + contact_field_type_id: phone_type_id, + value: value, + inserted_at: now, + updated_at: now + } + ], + returning: [:id] + ) + + id + end + + describe "perform/1" do + test "rewrites bare US phones to E.164 using account locale", + %{account_id: account_id, phone_type_id: phone_type_id} do + contact = contact_fixture(account_id) + id = insert_phone_raw(account_id, contact.id, phone_type_id, "2025550100") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + + assert Repo.get!(ContactField, id).value == "+12025550100" + end + + test "leaves valid E.164 values untouched (idempotence)", + %{account_id: account_id, phone_type_id: phone_type_id} do + contact = contact_fixture(account_id) + id = insert_phone_raw(account_id, contact.id, phone_type_id, "+12025550100") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + assert Repo.get!(ContactField, id).value == "+12025550100" + + # Re-run — should be a no-op. 
+ assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + assert Repo.get!(ContactField, id).value == "+12025550100" + end + + test "leaves unparseable values alone instead of clobbering", + %{account_id: account_id, phone_type_id: phone_type_id} do + contact = contact_fixture(account_id) + id = insert_phone_raw(account_id, contact.id, phone_type_id, "+") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + + # "+" is unparseable; PhoneFormatter.normalize returns it trimmed-as-is. + assert Repo.get!(ContactField, id).value == "+" + end + + test "does not touch email values", + %{account_id: account_id, email_type_id: email_type_id} do + contact = contact_fixture(account_id) + field = contact_field_fixture(contact, email_type_id, %{"value" => "user@example.com"}) + + assert :ok = perform_job(PhoneRenormalizeWorker, %{account_id: account_id}) + assert Repo.get!(ContactField, field.id).value == "user@example.com" + end + end + + describe "perform/1 all-accounts mode" do + test "iterates every account when no account_id arg supplied", + %{phone_type_id: phone_type_id} do + # Each user_fixture creates its own account. Insert one bare number per + # account; both should get rewritten to E.164. 
+ user1 = user_fixture() + user2 = user_fixture() + + c1 = contact_fixture(user1.account_id) + c2 = contact_fixture(user2.account_id) + + id1 = insert_phone_raw(user1.account_id, c1.id, phone_type_id, "2025550100") + id2 = insert_phone_raw(user2.account_id, c2.id, phone_type_id, "2025550101") + + assert :ok = perform_job(PhoneRenormalizeWorker, %{}) + + assert Repo.get!(ContactField, id1).value == "+12025550100" + assert Repo.get!(ContactField, id2).value == "+12025550101" + end + end +end diff --git a/test/kith_web/user_auth_test.exs b/test/kith_web/user_auth_test.exs index 22ca2de..819cfd6 100644 --- a/test/kith_web/user_auth_test.exs +++ b/test/kith_web/user_auth_test.exs @@ -282,6 +282,55 @@ defmodule KithWeb.UserAuthTest do end end + describe "on_mount :require_admin" do + test "allows users with role admin", %{conn: conn, user: user} do + user_token = Accounts.generate_user_session_token(user) + session = conn |> put_session(:user_token, user_token) |> get_session() + + socket = %LiveView.Socket{ + endpoint: KithWeb.Endpoint, + assigns: %{__changed__: %{}, flash: %{}} + } + + assert {:cont, updated_socket} = + UserAuth.on_mount(:require_admin, %{}, session, socket) + + assert updated_socket.assigns.current_scope.user.id == user.id + end + + test "halts users without admin role" do + editor = user_fixture(%{role: "editor"}) + editor = %{editor | authenticated_at: DateTime.utc_now(:second)} + user_token = Accounts.generate_user_session_token(editor) + + conn = + Phoenix.ConnTest.build_conn() + |> Map.replace!(:secret_key_base, KithWeb.Endpoint.config(:secret_key_base)) + |> Plug.Test.init_test_session(%{}) + |> put_session(:user_token, user_token) + + session = get_session(conn) + + socket = %LiveView.Socket{ + endpoint: KithWeb.Endpoint, + assigns: %{__changed__: %{}, flash: %{}} + } + + assert {:halt, _socket} = UserAuth.on_mount(:require_admin, %{}, session, socket) + end + + test "halts when no user is logged in", %{conn: conn} do + session = conn |> 
get_session() + + socket = %LiveView.Socket{ + endpoint: KithWeb.Endpoint, + assigns: %{__changed__: %{}, flash: %{}} + } + + assert {:halt, _socket} = UserAuth.on_mount(:require_admin, %{}, session, socket) + end + end + describe "on_mount :require_sudo_mode" do test "allows users that have authenticated in the last 10 minutes", %{conn: conn, user: user} do user_token = Accounts.generate_user_session_token(user) diff --git a/test/support/fixtures/monica_api_fixtures.ex b/test/support/fixtures/monica_api_fixtures.ex index 7b230e6..49be7d5 100644 --- a/test/support/fixtures/monica_api_fixtures.ex +++ b/test/support/fixtures/monica_api_fixtures.ex @@ -49,15 +49,17 @@ defmodule Kith.MonicaApiFixtures do }, "addresses" => overrides[:addresses] || [], "tags" => overrides[:tags] || [], - "statistics" => %{ - "number_of_calls" => 0, - "number_of_notes" => overrides[:number_of_notes] || 0, - "number_of_activities" => 0, - "number_of_reminders" => 0, - "number_of_tasks" => 0, - "number_of_gifts" => 0, - "number_of_debts" => 0 - }, + "statistics" => + overrides[:statistics] || + %{ + "number_of_calls" => 0, + "number_of_notes" => overrides[:number_of_notes] || 0, + "number_of_activities" => 0, + "number_of_reminders" => 0, + "number_of_tasks" => 0, + "number_of_gifts" => 0, + "number_of_debts" => 0 + }, "contactFields" => overrides[:contact_fields] || [], "notes" => overrides[:notes] || [], "account" => %{"id" => 1},