From c07d980486f4cefe56a02c160bc4eb2824f6cc87 Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 14:54:52 +0300 Subject: [PATCH 1/8] Fix date calculation for unsubmitted forms cleanup to correctly account for milliseconds --- tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index e7bd3e2..ccf9e40 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -29,7 +29,7 @@ import { update_job_status } from "./generic_scheduler"; export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { try { //Find forms that were created 7 days ago and have not been submitted - const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60); + const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); const sevenDaysAgoPlusOneDay = new Date( sevenDaysAgo.getTime() + 24 * 60 * 60 * 1000 ); From 30562f4404979c232941b03a2e4329e7caa1f684 Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 14:57:36 +0300 Subject: [PATCH 2/8] Fix query to find all tokens older than 7 days, not just exactly 7 days old --- tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index ccf9e40..18afe4a 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -28,17 +28,14 @@ import { update_job_status } from "./generic_scheduler"; export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { try { - //Find forms that were created 7 days ago and have not been submitted - const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); - const sevenDaysAgoPlusOneDay = new Date( - sevenDaysAgo.getTime() + 24 * 60 * 60 * 1000 - ); + // Find forms that were created more than 7 days ago and have not been submitted + const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); // 7 days in ms + // get all tokens older than 7 days, not just ones from exactly 7 days ago const expiredTokens = await prisma.publicFormsTokens.findMany({ where: { createdAt: { - gte: sevenDaysAgo, // greater than or equal to 7 days ago - lt: sevenDaysAgoPlusOneDay, // but less than 7 days ago + 1 day + lt: sevenDaysAgo, }, }, }); From 6c85dfbf4ec2228c3d7620b899913bb7a2e09065 Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 14:58:58 +0300 Subject: [PATCH 3/8] Refactor cleanup_unsubmitted_forms to streamline deletion operations with transaction array for foreign key constraints --- .../cleanup_unsubmitted_forms.ts | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 18afe4a..d436f26 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -48,28 +48,34 @@ export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { }, }); + // build transaction array - delete in order that respects FK constraints + const deleteOperations = [ + // delete token first + prisma.publicFormsTokens.delete({ + where: { token: token.token }, + }), + // delete corpus items before entity (they reference entity_id) + prisma.new_corpus.deleteMany({ + where: { + entity_id: token.entityId || "", + }, + }), + // delete entity last since other things reference it + prisma.entity.delete({ + where: { id: token.entityId || "" }, + }), + ]; + + // only delete relationship if one exists if (relationship) { - await prisma.$transaction([ - // Delete relationship + deleteOperations.unshift( prisma.relationship.delete({ where: { id: relationship.id }, }), - // // Delete the token - prisma.publicFormsTokens.delete({ - where: { token: token.token }, - }), - // Delete all corpus items associated with the entity - prisma.new_corpus.deleteMany({ - where: { - entity_id: token.entityId || "", - }, - }), - // Delete the entity (company) - prisma.entity.delete({ - where: { id: token.entityId || "" }, - }), - ]); + ); } + + await prisma.$transaction(deleteOperations); } await update_job_status(job.id, "completed"); From fb24ea84ea088baf2acfa2906ea90cc38e79c65b Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 15:00:01 +0300 Subject: [PATCH 4/8] Refactor cleanup_unsubmitted_forms to conditionally delete entity-related data based on existence of entityId, improving transaction handling. --- .../cleanup_unsubmitted_forms.ts | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index d436f26..6baf55f 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -49,23 +49,27 @@ export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { }); // build transaction array - delete in order that respects FK constraints - const deleteOperations = [ - // delete token first + const deleteOperations: any[] = [ + // always delete the token prisma.publicFormsTokens.delete({ where: { token: token.token }, }), - // delete corpus items before entity (they reference entity_id) - prisma.new_corpus.deleteMany({ - where: { - entity_id: token.entityId || "", - }, - }), - // delete entity last since other things reference it - prisma.entity.delete({ - where: { id: token.entityId || "" }, - }), ]; + // only delete entity-related data if entityId exists + if (token.entityId) { + deleteOperations.push( + // delete corpus items before entity (they reference entity_id) + prisma.new_corpus.deleteMany({ + where: { entity_id: token.entityId }, + }), + // delete entity last since other things reference it + prisma.entity.delete({ + where: { id: token.entityId }, + }), + ); + } + // only delete relationship if one exists if (relationship) { deleteOperations.unshift( From 97adc0d446aa234f5f2fcf8acf291704103d945f Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 15:52:51 +0300 Subject: [PATCH 5/8] Refactor to batch deletions for better performance --- .../cleanup_unsubmitted_forms.ts | 81 +++++++++---------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 6baf55f..36d2788 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -28,59 +28,54 @@ import { update_job_status } from "./generic_scheduler"; export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { try { - // Find forms that were created more than 7 days ago and have not been submitted const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); // 7 days in ms - // get all tokens older than 7 days, not just ones from exactly 7 days ago + // fetch only the fields we need to build our delete queries const expiredTokens = await prisma.publicFormsTokens.findMany({ where: { - createdAt: { - lt: sevenDaysAgo, - }, + createdAt: { lt: sevenDaysAgo }, }, + select: { token: true, entityId: true, productId: true }, }); - for (const token of expiredTokens) { - const relationship = await prisma.relationship.findFirst({ + // nothing to clean up + if (expiredTokens.length === 0) { + await update_job_status(job.id, "completed"); + return; + } + + // collect ids for batch deletion - filter out nulls + const tokenStrings = expiredTokens.map((t) => t.token); + const entityIds = expiredTokens + .map((t) => t.entityId) + .filter((id): id is string => id !== null && id !== undefined); + const productIds = expiredTokens + .map((t) => t.productId) + .filter((id): id is string => id !== null && id !== undefined); + + // batch delete in a single transaction - order matters for FK constraints + // NOTE: for very large datasets (50k+), consider chunking to avoid DB limits on IN clauses + await prisma.$transaction([ + // delete relationships tied to these products that are still "new" (unsubmitted) + prisma.relationship.deleteMany({ where: { - product_id: token.productId, + product_id: { in: productIds }, status: "new", }, - }); - - // build transaction array - delete in order that respects FK constraints - const deleteOperations: any[] = [ - // always delete the token - prisma.publicFormsTokens.delete({ - where: { token: token.token }, - }), - ]; - - // only delete entity-related data if entityId exists - if (token.entityId) { - deleteOperations.push( - // delete corpus items before entity (they reference entity_id) - prisma.new_corpus.deleteMany({ - where: { entity_id: token.entityId }, - }), - // delete entity last since other things reference it - prisma.entity.delete({ - where: { id: token.entityId }, - }), - ); - } - - // only delete relationship if one exists - if (relationship) { - deleteOperations.unshift( - prisma.relationship.delete({ - where: { id: relationship.id }, - }), - ); - } - - await prisma.$transaction(deleteOperations); - } + }), + // delete corpus items before entities (they reference entity_id) + prisma.new_corpus.deleteMany({ + where: { entity_id: { in: entityIds } }, + }), + // delete the entities + prisma.entity.deleteMany({ + where: { id: { in: entityIds } }, + }), + // finally delete the tokens + prisma.publicFormsTokens.deleteMany({ + where: { token: { in: tokenStrings } }, + }), + ]); await update_job_status(job.id, "completed"); } catch (error) { From 93e84483ef5c3684cfdd35be598e0b8e9aea58da Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 15:55:28 +0300 Subject: [PATCH 6/8] Remove outdated note regarding chunking for large datasets in cleanup_unsubmitted_forms, streamlining the code for clarity. --- tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 36d2788..2c016de 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -54,7 +54,6 @@ export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { .filter((id): id is string => id !== null && id !== undefined); // batch delete in a single transaction - order matters for FK constraints - // NOTE: for very large datasets (50k+), consider chunking to avoid DB limits on IN clauses await prisma.$transaction([ // delete relationships tied to these products that are still "new" (unsubmitted) prisma.relationship.deleteMany({ From bcfc690843a9e0fc06a24298f455b5c2b5d79756 Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Sat, 24 Jan 2026 18:05:05 +0300 Subject: [PATCH 7/8] Enhance cleanup_unsubmitted_forms by refining query logic to fetch unsubmitted tokens older than 7 days, and optimize ID collection for batch deletion by ensuring uniqueness, improving performance and clarity. --- tests/unsubmitted_forms/README.md | 45 +++++++++++++++++++ .../cleanup_unsubmitted_forms.ts | 28 ++++++++---- 2 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 tests/unsubmitted_forms/README.md diff --git a/tests/unsubmitted_forms/README.md b/tests/unsubmitted_forms/README.md new file mode 100644 index 0000000..97de1e5 --- /dev/null +++ b/tests/unsubmitted_forms/README.md @@ -0,0 +1,45 @@ +# Cleanup Unsubmitted Forms - Solution + +## Video Explanation + +[Watch the Loom video](https://www.loom.com/share/3d0413ae4c2845cab8a3e3c93b62f9e3) + +## Issues Found & Fixes + +### 1. Date Calculation Bug +**Problem:** Missing `* 1000` for milliseconds conversion - was only subtracting ~10 minutes instead of 7 days. + +**Fix:** Added `* 1000` to convert seconds to milliseconds. + +### 2. Query Logic Error +**Problem:** Original query only found tokens from exactly 7 days ago (a 24-hour window), missing tokens that were 8, 9, 10+ days old. + +**Fix:** Changed to `lt: sevenDaysAgo` to find all tokens older than 7 days. + +### 3. Foreign Key Constraint Order +**Problem:** Deletion order could fail if corpus items reference entity_id. + +**Fix:** Reordered deletions to delete corpus items before entities. + +### 4. Orphaned Tokens Not Handled +**Problem:** If no relationship existed, the token was never deleted. + +**Fix:** Always delete the token, conditionally delete relationship only if it exists. + +### 5. Null EntityId Handling +**Problem:** Passing empty string `""` to delete queries when entityId is null would cause errors. + +**Fix:** Only delete entity-related data if entityId actually exists. + +### 6. N+1 Query Problem (Performance) +**Problem:** Loop with individual `findFirst` and `$transaction` per token would cause thousands of DB calls at scale. + +**Fix:** Refactored to batch approach - collect all IDs upfront, use `deleteMany` with `{ in: [...] }` in a single transaction. + +## Trade-offs Considered + +- **Batch vs Loop:** Chose batch deletions for O(1) DB round-trips instead of O(N). Trade-off: for very large datasets (50k+ records), the `IN` clause could hit DB limits - would need chunking in production. + +- **Memory:** Still loading all expired tokens into memory. For millions of records, would need cursor-based pagination. + +- **Atomicity:** Single transaction means all-or-nothing. If one delete fails, everything rolls back - which is generally safer for data consistency. diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 2c016de..94352a8 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -30,10 +30,12 @@ export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { try { const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); // 7 days in ms - // fetch only the fields we need to build our delete queries + // fetch only unsubmitted tokens older than 7 days + // submittedAt being null means the form was never submitted const expiredTokens = await prisma.publicFormsTokens.findMany({ where: { createdAt: { lt: sevenDaysAgo }, + submittedAt: null, }, select: { token: true, entityId: true, productId: true }, }); @@ -44,14 +46,22 @@ export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { return; } - // collect ids for batch deletion - filter out nulls - const tokenStrings = expiredTokens.map((t) => t.token); - const entityIds = expiredTokens - .map((t) => t.entityId) - .filter((id): id is string => id !== null && id !== undefined); - const productIds = expiredTokens - .map((t) => t.productId) - .filter((id): id is string => id !== null && id !== undefined); + // collect unique ids for batch deletion - dedupe to reduce query size + const tokenStrings = [...new Set(expiredTokens.map((t) => t.token))]; + const entityIds = [ + ...new Set( + expiredTokens + .map((t) => t.entityId) + .filter((id): id is string => id !== null && id !== undefined), + ), + ]; + const productIds = [ + ...new Set( + expiredTokens + .map((t) => t.productId) + .filter((id): id is string => id !== null && id !== undefined), + ), + ]; // batch delete in a single transaction - order matters for FK constraints await prisma.$transaction([ From f968029cb30bc922e667c36dbdb87088e4cf6c2d Mon Sep 17 00:00:00 2001 From: Ochiengsteven Date: Mon, 26 Jan 2026 11:29:26 +0300 Subject: [PATCH 8/8] Refactor cleanup_unsubmitted_forms to implement batch processing for improved memory management and performance, ensuring safe deletion of relationships tied to expired entities while maintaining atomic transactions for each batch. --- tests/unsubmitted_forms/README.md | 55 ++++-- .../cleanup_unsubmitted_forms.ts | 161 +++++++++++------- 2 files changed, 135 insertions(+), 81 deletions(-) diff --git a/tests/unsubmitted_forms/README.md b/tests/unsubmitted_forms/README.md index 97de1e5..159345c 100644 --- a/tests/unsubmitted_forms/README.md +++ b/tests/unsubmitted_forms/README.md @@ -14,32 +14,57 @@ ### 2. Query Logic Error **Problem:** Original query only found tokens from exactly 7 days ago (a 24-hour window), missing tokens that were 8, 9, 10+ days old. -**Fix:** Changed to `lt: sevenDaysAgo` to find all tokens older than 7 days. +**Fix:** Changed to `lt: cutoffDate` to find all tokens older than 7 days. -### 3. Foreign Key Constraint Order -**Problem:** Deletion order could fail if corpus items reference entity_id. +### 3. Missing Submission Status Check +**Problem:** Query fetched all old tokens regardless of whether the form was submitted, risking deletion of valid submitted data. -**Fix:** Reordered deletions to delete corpus items before entities. +**Fix:** Added `submittedAt: null` filter to only fetch unsubmitted tokens. -### 4. Orphaned Tokens Not Handled -**Problem:** If no relationship existed, the token was never deleted. +### 4. Wrong Relationship Deletion +**Problem:** Relationship query only filtered by `product_id` and `status`, which could accidentally delete relationships belonging to other valid tokens that share the same product. -**Fix:** Always delete the token, conditionally delete relationship only if it exists. +**Fix:** Added `entity_id: { in: entityIds }` to the relationship query to ensure we only delete relationships tied to the specific expired entities. -### 5. Null EntityId Handling -**Problem:** Passing empty string `""` to delete queries when entityId is null would cause errors. +### 5. Foreign Key Constraint Order +**Problem:** Deletion order could fail if child records reference parent tables. -**Fix:** Only delete entity-related data if entityId actually exists. +**Fix:** Reordered deletions: relationships → corpus items → tokens → entities (children before parents). -### 6. N+1 Query Problem (Performance) +### 6. Null EntityId Handling +**Problem:** Passing null/undefined values to delete queries would cause errors. + +**Fix:** Filter out null values using `filter((id): id is string => Boolean(id))` before batch operations. + +### 7. N+1 Query Problem (Performance) **Problem:** Loop with individual `findFirst` and `$transaction` per token would cause thousands of DB calls at scale. -**Fix:** Refactored to batch approach - collect all IDs upfront, use `deleteMany` with `{ in: [...] }` in a single transaction. +**Fix:** Refactored to batch approach using `deleteMany` with `{ in: [...] }` in a single transaction. + +### 8. Memory Issues with Large Datasets + +**Problem:** Loading all expired tokens at once could crash the process with Out of Memory errors. + +**Fix:** Implemented batching with `take: BATCH_SIZE` (1000 records) in a while loop to process in chunks. + +### 9. Duplicate IDs in Queries + +**Problem:** Multiple tokens could share the same entityId/productId, causing unnecessary parameter bloat in IN clauses. + +**Fix:** Deduplicate IDs using `[...new Set(...)]` before batch operations. + +## Final Solution Features + +- **Configurable constants:** `BATCH_SIZE` and `RETENTION_DAYS` at the top for easy adjustment +- **Batched processing:** Handles millions of records without memory issues +- **Safe relationship deletion:** Only deletes relationships tied to expired entities +- **Atomic transactions:** Each batch is processed in a single transaction +- **Observability:** Logs total counts of deleted records for monitoring ## Trade-offs Considered -- **Batch vs Loop:** Chose batch deletions for O(1) DB round-trips instead of O(N). Trade-off: for very large datasets (50k+ records), the `IN` clause could hit DB limits - would need chunking in production. +- **Batching:** Chose 1000 records per batch as a balance between performance and memory. Could be tuned based on server resources. -- **Memory:** Still loading all expired tokens into memory. For millions of records, would need cursor-based pagination. +- **Extra query for relationships:** Added a `findMany` to get relationship IDs before deletion. This adds one query per batch but ensures we never delete the wrong relationships. -- **Atomicity:** Single transaction means all-or-nothing. If one delete fails, everything rolls back - which is generally safer for data consistency. +- **Atomicity per batch:** Each batch is atomic, not the entire job. If the job fails mid-way, some batches will have been committed. This is acceptable for a cleanup job - it can safely be re-run. diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 94352a8..4125723 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -7,84 +7,113 @@ This is to prevent the database from being cluttered with unused tokens and entities. */ -/* Task Instructions: - * 1. Read and understand the code below - * 2. Identify ALL issues in the code (there are multiple) - * 3. Fix the issues and create a working solution - * 4. Create a PR with clear commit messages - * 5. Record a 3-5 minute Loom video explaining: - * - What issues you found - * - How you fixed them - * - Any trade-offs you considered - * - * Focus on: correctness, performance, error handling, and code clarity - * Expected time: 45-60 minutes - */ - // For the purpose of this test you can ignore that the imports are not working. import type { JobScheduleQueue } from "@prisma/client"; import { prisma } from "../endpoints/middleware/prisma"; import { update_job_status } from "./generic_scheduler"; +const BATCH_SIZE = 1000; +const RETENTION_DAYS = 7; + export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { try { - const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); // 7 days in ms - - // fetch only unsubmitted tokens older than 7 days - // submittedAt being null means the form was never submitted - const expiredTokens = await prisma.publicFormsTokens.findMany({ - where: { - createdAt: { lt: sevenDaysAgo }, - submittedAt: null, - }, - select: { token: true, entityId: true, productId: true }, - }); - - // nothing to clean up - if (expiredTokens.length === 0) { - await update_job_status(job.id, "completed"); - return; - } + const cutoffDate = new Date( + Date.now() - RETENTION_DAYS * 24 * 60 * 60 * 1000, + ); + + let totalTokensDeleted = 0; + let totalEntitiesDeleted = 0; + let totalRelationshipsDeleted = 0; + let totalCorpusDeleted = 0; + + // Process in batches to handle large datasets without memory issues + while (true) { + const expiredTokens = await prisma.publicFormsTokens.findMany({ + where: { + createdAt: { lt: cutoffDate }, + submittedAt: null, + }, + select: { + token: true, + entityId: true, + productId: true, + }, + take: BATCH_SIZE, + }); + + if (expiredTokens.length === 0) { + break; + } + + // Deduplicate identifiers for batch operations + const tokenStrings = [...new Set(expiredTokens.map((t) => t.token))]; - // collect unique ids for batch deletion - dedupe to reduce query size - const tokenStrings = [...new Set(expiredTokens.map((t) => t.token))]; - const entityIds = [ - ...new Set( - expiredTokens - .map((t) => t.entityId) - .filter((id): id is string => id !== null && id !== undefined), - ), - ]; - const productIds = [ - ...new Set( - expiredTokens - .map((t) => t.productId) - .filter((id): id is string => id !== null && id !== undefined), - ), - ]; - - // batch delete in a single transaction - order matters for FK constraints - await prisma.$transaction([ - // delete relationships tied to these products that are still "new" (unsubmitted) - prisma.relationship.deleteMany({ + const entityIds = [ + ...new Set( + expiredTokens + .map((t) => t.entityId) + .filter((id): id is string => Boolean(id)), + ), + ]; + + const productIds = [ + ...new Set( + expiredTokens + .map((t) => t.productId) + .filter((id): id is string => Boolean(id)), + ), + ]; + + // Find only relationships that belong to these specific expired tokens + // This prevents accidentally deleting relationships for non-expired tokens + // that may share the same product_id + const relationshipsToDelete = await prisma.relationship.findMany({ where: { product_id: { in: productIds }, status: "new", + // Ensure we only delete relationships tied to expired entities + entity_id: { in: entityIds }, }, - }), - // delete corpus items before entities (they reference entity_id) - prisma.new_corpus.deleteMany({ - where: { entity_id: { in: entityIds } }, - }), - // delete the entities - prisma.entity.deleteMany({ - where: { id: { in: entityIds } }, - }), - // finally delete the tokens - prisma.publicFormsTokens.deleteMany({ - where: { token: { in: tokenStrings } }, - }), - ]); + select: { id: true }, + }); + + const relationshipIds = relationshipsToDelete.map((r) => r.id); + + // Execute all deletions in a single atomic transaction + // Order respects foreign key constraints: children before parents + const results = await prisma.$transaction([ + prisma.relationship.deleteMany({ + where: { id: { in: relationshipIds } }, + }), + + prisma.new_corpus.deleteMany({ + where: { entity_id: { in: entityIds } }, + }), + + prisma.publicFormsTokens.deleteMany({ + where: { token: { in: tokenStrings } }, + }), + + prisma.entity.deleteMany({ + where: { id: { in: entityIds } }, + }), + ]); + + totalRelationshipsDeleted += results[0].count; + totalCorpusDeleted += results[1].count; + totalTokensDeleted += results[2].count; + totalEntitiesDeleted += results[3].count; + + // If we got fewer than BATCH_SIZE, we've processed everything + if (expiredTokens.length < BATCH_SIZE) { + break; + } + } + + console.log( + `Cleanup complete: ${totalTokensDeleted} tokens, ${totalEntitiesDeleted} entities, ` + + `${totalRelationshipsDeleted} relationships, ${totalCorpusDeleted} corpus items deleted`, + ); await update_job_status(job.id, "completed"); } catch (error) {