From 783105e8331debf9f55ad1ade3fb605c421c6be0 Mon Sep 17 00:00:00 2001 From: se348 Date: Thu, 4 Sep 2025 22:44:10 +0300 Subject: [PATCH 1/3] fix: cleanup unsubmitted forms with batch deletion and correct time handling --- .../cleanup_unsubmitted_forms.ts | 116 ++++++++---------- 1 file changed, 50 insertions(+), 66 deletions(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index e7bd3e2..135120b 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -1,84 +1,68 @@ -/* Context: - This is a scheduled job that runs every day at midnight to clean up forms that users started filling in but didn't submit which are older than 7 days. - When a user visits a public form, a token is generated and stored in the database. - This token is used to identify the user and link the answers to the entity. - An entity is the owner of data in the database, separated as it could be a business or an individual but has been decoupled from a login/user. - If the user does not submit the form, the token and the entity should be deleted after 7 days. - This is to prevent the database from being cluttered with unused tokens and entities. - */ +import type { JobScheduleQueue } from "@prisma/client"; +import { prisma } from "../endpoints/middleware/prisma"; +import { update_job_status as updateJobStatus } from "./generic_scheduler"; + +type RelationshipStatus = "new" | "submitted"; +type JobStatus = "completed" | "failed"; -/* Task Instructions: - * 1. Read and understand the code below - * 2. Identify ALL issues in the code (there are multiple) - * 3. Fix the issues and create a working solution - * 4. Create a PR with clear commit messages - * 5. Record a 3-5 minute Loom video explaining: - * - What issues you found - * - How you fixed them - * - Any trade-offs you considered +const BATCH_SIZE = 20 +const EXPIRATION_DAYS = 7 + +/** + * Cleans up expired unsubmitted public form tokens and all associated data. + * + * This scheduled job runs in batches and performs deletions: + * 1. Deletes all corpus items linked to the entities + * 2. Deletes all relationships linked to the batch of tokens + * 3. Deletes all expired tokens in the batch + * 4. Deletes all associated entities + * + * The function uses absolute date calculation (midnight 7 days ago) to avoid missing tokens + * if the job is delayed or re-queued. Batches reduce database load by minimizing repeated + * opening and closing of connections, while still ensuring correctness. * - * Focus on: correctness, performance, error handling, and code clarity - * Expected time: 45-60 minutes + * @param {JobScheduleQueue} job - The scheduled job object containing job metadata. + * @throws Will throw an error if any database operation fails, updating the job status to 'failed'. */ +export const cleanupUnsubmittedForms = async (job: JobScheduleQueue) => { + try { + // Calculate absolute EXPIRATION_DAYS day-old date (midnight EXPIRATION_DAYS days ago) + const today = new Date(); + today.setHours(0, 0, 0, 0); + const sevenDaysAgo = new Date(today.getTime() - EXPIRATION_DAYS * 24 * 60 * 60 * 1000); -// For the purpose of this test you can ignore that the imports are not working. -import type { JobScheduleQueue } from "@prisma/client"; -import { prisma } from "../endpoints/middleware/prisma"; -import { update_job_status } from "./generic_scheduler"; + while (true) { + const expiredTokens = await prisma.publicFormsTokens.findMany({ + where: { createdAt: { lt: sevenDaysAgo } }, + take: BATCH_SIZE, + }); -export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { - try { - //Find forms that were created 7 days ago and have not been submitted - const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60); - const sevenDaysAgoPlusOneDay = new Date( - sevenDaysAgo.getTime() + 24 * 60 * 60 * 1000 - ); + if (expiredTokens.length === 0) break; // exit when no more expired tokens - const expiredTokens = await prisma.publicFormsTokens.findMany({ - where: { - createdAt: { - gte: sevenDaysAgo, // greater than or equal to 7 days ago - lt: sevenDaysAgoPlusOneDay, // but less than 7 days ago + 1 day - }, - }, - }); + const entityIds = expiredTokens + .map((t) => t.entityId) + .filter(Boolean) as string[]; + const tokenValues = expiredTokens.map((t) => t.token); - for (const token of expiredTokens) { - const relationship = await prisma.relationship.findFirst({ + const relationships = await prisma.relationship.findMany({ where: { - product_id: token.productId, - status: "new", + product_id: { in: expiredTokens.map((t) => t.productId) }, + status: "new" as RelationshipStatus, }, }); - if (relationship) { - await prisma.$transaction([ - // Delete relationship - prisma.relationship.delete({ - where: { id: relationship.id }, - }), - // // Delete the token - prisma.publicFormsTokens.delete({ - where: { token: token.token }, - }), - // Delete all corpus items associated with the entity - prisma.new_corpus.deleteMany({ - where: { - entity_id: token.entityId || "", - }, - }), - // Delete the entity (company) - prisma.entity.delete({ - where: { id: token.entityId || "" }, - }), - ]); - } + await prisma.$transaction([ + prisma.new_corpus.deleteMany({ where: { entity_id: { in: entityIds } } }), + prisma.relationship.deleteMany({ where: { id: { in: relationships.map((r) => r.id) } } }), + prisma.publicFormsTokens.deleteMany({ where: { token: { in: tokenValues } } }), + prisma.entity.deleteMany({ where: { id: { in: entityIds } } }), + ]); } - await update_job_status(job.id, "completed"); + await updateJobStatus(job.id, "completed" as JobStatus); } catch (error) { console.error("Error cleaning up unsubmitted forms:", error); - await update_job_status(job.id, "failed"); + await updateJobStatus(job.id, "failed" as JobStatus); throw error; } }; From 8f408974a184a2071818a2b465c4d4790fea9cf9 Mon Sep 17 00:00:00 2001 From: se348 Date: Fri, 5 Sep 2025 08:02:16 +0300 Subject: [PATCH 2/3] fix: fetch from db fields that are used only. --- tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 135120b..0eb026f 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -34,6 +34,11 @@ export const cleanupUnsubmittedForms = async (job: JobScheduleQueue) => { while (true) { const expiredTokens = await prisma.publicFormsTokens.findMany({ where: { createdAt: { lt: sevenDaysAgo } }, + select: { + entityId: true, + token: true, + productId: true, + }, take: BATCH_SIZE, }); @@ -49,6 +54,9 @@ export const cleanupUnsubmittedForms = async (job: JobScheduleQueue) => { product_id: { in: expiredTokens.map((t) => t.productId) }, status: "new" as RelationshipStatus, }, + select: { + id: true, + }, }); await prisma.$transaction([ From aef7ad2604d98151a5ad32cbbd7d72176088c390 Mon Sep 17 00:00:00 2001 From: se348 Date: Fri, 5 Sep 2025 10:29:22 +0300 Subject: [PATCH 3/3] chore: add detailed comments. --- .../cleanup_unsubmitted_forms.ts | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts index 0eb026f..c2b60ae 100644 --- a/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts +++ b/tests/unsubmitted_forms/cleanup_unsubmitted_forms.ts @@ -1,11 +1,19 @@ import type { JobScheduleQueue } from "@prisma/client"; import { prisma } from "../endpoints/middleware/prisma"; -import { update_job_status as updateJobStatus } from "./generic_scheduler"; +import { update_job_status } from "./generic_scheduler"; type RelationshipStatus = "new" | "submitted"; type JobStatus = "completed" | "failed"; -const BATCH_SIZE = 20 +// Number of records to process in one batch. +// This value can be fine-tuned depending on: +// - database size +// - indexes available +// - workload characteristics +// Larger batches = fewer transactions but more memory/lock contention. +// Smaller batches = safer but potentially slower overall. +// Reasoning behind it is in my pr description. +const BATCH_SIZE = 20 const EXPIRATION_DAYS = 7 /** @@ -24,7 +32,7 @@ const EXPIRATION_DAYS = 7 * @param {JobScheduleQueue} job - The scheduled job object containing job metadata. * @throws Will throw an error if any database operation fails, updating the job status to 'failed'. */ -export const cleanupUnsubmittedForms = async (job: JobScheduleQueue) => { +export const cleanup_unsubmitted_forms = async (job: JobScheduleQueue) => { try { // Calculate absolute EXPIRATION_DAYS day-old date (midnight EXPIRATION_DAYS days ago) const today = new Date(); @@ -67,10 +75,12 @@ export const cleanupUnsubmittedForms = async (job: JobScheduleQueue) => { ]); } - await updateJobStatus(job.id, "completed" as JobStatus); + await update_job_status(job.id, "completed" as JobStatus); } catch (error) { + // TODO: This is not a good way to loggging. + // We should use loggers like winston. Check my pr description for more. console.error("Error cleaning up unsubmitted forms:", error); - await updateJobStatus(job.id, "failed" as JobStatus); + await update_job_status(job.id, "failed" as JobStatus); throw error; } };