From 22fbc687c9abf14dfbd67d5b189a8d05ffd78b09 Mon Sep 17 00:00:00 2001 From: nYeonG4001 <2371324@hansung.ac.kr> Date: Sun, 10 May 2026 13:30:48 +0900 Subject: [PATCH 1/2] =?UTF-8?q?fix(sync):=20content=5Ftags=20=EC=86=8C?= =?UTF-8?q?=EA=B8=89=20=EC=A0=81=EC=9A=A9=20=EC=B6=94=EA=B0=80=20(DP-471)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _fetch_no_category_ids → _fetch_missing_content_tag_ids 로 변경 (category IS NULL → content_tags 없는 글, is_available = true AND category IS NOT NULL 한정, YouTube 제외) - sync_one()에 save_content_tags() 호출 추가 Co-Authored-By: Claude Sonnet 4.6 --- scripts/sync_ai_metadata.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/scripts/sync_ai_metadata.py b/scripts/sync_ai_metadata.py index 510cf04..ee99de6 100644 --- a/scripts/sync_ai_metadata.py +++ b/scripts/sync_ai_metadata.py @@ -1,10 +1,10 @@ """DynamoDB ai_summaries → PostgreSQL contents tags/category 동기화. -이미 요약이 생성된 콘텐츠 중 PostgreSQL에 tags/category가 없는 항목을 -DynamoDB에서 읽어 PostgreSQL에 써준다. LLM 재호출 없음. +이미 요약이 생성된 콘텐츠 중 content_tags가 없는 항목을 DynamoDB에서 읽어 +PostgreSQL content_tags에 INSERT한다. LLM 재호출 없음. 사용 예: - # category IS NULL인 전체 콘텐츠 동기화 + # content_tags 없는 전체 콘텐츠 동기화 (YouTube 제외) DATABASE_URL=postgresql://... 
python scripts/sync_ai_metadata.py # 특정 content_id만 @@ -35,11 +35,23 @@ logger = logging.getLogger(__name__) -def _fetch_no_category_ids(engine) -> list[str]: - """PostgreSQL에서 category가 없는 content_id 목록을 조회한다.""" +def _fetch_missing_content_tag_ids(engine) -> list[str]: + """PostgreSQL에서 content_tags가 없는 content_id 목록을 조회한다 (YouTube 제외).""" with engine.connect() as conn: rows = conn.execute( - text("SELECT id FROM contents WHERE category IS NULL ORDER BY created_at") + text( + """ + SELECT c.id FROM contents c + JOIN content_sources cs ON cs.id = c.source_id + WHERE c.is_available = true + AND c.category IS NOT NULL + AND cs.name <> 'YouTube' + AND NOT EXISTS ( + SELECT 1 FROM content_tags ct WHERE ct.content_id = c.id + ) + ORDER BY c.created_at + """ + ) ).fetchall() return [str(row[0]) for row in rows] @@ -73,6 +85,8 @@ def sync_one( tags=tags, category=str(category), ) + if tags: + content_repo.save_content_tags(content_id=content_id, tag_names=tags) logger.info( "[%s] 동기화 완료 — category=%s tags=%s", content_id, category, tags ) @@ -98,8 +112,8 @@ def main(content_ids: list[str] | None = None) -> None: target_ids = content_ids logger.info("지정된 content_id %d개 처리", len(target_ids)) else: - target_ids = _fetch_no_category_ids(engine) - logger.info("category 없는 콘텐츠 %d개 발견", len(target_ids)) + target_ids = _fetch_missing_content_tag_ids(engine) + logger.info("content_tags 없는 콘텐츠 %d개 발견", len(target_ids)) engine.dispose() @@ -127,7 +141,7 @@ def main(content_ids: list[str] | None = None) -> None: "--content-id", nargs="+", metavar="CONTENT_ID", - help="특정 content_id만 처리 (생략 시 category IS NULL 전체 처리)", + help="특정 content_id만 처리 (생략 시 content_tags 없는 전체 처리)", ) args = parser.parse_args() main(content_ids=args.content_id) From 5466910b0f990fd176829159dafe5ec8886eee0a Mon Sep 17 00:00:00 2001 From: nYeonG4001 <2371324@hansung.ac.kr> Date: Sun, 10 May 2026 13:36:40 +0900 Subject: [PATCH 2/2] =?UTF-8?q?style:=20black=2026.x=20=ED=8F=AC=EB=A7=B7?= 
=?UTF-8?q?=20=EC=A0=81=EC=9A=A9=20(DP-471)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/sync_ai_metadata.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/sync_ai_metadata.py b/scripts/sync_ai_metadata.py index ee99de6..5537725 100644 --- a/scripts/sync_ai_metadata.py +++ b/scripts/sync_ai_metadata.py @@ -38,9 +38,7 @@ def _fetch_missing_content_tag_ids(engine) -> list[str]: """PostgreSQL에서 content_tags가 없는 content_id 목록을 조회한다 (YouTube 제외).""" with engine.connect() as conn: - rows = conn.execute( - text( - """ + rows = conn.execute(text(""" SELECT c.id FROM contents c JOIN content_sources cs ON cs.id = c.source_id WHERE c.is_available = true @@ -50,9 +48,7 @@ def _fetch_missing_content_tag_ids(engine) -> list[str]: SELECT 1 FROM content_tags ct WHERE ct.content_id = c.id ) ORDER BY c.created_at - """ - ) - ).fetchall() + """)).fetchall() return [str(row[0]) for row in rows]