diff --git a/backend/app/agent/_readiness_assessment_llm.py b/backend/app/agent/_readiness_assessment_llm.py new file mode 100644 index 000000000..b170b1cac --- /dev/null +++ b/backend/app/agent/_readiness_assessment_llm.py @@ -0,0 +1,179 @@ +import logging + +from textwrap import dedent + +from pydantic import BaseModel + +from app.agent.agent_types import LLMStats +from app.agent.llm_caller import LLMCaller +from app.agent.prompt_template import sanitize_input +from app.conversation_memory.conversation_memory_types import ConversationContext +from common_libs.llm.generative_models import GeminiGenerativeLLM +from common_libs.llm.models_utils import LLMConfig, JSON_GENERATION_CONFIG, ZERO_TEMPERATURE_GENERATION_CONFIG + +_TAGS_TO_FILTER = ["system instructions", "user's last input", "conversation history"] + +MIN_RESPONSIBILITIES_FOR_AUTO_LINKING = 5 +"""Minimum number of responsibilities required to skip exploratory questioning.""" + + +#TODO: this llm will eventually become the core of the intermediate agent that +# decides whether we have enough information to proceed to linking/ranking +class ReadinessAssessmentResponse(BaseModel): + """ + Response model for assessing whether enough information has been collected to move on to linking/ranking phase. + """ + reasoning: str + """ + Chain of Thought reasoning behind the assessment. + This acts as a "reasoning" field and should be predicted before the decision. + """ + + user_wants_to_continue: bool + """ + True if the user wants to continue to the next step (linking/ranking), + False if they want to add more responsibilities. + """ + + message: str + """ + A message to the user, or empty string if no message is needed. + Used for clarification when the user's response is unclear. + """ + + class Config: + extra = "forbid" + + +class _ReadinessAssessmentLLM: + """ + LLM-based assessment for determining if enough information has been collected + to move on to the linking/ranking phase, and for parsing user responses about continuing. + """ + + def __init__(self, logger: logging.Logger): + self._llm_caller = LLMCaller[ReadinessAssessmentResponse](model_response_type=ReadinessAssessmentResponse) + self.llm = GeminiGenerativeLLM( + system_instructions=_ReadinessAssessmentLLM._create_system_instructions(), + config=LLMConfig( + generation_config=ZERO_TEMPERATURE_GENERATION_CONFIG | JSON_GENERATION_CONFIG | { + "max_output_tokens": 2000 + } + )) + self.logger = logger + + @staticmethod + def has_enough_responsibilities(responsibilities_count: int) -> bool: + """ + Heuristic check to determine if enough responsibilities have been collected. + + Args: + responsibilities_count: The number of responsibilities collected + + Returns: + True if there are enough responsibilities (>= MIN_RESPONSIBILITIES_FOR_AUTO_LINKING), False otherwise + """ + return responsibilities_count >= MIN_RESPONSIBILITIES_FOR_AUTO_LINKING + + async def execute(self, + *, + responsibilities: list[str], + responsibilities_count: int, + user_input: str, + context: ConversationContext) -> tuple[bool, str, list[LLMStats]]: + """ + Assess whether enough information has been collected and parse user's response about continuing. + + Args: + responsibilities: List of responsibilities collected so far + responsibilities_count: Number of responsibilities + user_input: The user's input text (their response to the prompt) + context: The conversation context + + Returns: + A tuple of (user_wants_to_continue, message, llm_stats) + """ + llm_output, llm_stats = await self._llm_caller.call_llm( + llm=self.llm, + llm_input=_ReadinessAssessmentLLM._create_prompt_template( + responsibilities=responsibilities, + responsibilities_count=responsibilities_count, + user_input=user_input, + context=context + ), + logger=self.logger + ) + + if not llm_output: + # This may happen if the LLM fails to return a JSON object + # Instead of completely failing, we log a warning and default to staying in exploring + self.logger.warning("The LLM did not return any output for readiness assessment") + return False, "I didn't quite understand. Would you like to continue to the next step with the responsibilities we have, or would you like to add more? Please answer 'yes' to continue or 'no' to add more.", llm_stats + + self.logger.debug("Readiness assessment LLM output: %s", llm_output.model_dump()) + return llm_output.user_wants_to_continue, llm_output.message, llm_stats + + @staticmethod + def _create_system_instructions() -> str: + system_instructions_template = dedent("""\ + + # Role + You are an expert at assessing whether enough information has been collected about a work experience + and understanding user intent from their responses to questions about continuing to the next step. + + # Task + The user has been asked whether they want to continue to the next step (linking and ranking their skills) + or add more responsibilities to their experience description. + + Analyze the user's response and determine if they want to continue or add more responsibilities. + If the response is unclear, provide a clarifying message in the "message" field. + + # Response Schema + Your response must always be a JSON object with the following schema: + - reasoning: A step-by-step explanation of how you interpreted the user's response and + why you set user_wants_to_continue to the specific value. This should include + consideration of the number of responsibilities collected and the context. + This field is REQUIRED and must be a non-empty string. + - user_wants_to_continue: A boolean - true if they want to continue, false if they want to add more. + This field is REQUIRED. + - message: A message to the user (empty string if no clarification is needed). + This field is REQUIRED and must be a string (can be an empty string). + + Your response must always be a JSON object with ALL THREE fields: reasoning, user_wants_to_continue, and message. + + """) + + return system_instructions_template + + @staticmethod + def _create_prompt_template(*, + responsibilities: list[str], + responsibilities_count: int, + user_input: str, + context: ConversationContext) -> str: + """ + Create the prompt template for the readiness assessment. + """ + responsibilities_text = "" + if responsibilities: + responsibilities_text = "\n".join(f" {i + 1}. {resp}" for i, resp in enumerate(responsibilities)) + else: + responsibilities_text = " No responsibilities have been collected yet." + + prompt = dedent("""\ + + {responsibilities_text} + + Total responsibilities collected: {responsibilities_count} + + + + {user_input} + + """).format( + responsibilities_text=responsibilities_text, + responsibilities_count=responsibilities_count, + user_input=sanitize_input(user_input.strip(), _TAGS_TO_FILTER) + ) + + return prompt diff --git a/backend/app/agent/collect_experiences_agent/_conversation_llm.py b/backend/app/agent/collect_experiences_agent/_conversation_llm.py index c97db8f14..48c7be9d4 100644 --- a/backend/app/agent/collect_experiences_agent/_conversation_llm.py +++ b/backend/app/agent/collect_experiences_agent/_conversation_llm.py @@ -53,14 +53,19 @@ def _get_incomplete_experiences_instructions(collected_data: list[CollectedData] instructions_template = dedent("""\ #Incomplete Experiences Priority - IMPORTANT: You have incomplete experiences from previous work types that need more information. - Before moving on to explore new work types, you should prioritize asking questions to complete these incomplete experiences. + CRITICAL PRIORITY: You MUST complete incomplete experiences before exploring new work types. + + You have incomplete experiences from previous work types that need more information. + These incomplete experiences take ABSOLUTE PRIORITY over exploring new work types. + + You MUST ask questions to complete these incomplete experiences FIRST before asking about new work types. + Do NOT ask about new work types until you have gathered all available information for these incomplete experiences. Incomplete experiences that need more information: {incomplete_experiences_list} - - When you have incomplete experiences, ask questions to fill in the missing information for these experiences. - Only move on to exploring new work types after you have gathered all available information for incomplete experiences. + + Your next question MUST be about one of these incomplete experiences to gather the missing information. + Do NOT ask about new work types or explore new experiences until these are complete. """) return replace_placeholders_with_indent(instructions_template, @@ -459,12 +464,21 @@ def _transition_instructions(*, ): # Check if there are incomplete experiences that need to be completed first incomplete_experiences = _find_incomplete_experiences(collected_data) + incomplete_experiences_list = [] + for i, (index, experience, missing_fields) in enumerate(incomplete_experiences, 1): + missing_fields_str = ", ".join(missing_fields) + incomplete_experiences_list.append(f"{i}. Experience #{index + 1}: \"{experience.experience_title}\" - Missing: {missing_fields_str}") + + incomplete_experiences_text = "\n".join(incomplete_experiences_list) if incomplete_experiences: - return dedent("""\ + incomplete_experiences_prompt = dedent("""\ IMPORTANT: You have incomplete experiences that need more information before moving to the next work type. Ask questions to complete the missing information for these incomplete experiences. + These are the incomplete experiences: + {incomplete_experiences_list} Do not respond with until all incomplete experiences have been completed. """) + return replace_placeholders_with_indent(incomplete_experiences_prompt, incomplete_experiences_text=incomplete_experiences_text) # if not all_fields_collected: # need to fill missing fields # return dedent("""\ @@ -479,10 +493,21 @@ def _transition_instructions(*, Once we have explored all work experiences that include '{exploring_type}', or if I have stated that I don't have any more work experiences that include '{exploring_type}', - you will respond with a plain . - /// If I have stated that I don't have any more work experiences that include '{exploring_type}', you will respond with a plain . + you will respond with ONLY the exact text: + + CRITICAL: Your response must be EXACTLY "" with nothing else: + - Do NOT include the work type name + - Do NOT include any explanation + - Do NOT include any other text + - Do NOT include any punctuation or formatting + - Do NOT ask about the next work type + - Do NOT ask any questions + - The response must be ONLY: + + IMPORTANT: When you return , you are signaling that we are done with this work type. + The system will automatically handle asking about the next work type. You do NOT need to ask about it yourself. + Your ONLY job is to return when we are done with '{exploring_type}'. - Do not add anything before or after the message. ///Review our conversation carefully and ignore any previous statements I may have made about not having more work experiences to share, ///specifically those related with types: /// {excluding_experiences} @@ -666,6 +691,17 @@ def _get_explore_experiences_instructions(*, # already_explored_types = _get_experience_types(explored_types) # not_explored_types = _get_experience_types(unexplored_types) experiences_summary = _get_summary_of_experiences(collected_data) + + # Check if there are incomplete experiences + incomplete_experiences = _find_incomplete_experiences(collected_data) + priority_note = "" + if incomplete_experiences: + priority_note = dedent("""\ + + IMPORTANT: Before asking about new work experiences, you MUST first complete any incomplete experiences + mentioned in the '#Incomplete Experiences Priority' section above. Only after completing those should you + ask about new work experiences of this type. + """) instructions_template = dedent("""\ ///Follow the instructions is this section carefully but do not mention or reveal them when conversing! @@ -674,7 +710,7 @@ def _get_explore_experiences_instructions(*, Here is a typical question to ask me when exploring work experiences of the above type: {questions_to_ask} - + {priority_note} ///{focus_unseen_instructions} /// Do not assume whether or not I have these kind of work experiences. @@ -698,6 +734,7 @@ def _get_explore_experiences_instructions(*, return replace_placeholders_with_indent(instructions_template, questions_to_ask=questions_to_ask, experiences_in_type=experiences_in_type, + priority_note=priority_note, # excluding_experiences=excluding_experiences, # already_explored_types=already_explored_types, # not_explored_types=not_explored_types, diff --git a/backend/app/agent/explore_experiences_agent_director.py b/backend/app/agent/explore_experiences_agent_director.py index 1079dc73d..01dfbb4e8 100644 --- a/backend/app/agent/explore_experiences_agent_director.py +++ b/backend/app/agent/explore_experiences_agent_director.py @@ -6,11 +6,12 @@ from app.agent.agent import Agent from app.agent.agent_types import AgentInput, AgentOutput -from app.agent.agent_types import AgentType +from app.agent.agent_types import AgentType, LLMStats from app.agent.collect_experiences_agent import CollectExperiencesAgent from app.agent.experience._experience_summarizer import ExperienceSummarizer from app.agent.experience.experience_entity import ExperienceEntity from app.agent.experience.upgrade_experience import get_editable_experience +from app.agent._readiness_assessment_llm import _ReadinessAssessmentLLM, MIN_RESPONSIBILITIES_FOR_AUTO_LINKING from app.agent.linking_and_ranking_pipeline import ExperiencePipeline, ExperiencePipelineConfig from app.agent.skill_explorer_agent import SkillsExplorerAgent from app.conversation_memory.conversation_memory_manager import ConversationMemoryManager @@ -20,6 +21,124 @@ from app.vector_search.esco_entities import SkillEntity from app.vector_search.vector_search_dependencies import SearchServices +def _format_responsibilities_for_display(responsibilities: list[str], experience_title: str = None) -> str: + """ + Format responsibilities list for display to the user. + + Args: + responsibilities: List of responsibility strings + experience_title: Title of the experience to include in the message + + Returns: + Formatted string showing the responsibilities + """ + if not responsibilities: + return "No responsibilities have been collected yet." + + formatted = f"Great, here's what we have for your experience as '{experience_title}':\n\n" + + for resp in responsibilities: + formatted += f"- {resp}\n" + + return formatted + + +async def _check_and_prompt_for_linking(*, + logger, + current_experience: "ExperienceState", + user_input: AgentInput, + context: ConversationContext, + conversation_manager: ConversationMemoryManager) -> tuple[ + AgentOutput | None, bool, list[LLMStats]]: + """ + Check if we should prompt the user to continue to linking/ranking phase. + + Returns: + A tuple of (AgentOutput | None, should_continue_to_linking, llm_stats) + """ + # Check if we're in EXPLORING_SKILLS phase + if current_experience.dive_in_phase != DiveInPhase.EXPLORING_SKILLS: + return None, False, [] + + # Check if we have enough responsibilities using the LLM's heuristic check + responsibilities_count = len(current_experience.experience.responsibilities.responsibilities) + if not _ReadinessAssessmentLLM.has_enough_responsibilities(responsibilities_count): + logger.info( + "Responsibilities Check: Not enough responsibilities (%d) to prompt for linking, need at least %d", + responsibilities_count, + MIN_RESPONSIBILITIES_FOR_AUTO_LINKING + ) + return None, False, [] + + # Create the prompt message (we'll use this for both initial prompt and LLM parsing) + responsibilities_text = _format_responsibilities_for_display( + current_experience.experience.responsibilities.responsibilities, + experience_title=current_experience.experience.experience_title + ) + prompt_message = ( + f"{responsibilities_text}\n\n" + f"Are you sure this is all you did, or is there more you would like to add? " + ) + + # Check if we've already asked + if current_experience.asked_to_continue_to_linking: + # We've already asked, so parse the user's response using LLM + if user_input.is_artificial: + # This is an artificial input (like when transitioning), don't process it + logger.info("Responsibilities Check: Artificial input, not processing") + return None, False, [] + + # Use LLM to parse the user's response + llm_parser = _ReadinessAssessmentLLM(logger) + user_wants_to_continue, clarification_message, llm_stats = await llm_parser.execute( + responsibilities=current_experience.experience.responsibilities.responsibilities, + responsibilities_count=responsibilities_count, + user_input=user_input.message, + context=context + ) + + if user_wants_to_continue: + # User wants to continue to linking + logger.info("Responsibilities Check: User wants to continue to linking (LLM parsed)") + return None, True, llm_stats + else: + # User wants to add more responsibilities + current_experience.asked_to_continue_to_linking = False # Reset so we can ask again later + logger.info("Responsibilities Check: User wants to add more responsibilities (LLM parsed)") + + # If there's a clarification message, return it + if clarification_message: + clarification_output = AgentOutput( + message_for_user=clarification_message, + finished=False, + agent_type=AgentType.EXPLORE_EXPERIENCES_AGENT, + agent_response_time_in_sec=0, + llm_stats=llm_stats + ) + # Record the user's input and clarification in conversation history + await conversation_manager.update_history(user_input, clarification_output) + return clarification_output, False, llm_stats + + return None, False, llm_stats + + # We haven't asked yet, so show responsibilities and ask + # Mark that we've asked + current_experience.asked_to_continue_to_linking = True + + # Create and return the prompt + agent_output = AgentOutput( + message_for_user=prompt_message, + finished=False, + agent_type=AgentType.EXPLORE_EXPERIENCES_AGENT, + agent_response_time_in_sec=0, + llm_stats=[] + ) + + # Update conversation history + await conversation_manager.update_history(user_input, agent_output) + + return agent_output, False, [] + class ConversationPhase(Enum): """ @@ -55,6 +174,12 @@ class ExperienceState(BaseModel): The experience entity that is being explored. """ + asked_to_continue_to_linking: bool = False + """ + Flag to track if we've already asked the user if they want to continue to linking/ranking. + This prevents asking multiple times. + """ + class Config: extra = "forbid" @@ -213,18 +338,66 @@ async def _dive_into_experiences(self, *, if picked_new_experience: # When transitioning between states set this message to "" and handle it in the execute method of the agent user_input = AgentInput(message="", is_artificial=True) - # The agent will explore the skills for the experience and update the experience entity - self._exploring_skills_agent.set_experience(current_experience.experience) - agent_output: AgentOutput = await self._exploring_skills_agent.execute(user_input=user_input, context=context) - # Update the conversation history - await self._conversation_manager.update_history(user_input, agent_output) - # get the context again after updating the history - context = await self._conversation_manager.get_conversation_context() - if not agent_output.finished: - return agent_output - # advance to the next sub-phase - current_experience.dive_in_phase = DiveInPhase.LINKING_RANKING + # Check if we should prompt the user to continue to linking/ranking + prompt_output, should_continue_to_linking, llm_stats = await _check_and_prompt_for_linking( + logger=self.logger, + current_experience=current_experience, + user_input=user_input, + context=context, + conversation_manager=self._conversation_manager + ) + + # If we need to show a prompt, return it + # Note: If prompt_output is a clarification message (user gave unclear response), + # the user's input has already been recorded in _check_and_prompt_for_linking + # If prompt_output is the initial prompt, it was also recorded there + if prompt_output is not None: + return prompt_output + + # If user said yes, advance to linking/ranking phase + if should_continue_to_linking: + # Record the user's response in conversation history + confirmation_output = AgentOutput( + message_for_user="Great! Let's continue to the next step.", + finished=False, + agent_type=self._agent_type, + agent_response_time_in_sec=0, + llm_stats=llm_stats + ) + await self._conversation_manager.update_history(user_input, confirmation_output) + # get the context again after updating the history + await self._conversation_manager.get_conversation_context() + + current_experience.dive_in_phase = DiveInPhase.LINKING_RANKING + # Reset the flag for future use + current_experience.asked_to_continue_to_linking = False + else: + # Continue with the skills explorer agent + # The agent will explore the skills for the experience and update the experience entity + self._exploring_skills_agent.set_experience(current_experience.experience) + agent_output: AgentOutput = await self._exploring_skills_agent.execute(user_input=user_input, + context=context) + # Update the conversation history + await self._conversation_manager.update_history(user_input, agent_output) + + # After the agent executes, check again if we should prompt (in case more responsibilities were added) + # Only check if we haven't already asked and we have enough responsibilities + if not agent_output.finished: + # Check if we should prompt (but don't try to parse response from artificial input) + # We'll check again on the next user input + responsibilities_count = len(current_experience.experience.responsibilities.responsibilities) + if (_ReadinessAssessmentLLM.has_enough_responsibilities(responsibilities_count) and + not current_experience.asked_to_continue_to_linking): + # We have enough responsibilities and haven't asked yet + # We'll prompt on the next turn, for now return the agent output + pass + + # Agent is not finished, return its output + return agent_output + + # Agent finished, advance to the next sub-phase + current_experience.dive_in_phase = DiveInPhase.LINKING_RANKING if current_experience.dive_in_phase == DiveInPhase.LINKING_RANKING: if current_experience.experience.responsibilities.responsibilities: @@ -304,11 +477,61 @@ async def execute(self, user_input: AgentInput, context: ConversationContext) -> # The experiences are still being collected, but we can already store them so that we can # present them to the user even if data collection has not finished. - # The experiences will be overwritten every time + # The experiences will be overwritten every time, but we preserve responsibilities from CV injection experiences = self._collect_experiences_agent.get_experiences() - state.experiences_state.clear() + + # Helper function to normalize strings for matching + def _normalize(value: str | None) -> str: + return (value or "").strip().lower() + + # Create a new dict to store updated experiences, preserving existing ones with responsibilities + new_experiences_state = {} for exp in experiences: - state.experiences_state[exp.uuid] = ExperienceState(experience=exp) + # Try to find existing experience by UUID first (fast path) + existing_state = state.experiences_state.get(exp.uuid) + + # If not found by UUID, try matching by title/company/location (for CV-injected experiences) + matched_uuid = exp.uuid # Default to new experience's UUID + if not existing_state: + exp_key = ( + _normalize(exp.experience_title), + _normalize(exp.company), + _normalize(exp.location), + ) + for existing_uuid, existing in state.experiences_state.items(): + existing_key = ( + _normalize(existing.experience.experience_title), + _normalize(existing.experience.company), + _normalize(existing.experience.location), + ) + if existing_key == exp_key: + existing_state = existing + matched_uuid = existing_uuid # Use the existing UUID + break + + if existing_state and existing_state.experience.responsibilities.responsibilities: + # Preserve the existing experience with its responsibilities + # Update only the basic fields that might have changed + responsibilities_count = len(existing_state.experience.responsibilities.responsibilities) + self.logger.debug( + "Preserving responsibilities for experience {title=%s, uuid=%s, responsibilities=%d}", + exp.experience_title, + matched_uuid, + responsibilities_count + ) + existing_state.experience.experience_title = exp.experience_title + existing_state.experience.company = exp.company + existing_state.experience.location = exp.location + existing_state.experience.timeline = exp.timeline + existing_state.experience.work_type = exp.work_type + # Use the matched UUID (preserves CV-injected UUID if matched) + new_experiences_state[matched_uuid] = existing_state + else: + # Create a new experience state (no existing one or no responsibilities to preserve) + new_experiences_state[exp.uuid] = ExperienceState(experience=exp) + + # Replace the old state with the new one + state.experiences_state = new_experiences_state # If collecting is not finished then return the output to the user to continue collecting if not agent_output.finished: diff --git a/backend/app/conversations/routes.py b/backend/app/conversations/routes.py index cc21a03cd..0666861ff 100644 --- a/backend/app/conversations/routes.py +++ b/backend/app/conversations/routes.py @@ -32,15 +32,15 @@ from app.users.auth import Authentication, UserInfo -async def get_conversation_service(agent_director: LLMAgentDirector = Depends(get_agent_director), - application_state_manager: ApplicationStateManager = Depends( - get_application_state_manager), - conversation_memory_manager: ConversationMemoryManager = Depends( - get_conversation_memory_manager), - db: AsyncIOMotorDatabase = Depends( - CompassDBProvider.get_application_db), - metrics_service: IMetricsService = Depends( - get_metrics_service)) -> IConversationService: +def get_conversation_service(agent_director: LLMAgentDirector = Depends(get_agent_director), + application_state_manager: ApplicationStateManager = Depends( + get_application_state_manager), + conversation_memory_manager: ConversationMemoryManager = Depends( + get_conversation_memory_manager), + db: AsyncIOMotorDatabase = Depends( + CompassDBProvider.get_application_db), + metrics_service: IMetricsService = Depends( + get_metrics_service)) -> IConversationService: return ConversationService(agent_director=agent_director, application_state_metrics_recorder=ApplicationStateMetricsRecorder( application_state_manager=application_state_manager, diff --git a/backend/app/conversations/service.py b/backend/app/conversations/service.py index 9cf207e11..cd3b21df2 100644 --- a/backend/app/conversations/service.py +++ b/backend/app/conversations/service.py @@ -81,8 +81,7 @@ async def send(self, user_id: str, session_id: int, user_input: str, clear_memor if filter_pii: user_input = await sensitive_filter.obfuscate(user_input) - # set the sent_at for the user input - user_input = AgentInput(message=user_input, sent_at=datetime.now(timezone.utc)) + user_input = AgentInput(message=user_input, sent_at=datetime.now(timezone.utc), is_artificial=False) # set the state of the agent director, the conversation memory manager and all the agents state = await self._application_state_metrics_recorder.get_state(session_id) diff --git a/backend/app/users/cv/repository.py b/backend/app/users/cv/repository.py index 9c63a55b9..d83939b41 100644 --- a/backend/app/users/cv/repository.py +++ b/backend/app/users/cv/repository.py @@ -7,7 +7,7 @@ from app.server_dependencies.database_collections import Collections from app.users.cv.errors import DuplicateCVUploadError -from app.users.cv.types import UserCVUpload, UploadProcessState +from app.users.cv.types import UserCVUpload, UploadProcessState, CVStructuredExtraction from common_libs.time_utilities import get_now, datetime_to_mongo_date, mongo_date_to_datetime @@ -54,15 +54,23 @@ async def mark_failed(self, user_id: str, upload_id: str, *, error_code: str, er raise NotImplementedError() @abstractmethod - async def store_experiences(self, user_id: str, upload_id: str, *, experiences: list[str]) -> bool: + async def store_structured_extraction(self, user_id: str, upload_id: str, *, structured_extraction: CVStructuredExtraction) -> bool: raise NotImplementedError() @abstractmethod async def mark_cancelled(self, user_id: str, upload_id: str) -> bool: raise NotImplementedError() - @abstractmethod async def get_user_uploads(self, *, user_id: str) -> list[UserCVUpload]: + """Optional extension point: return completed uploads for a user.""" + raise NotImplementedError() + + @abstractmethod + async def mark_state_injected(self, user_id: str, upload_id: str) -> bool: + raise NotImplementedError() + + @abstractmethod + async def mark_injection_failed(self, user_id: str, upload_id: str, *, error: str) -> bool: raise NotImplementedError() @@ -88,7 +96,11 @@ def _to_db_doc(upload: UserCVUpload) -> dict: "last_activity_at": datetime_to_mongo_date(upload.last_activity_at or get_now()), "error_code": getattr(upload, "error_code", None), "error_detail": getattr(upload, "error_detail", None), - "experience_bullets": getattr(upload, "experience_bullets", None), + "state_injected": getattr(upload, "state_injected", False), + "injection_error": getattr(upload, "injection_error", None), + "structured_extraction": ( + upload.structured_extraction.model_dump() if upload.structured_extraction else None + ), } @staticmethod @@ -108,7 +120,12 @@ def _from_db_doc(doc: dict) -> UserCVUpload: last_activity_at=mongo_date_to_datetime(doc.get("last_activity_at")), error_code=doc.get("error_code"), error_detail=doc.get("error_detail"), - experience_bullets=doc.get("experience_bullets"), + state_injected=doc.get("state_injected", False), + injection_error=doc.get("injection_error"), + structured_extraction=( + CVStructuredExtraction.model_validate(doc.get("structured_extraction")) + if doc.get("structured_extraction") else None + ), ) async def insert_upload(self, upload: UserCVUpload) -> str: @@ -151,7 +168,7 @@ async def insert_upload(self, upload: UserCVUpload) -> str: reset_payload["cancel_requested"] = False reset_payload["error_code"] = None reset_payload["error_detail"] = None - reset_payload["experience_bullets"] = None + reset_payload["structured_extraction"] = None await self._collection.update_one( {"_id": existing["_id"]}, {"$set": reset_payload} ) @@ -247,7 +264,7 @@ async def mark_failed(self, user_id: str, upload_id: str, *, error_code: str, er ) return res.modified_count > 0 - async def store_experiences(self, user_id: str, upload_id: str, *, experiences: list[str]) -> bool: + async def store_structured_extraction(self, user_id: str, upload_id: str, *, structured_extraction: CVStructuredExtraction) -> bool: res = await self._collection.update_one( { "user_id": user_id, @@ -256,7 +273,7 @@ async def store_experiences(self, user_id: str, upload_id: str, *, experiences: }, { "$set": { - "experience_bullets": experiences, + "structured_extraction": structured_extraction.model_dump(), "last_activity_at": datetime_to_mongo_date(get_now()), }, }, @@ -296,6 +313,38 @@ async def mark_cancelled(self, user_id: str, upload_id: str) -> bool: ) return res.modified_count > 0 + async def mark_state_injected(self, user_id: str, upload_id: str) -> bool: + res = await self._collection.update_one( + { + "user_id": user_id, + "upload_id": upload_id, + }, + { + "$set": { + "state_injected": True, + "injection_error": None, + "last_activity_at": datetime_to_mongo_date(get_now()), + }, + }, + ) + return res.modified_count > 0 + + async def mark_injection_failed(self, user_id: str, upload_id: str, *, error: str) -> bool: + res = await self._collection.update_one( + { + "user_id": user_id, + "upload_id": upload_id, + }, + { + "$set": { + "state_injected": False, + "injection_error": error, + "last_activity_at": datetime_to_mongo_date(get_now()), + }, + }, + ) + return res.modified_count > 0 + async def get_user_uploads(self, *, user_id: str) -> list[UserCVUpload]: """ Get all COMPLETED uploads for a specific user diff --git a/backend/app/users/cv/routes.py b/backend/app/users/cv/routes.py index bc54cf1c5..9eb996b2c 100644 --- a/backend/app/users/cv/routes.py +++ b/backend/app/users/cv/routes.py @@ -5,7 +5,6 @@ from fastapi import APIRouter, Depends, HTTPException, Path, Request from app.constants.errors import HTTPErrorResponse -from app.users.auth import Authentication, UserInfo from app.users.cv.constants import ( MAX_CV_SIZE_BYTES, MAX_MULTIPART_OVERHEAD_BYTES, @@ -14,11 +13,19 @@ ) from app.users.cv.errors import MarkdownConversionTimeoutError, MarkdownTooLongError, PayloadTooLargeErrorResponse, \ EmptyMarkdownError, CVLimitExceededError, CVUploadRateLimitExceededError, DuplicateCVUploadError +from app.users.auth import Authentication, UserInfo +from app.users.cv.service import CVUploadService, ICVUploadService +from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor +from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor +from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool from app.users.cv.get_repository import get_user_cv_repository from app.users.cv.repository import IUserCVRepository -from app.users.cv.service import CVUploadService, ICVUploadService from app.users.cv.storage import _get_cv_storage_service, ICVCloudStorageService -from app.users.cv.types import CVUploadStatusResponse, CVUploadResponseListItem +from app.server_dependencies.application_state_dependencies import get_application_state_manager +from app.users.cv.types import CVUploadStatus, CVUploadListItemResponse +from app.users.get_user_preferences_repository import get_user_preferences_repository +from app.users.repositories import UserPreferenceRepository + logger = logging.getLogger(__name__) @@ -33,13 +40,24 @@ class _PayloadTooLargeErrorResponse(PayloadTooLargeErrorResponse): async def _get_cv_service( repository: IUserCVRepository = Depends(get_user_cv_repository), - cv_storage_service: ICVCloudStorageService = Depends(_get_cv_storage_service)) -> ICVUploadService: + cv_storage_service: ICVCloudStorageService = Depends(_get_cv_storage_service), + application_state_manager=Depends(get_application_state_manager), + user_preferences_repository: UserPreferenceRepository = Depends(get_user_preferences_repository)) -> ICVUploadService: global _cv_service_singleton if _cv_service_singleton is None: async with _cv_service_lock: if _cv_service_singleton is None: - _cv_service_singleton = CVUploadService(repository=repository, cv_cloud_storage_service=cv_storage_service) + _tool = _ResponsibilitiesExtractionTool(logger) + _resp_extractor = CVResponsibilitiesExtractor(logger, _tool) + _structured_extractor = CVStructuredExperienceExtractor(logger, _resp_extractor) + _cv_service_singleton = CVUploadService( + repository=repository, + cv_cloud_storage_service=cv_storage_service, + structured_extractor=_structured_extractor, + application_state_manager=application_state_manager, + user_preferences_repository=user_preferences_repository, + ) return _cv_service_singleton @@ -97,6 +115,37 @@ def _get_filename_from_headers(request: Request) -> str | None: def add_user_cv_routes(users_router: APIRouter, auth: Authentication): router = APIRouter(prefix="/{user_id}/cv", tags=["users-cv"]) + @router.get( + path="", + status_code=HTTPStatus.OK, + response_model=list[CVUploadListItemResponse], + responses={ + HTTPStatus.FORBIDDEN: {"model": HTTPErrorResponse}, + HTTPStatus.INTERNAL_SERVER_ERROR: {"model": HTTPErrorResponse}, + }, + name="list user uploaded CVs", + description=( + "List previously uploaded CVs for a user (COMPLETED uploads only)." + ), + ) + async def _get_uploaded_cvs( + user_id: str = Path(description="the unique identifier of the user", examples=["1"]), + user_info: UserInfo = Depends(auth.get_user_info()), + service: ICVUploadService = Depends(_get_cv_service), + ) -> list[CVUploadListItemResponse]: + """ + List previously uploaded CVs for a user (COMPLETED uploads only). + """ + try: + if user_info.user_id != user_id: + raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Cannot list CVs for a different user") + return await service.get_user_cvs(user_id=user_id) + except HTTPException: + raise + except Exception as e: + logger.exception(e) + raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Oops! Something went wrong.") + @router.post( path="", status_code=HTTPStatus.OK, @@ -132,6 +181,9 @@ async def _upload_cv( user_info: UserInfo = Depends(auth.get_user_info()), service: ICVUploadService = Depends(_get_cv_service), ) -> dict: + """ + Upload a CV file by streaming the raw request body. + """ # Validate size early using Content-Length (no multipart overhead for raw) _validate_request_size_header(request) content_length_header = request.headers.get("content-length") @@ -200,6 +252,7 @@ async def _upload_cv( user_id=user_id, file_bytes=file_bytes, filename=filename, + session_id=None, # Service will fetch it internally ) logger.info("CV processed successfully {user_id=%s, upload_id=%s}", user_id, upload_id) return {"upload_id": upload_id} @@ -232,7 +285,7 @@ async def _upload_cv( @router.post("/{upload_id}/cancel", response_model=dict) async def cancel_cv_upload( - user_id: str = Path(..., description="User ID"), + user_id: str = Path(..., description="User's ID"), upload_id: str = Path(..., description="Upload ID to cancel"), service: ICVUploadService = Depends(get_cv_service), user_info: UserInfo = Depends(auth.get_user_info()), @@ -264,7 +317,24 @@ async def cancel_cv_upload( detail="Failed to cancel upload" ) - @router.get("/{upload_id}", response_model=CVUploadStatusResponse) + @router.post("/{upload_id}/inject", status_code=HTTPStatus.OK) + async def _reinject_cv( + user_id: str = Path(..., description="User ID"), + upload_id: str = Path(..., description="Upload ID to reinject"), + user_info: UserInfo = Depends(auth.get_user_info()), + service: ICVUploadService = Depends(_get_cv_service), + ) -> dict: + """ + Reinject CV state into the user's current session. + """ + if user_info.user_id != user_id: + raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Cannot reinject CV for a different user") + + # Service will fetch the most recent session internally + result = await service.reinject_upload(user_id=user_id, upload_id=upload_id, session_id=None) + return result + + @router.get("/{upload_id}", response_model=CVUploadStatus) async def get_upload_status( user_id: str = Path(..., description="User ID"), upload_id: str = Path(..., description="Upload ID to get status for"), @@ -298,46 +368,4 @@ async def get_upload_status( detail="Failed to get upload status" ) - @router.get( - path="", - status_code=HTTPStatus.OK, - response_model=list[CVUploadResponseListItem], - responses={ - HTTPStatus.FORBIDDEN: {"model": HTTPErrorResponse}, - HTTPStatus.INTERNAL_SERVER_ERROR: {"model": HTTPErrorResponse}, - }, - description="Retrieve all CVs uploaded by the user", - ) - async def get_user_cvs( - user_id: str = Path(description="the unique identifier of the user", examples=["1"]), - user_info: UserInfo = Depends(auth.get_user_info()), - service: ICVUploadService = Depends(_get_cv_service), - ) -> list[CVUploadResponseListItem]: - try: - if user_info.user_id != user_id: - raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Cannot access CVs for a different user") - - # Get user CVs through the service - uploads = await service.get_user_cvs(user_id=user_id) - - return [ - CVUploadResponseListItem( - upload_id=upload.upload_id, - filename=upload.filename, - uploaded_at=upload.created_at, - upload_process_state=upload.upload_process_state, - experiences_data=upload.experience_bullets - ) - for upload in uploads - ] - - except HTTPException: - raise - except Exception as e: - logger.exception(e) - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Failed to retrieve user CVs" - ) - users_router.include_router(router) diff --git a/backend/app/users/cv/service.py b/backend/app/users/cv/service.py index fa3c5f055..2805a3257 100644 --- a/backend/app/users/cv/service.py +++ b/backend/app/users/cv/service.py @@ -3,17 +3,20 @@ from abc import ABC, abstractmethod from typing import Optional -from app.app_config import get_application_config +from app.users.cv.types import UploadProcessState, CVUploadErrorCode, CVUploadListItemResponse, CVStructuredExtraction, CVUploadStatus from app.users.cv.constants import MAX_MARKDOWN_CHARS, MARKDOWN_CONVERSION_TIMEOUT_SECONDS, RATE_LIMIT_WINDOW_MINUTES, \ DEFAULT_MAX_UPLOADS_PER_USER, DEFAULT_RATE_LIMIT_PER_MINUTE from app.users.cv.errors import MarkdownTooLongError, EmptyMarkdownError, \ CVUploadRateLimitExceededError, CVLimitExceededError, DuplicateCVUploadError, MarkdownConversionTimeoutError -from app.users.cv.repository import IUserCVRepository -from app.users.cv.storage import build_user_cv_upload_record, ICVCloudStorageService -from app.users.cv.types import UploadProcessState, CVUploadErrorCode, UserCVUpload -from app.users.cv.utils.llm_extractor import CVExperienceExtractor from app.users.cv.utils.markdown_converter import convert_cv_bytes_to_markdown from common_libs.call_with_timeout.call_with_timeout import call_with_timeout +from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor +from app.users.cv.repository import IUserCVRepository +from app.users.cv.services.state_injection_service import StateInjectionService +from app.application_state import IApplicationStateManager +from app.app_config import get_application_config +from app.users.cv.storage import build_user_cv_upload_record, ICVCloudStorageService +from app.users.repositories import UserPreferenceRepository class ICVUploadService(ABC): @@ -21,16 +24,19 @@ class ICVUploadService(ABC): async def parse_cv(self, *, user_id: str, file_bytes: bytes, - filename: str) -> str: + filename: str, + session_id: int | None) -> str: """ Schedule a CV upload and parsing process. + :param session_id: The conversation session ID to inject state into after upload. :param user_id: The ID of the user uploading the CV. :param file_bytes: The raw bytes of the uploaded CV file. :param filename: The original filename of the uploaded CV. :return: the upload_id """ + raise NotImplementedError() @abstractmethod async def cancel_upload(self, *, user_id: str, upload_id: str) -> bool: @@ -38,22 +44,28 @@ async def cancel_upload(self, *, user_id: str, upload_id: str) -> bool: Cancel an ongoing CV upload process. Returns True if cancellation was successful, False if upload not found or already completed. """ + raise NotImplementedError() @abstractmethod - async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[dict]: + async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[CVUploadStatus]: """ Get the status of an upload process. Returns upload details if found, None if not found. """ + raise NotImplementedError() @abstractmethod - async def get_user_cvs(self, *, user_id: str) -> list[UserCVUpload]: - """ - Get all CVs uploaded by a specific user. + async def get_user_cvs(self, *, user_id: str) -> list[CVUploadListItemResponse]: + """Return a list of completed uploads for a user (for listing in UI).""" + raise NotImplementedError() - :param user_id: The ID of the user. - :return: A list of the user's CV uploads. + @abstractmethod + async def reinject_upload(self, *, user_id: str, upload_id: str, session_id: int | None = None) -> dict: + """ + Re-run state injection for a previously uploaded CV. If session_id is None, will fetch the most recent session. + Returns a dict with 'state_injected' (bool) and 'experience_bullets' (list[str] | None). """ + raise NotImplementedError() class CVUploadService(ICVUploadService): @@ -61,13 +73,20 @@ class CVUploadService(ICVUploadService): CV Upload Service. """ - def __init__(self, repository: IUserCVRepository, cv_cloud_storage_service: ICVCloudStorageService): + def __init__(self, repository: IUserCVRepository, cv_cloud_storage_service: ICVCloudStorageService, + structured_extractor: CVStructuredExperienceExtractor, + application_state_manager: IApplicationStateManager | None = None, + user_preferences_repository: UserPreferenceRepository | None = None): self._background_tasks: set[asyncio.Task] = set() self._logger = logging.getLogger(self.__class__.__name__) - self._experiences_extractor = CVExperienceExtractor(self._logger) + self._structured_extractor = structured_extractor self._repository = repository self._cv_cloud_storage_service = cv_cloud_storage_service + self._user_preferences_repository = user_preferences_repository + self._injection_service: StateInjectionService | None = ( + StateInjectionService(application_state_manager) if application_state_manager else None + ) @staticmethod def _map_error_code(error: Exception) -> CVUploadErrorCode: @@ -122,8 +141,36 @@ async def _run_with_cancellation(self, task.cancel() raise - async def parse_cv(self, *, user_id: str, file_bytes: bytes, filename: str) -> str: + async def _get_most_recent_session_id(self, user_id: str) -> int | None: + """ + Get the most recent session ID for a user from their preferences. + Returns None if no sessions exist or if repository is not available. + """ + if not self._user_preferences_repository: + self._logger.debug("User preferences repository not available, skipping session lookup") + return None + + try: + user_preferences = await self._user_preferences_repository.get_user_preference_by_user_id(user_id) + if user_preferences and user_preferences.sessions and len(user_preferences.sessions) > 0: + # Use the first session (most recent) for injection + session_id = user_preferences.sessions[0] + self._logger.info("Using most recent session_id from user preferences: %s", session_id) + return session_id + else: + self._logger.info("User has no sessions, skipping state injection") + return None + except Exception as e: + self._logger.warning("Failed to get user preferences for session lookup: %s", e) + # Continue without session_id - injection will be skipped + return None + + async def parse_cv(self, *, user_id: str, file_bytes: bytes, filename: str, session_id: int | None = None) -> str: self._logger.info("Preparing upload record {filename='%s', size_bytes=%s}", filename, len(file_bytes)) + + # If session_id not provided, try to get the most recent session from user preferences + if session_id is None: + session_id = await self._get_most_recent_session_id(user_id) # We'll run conversion/extraction in background; validations happen there. # For immediate response we just need upload_id. # Background pipeline will fill experiences; immediate return only includes upload_id @@ -181,14 +228,17 @@ async def _pipeline(): if len(md) > MAX_MARKDOWN_CHARS: raise MarkdownTooLongError(len(md), MAX_MARKDOWN_CHARS) - # Cancellation-aware extraction + # Cancellation-aware structured extraction (do not persist bullets) await self._repository.update_state(user_id, upload_id, to_state=UploadProcessState.EXTRACTING) - bullets_local = await self._run_with_cancellation( + _structured = await self._run_with_cancellation( upload_id, - self._experiences_extractor.extract_experiences, + self._structured_extractor.extract_structured_experiences, md, ) - self._logger.info("[Upload %s] Experiences extracted {items=%s}", upload_id, len(bullets_local)) + self._logger.info("[Upload %s] Structured experiences extracted {items=%s}", upload_id, _structured.extraction_metadata.get("total_experiences")) + + # Store structured extraction in database + await self._repository.store_structured_extraction(user_id, upload_id, structured_extraction=_structured) # Storage with cancellation await self._repository.update_state(user_id, upload_id, to_state=UploadProcessState.UPLOADING_TO_GCS) @@ -200,11 +250,36 @@ async def _pipeline(): markdown_text=md, original_bytes=file_bytes, ) - # Persist extracted experiences, then mark completed - try: - await self._repository.store_experiences(user_id, upload_id, experiences=bullets_local) - except Exception: - self._logger.warning("[Upload %s] Failed to persist experiences_data", upload_id) + # Attempt state injection when possible (non-blocking for completion) + self._logger.info("[Upload %s] Checking injection conditions: injection_service=%s, session_id=%s", + upload_id, self._injection_service is not None, session_id) + if self._injection_service and session_id is not None: + try: + self._logger.info("[Upload %s] Starting state injection for session_id=%s", upload_id, session_id) + success = await self._injection_service.inject_cv_data( + user_id=user_id, + session_id=session_id, + structured_extraction=_structured, + ) + if success: + self._logger.info("[Upload %s] State injection successful", upload_id) + await self._repository.mark_state_injected(user_id, upload_id) + else: + self._logger.warning("[Upload %s] State injection returned False", upload_id) + await self._repository.mark_injection_failed(user_id, upload_id, error="State injection failed") + except Exception as inj_err: + self._logger.error("[Upload %s] Injection failed with exception: %s", upload_id, inj_err, exc_info=True) + try: + await self._repository.mark_injection_failed(user_id, upload_id, error=str(inj_err)) + except Exception: + self._logger.warning("[Upload %s] Failed to persist injection failure", upload_id) + else: + if self._injection_service is None: + self._logger.info("[Upload %s] Skipping injection: injection_service is None", upload_id) + if session_id is None: + self._logger.info("[Upload %s] Skipping injection: session_id is None", upload_id) + + # Mark completed regardless of injection outcome await self._repository.update_state(user_id, upload_id, to_state=UploadProcessState.COMPLETED) self._logger.info("[Upload %s] Pipeline completed successfully", upload_id) except asyncio.CancelledError: @@ -256,7 +331,60 @@ async def cancel_upload(self, *, user_id: str, upload_id: str) -> bool: self._logger.exception(e) return False - async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[dict]: + @staticmethod + def _derive_experience_bullets(structured_extraction: CVStructuredExtraction | None) -> list[str] | None: + """ + Derive experience bullets from structured extraction data. + Returns a list of formatted bullet strings, or None if no structured extraction available. + """ + if not structured_extraction: + return None + + bullets = [] + + # Prefer experience_entities if available (more complete data) + if structured_extraction.experience_entities: + for entity in structured_extraction.experience_entities: + parts = [entity.experience_title] + + if entity.company: + parts.append(f"at {entity.company}") + + if entity.location: + parts.append(entity.location) + + if entity.timeline: + if entity.timeline.start: + parts.append(entity.timeline.start) + if entity.timeline.end: + parts.append(entity.timeline.end) + + bullets.append(" ".join(parts).strip()) + + # Fall back to collected_data if no experience_entities + elif structured_extraction.collected_data: + for data in structured_extraction.collected_data: + parts = [] + if data.experience_title: + parts.append(data.experience_title) + + if data.company: + parts.append(f"at {data.company}") + + if data.location: + parts.append(data.location) + + if data.start_date: + parts.append(data.start_date) + if data.end_date: + parts.append(data.end_date) + + if parts: + bullets.append(" ".join(parts).strip()) + + return bullets if bullets else None + + async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[CVUploadStatus]: """ Get the status of an upload process. Returns upload details if found, None if not found. @@ -268,39 +396,130 @@ async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[d self._logger.debug("Upload not found {user_id=%s, upload_id=%s}", user_id, upload_id) return None - # Convert MongoDB document to a clean dict for API response - status_info = { - "upload_id": upload_record.get("upload_id"), - "user_id": upload_record.get("user_id"), - "filename": upload_record.get("filename"), - "upload_process_state": upload_record.get("upload_process_state"), - "cancel_requested": upload_record.get("cancel_requested", False), - "created_at": upload_record.get("created_at"), - "last_activity_at": upload_record.get("last_activity_at"), - "error_code": upload_record.get("error_code"), - "error_detail": upload_record.get("error_detail"), - "experience_bullets": upload_record.get("experience_bullets"), - } + # Parse structured extraction if available + structured_extraction = None + structured_extraction_data = upload_record.get("structured_extraction") + if structured_extraction_data: + try: + structured_extraction = CVStructuredExtraction.model_validate(structured_extraction_data) + except Exception as e: + self._logger.warning("Failed to parse structured_extraction for upload {upload_id=%s}: %s", upload_id, e) + + # Derive experience bullets if structured extraction exists and upload is completed + experience_bullets = None + if upload_record.get("upload_process_state") == UploadProcessState.COMPLETED.value and structured_extraction: + experience_bullets = self._derive_experience_bullets(structured_extraction) + + status = CVUploadStatus( + upload_id=upload_record.get("upload_id"), + user_id=upload_record.get("user_id"), + filename=upload_record.get("filename"), + upload_process_state=UploadProcessState(upload_record.get("upload_process_state")), + cancel_requested=upload_record.get("cancel_requested", False), + created_at=upload_record.get("created_at"), + last_activity_at=upload_record.get("last_activity_at"), + error_code=upload_record.get("error_code"), + error_detail=upload_record.get("error_detail"), + state_injected=upload_record.get("state_injected"), + injection_error=upload_record.get("injection_error"), + experience_bullets=experience_bullets, + ) self._logger.debug("Retrieved upload status {user_id=%s, upload_id=%s, state=%s}", - user_id, upload_id, status_info.get("upload_process_state")) + user_id, upload_id, status.upload_process_state) - return status_info + return status except Exception as e: self._logger.exception(e) return None - async def get_user_cvs(self, *, user_id: str) -> list[UserCVUpload]: - """ - Get all CVs uploaded by a specific user. - """ + async def get_user_cvs(self, *, user_id: str) -> list[CVUploadListItemResponse]: + """Return a simplified list of user's uploaded CVs (COMPLETED only).""" try: uploads = await self._repository.get_user_uploads(user_id=user_id) - - self._logger.debug("Retrieved %d CVs for user {user_id=%s}", len(uploads), user_id) - return uploads - + return [ + CVUploadListItemResponse( + upload_id=u.upload_id, + filename=u.filename, + uploaded_at=u.created_at.isoformat().replace("+00:00", "Z"), + upload_process_state=u.upload_process_state, + ) + for u in uploads + ] except Exception as e: self._logger.exception(e) - return [] + raise + + async def reinject_upload(self, *, user_id: str, upload_id: str, session_id: int | None = None) -> dict: + if not self._injection_service: + self._logger.info( + "[Upload %s] Reinjection skipped: injection service not configured", upload_id + ) + return {"state_injected": False, "experience_bullets": None} + + # If session_id not provided, try to get the most recent session from user preferences + if session_id is None: + session_id = await self._get_most_recent_session_id(user_id) + if session_id is None: + self._logger.warning( + "[Upload %s] Reinjection failed: no session_id available", upload_id + ) + return {"state_injected": False, "experience_bullets": None, "error": "NO_SESSION"} + + try: + record = await self._repository.get_upload_by_id(user_id, upload_id) + if not record: + self._logger.warning( + "[Upload %s] Reinjection failed: upload record not found for user %s", + upload_id, + user_id, + ) + return {"state_injected": False, "experience_bullets": None} + + # Get structured extraction from database + structured_extraction_dict = record.get("structured_extraction") + if not structured_extraction_dict: + self._logger.warning( + "[Upload %s] Reinjection failed: no stored structured extraction", upload_id + ) + return {"state_injected": False, "experience_bullets": None} + + # Deserialize structured extraction from database + try: + structured = CVStructuredExtraction.model_validate(structured_extraction_dict) + except Exception as validation_error: + self._logger.error( + "[Upload %s] Reinjection failed: invalid structured extraction data (%s)", + upload_id, + validation_error, + ) + return {"state_injected": False, "experience_bullets": None} + + # Derive experience bullets + experience_bullets = self._derive_experience_bullets(structured) + + success = await self._injection_service.inject_cv_data( + user_id=user_id, + session_id=session_id, + structured_extraction=structured, + ) + + if success: + await self._repository.mark_state_injected(user_id, upload_id) + else: + await self._repository.mark_injection_failed(user_id, upload_id, error="Reinjection failed") + + return {"state_injected": success, "experience_bullets": experience_bullets} + + except Exception as exc: + self._logger.error( + "[Upload %s] Reinjection raised exception: %s", upload_id, exc, exc_info=True + ) + try: + await self._repository.mark_injection_failed(user_id, upload_id, error=str(exc)) + except Exception: + self._logger.warning( + "[Upload %s] Failed to persist reinjection failure", upload_id + ) + return {"state_injected": False, "experience_bullets": None, "error": str(exc)} diff --git a/backend/app/users/cv/services/state_injection_service.py b/backend/app/users/cv/services/state_injection_service.py new file mode 100644 index 000000000..a2dda31f9 --- /dev/null +++ b/backend/app/users/cv/services/state_injection_service.py @@ -0,0 +1,240 @@ +import logging + +from app.agent.collect_experiences_agent._types import CollectedData +from app.agent.experience.experience_entity import ExperienceEntity +from app.agent.explore_experiences_agent_director import ( + ExperienceState, + DiveInPhase, +) +from app.application_state import ApplicationState, IApplicationStateManager +from app.users.cv.types import CVStructuredExtraction + + +class IStateInjectionService: + """Interface for state injection service.""" + + async def inject_cv_data( + self, + *, + user_id: str, + session_id: int, + structured_extraction: CVStructuredExtraction + ) -> bool: + """ + Inject CV structured extraction data into agent states. + + :param user_id: The user ID + :param session_id: The session ID + :param structured_extraction: The structured extraction data to inject + :return: True if injection was successful, False otherwise + """ + raise NotImplementedError() + + +class StateInjectionService(IStateInjectionService): + @staticmethod + def _count_responsibilities(experience: ExperienceEntity) -> int: + responsibilities_data = getattr(experience, "responsibilities", None) + if not responsibilities_data or not getattr(responsibilities_data, "responsibilities", None): + return 0 + + return len([ + resp.strip() + for resp in responsibilities_data.responsibilities + if isinstance(resp, str) and resp.strip() + ]) + + """ + Service for injecting CV structured extraction data into agent states. + + This service is designed to be reusable for future API endpoints and + ensures that CV data is properly integrated into the Compass flow + without disrupting existing functionality. + """ + + def __init__(self, application_state_manager: IApplicationStateManager): + self._application_state_manager = application_state_manager + self._logger = logging.getLogger(self.__class__.__name__) + + async def inject_cv_data( + self, + *, + user_id: str, + session_id: int, + structured_extraction: CVStructuredExtraction + ) -> bool: + """ + Inject CV structured extraction data into agent states. + + :param user_id: The user ID + :param session_id: The session ID + :param structured_extraction: The structured extraction data to inject + :return: True if injection was successful, False otherwise + """ + try: + self._logger.info("Starting CV data injection for user %s, session %s", user_id, session_id) + + # Get current application state + state = await self._application_state_manager.get_state(session_id) + + # Inject into CollectExperiencesAgent state + self._inject_to_collect_experiences_agent(state, structured_extraction.collected_data) + + # Inject into ExploreExperiencesAgent state + self._inject_to_explore_experiences_agent(state, structured_extraction.experience_entities) + + # Inject into SkillsExplorerAgent state + self._inject_to_skills_explorer_agent(state, structured_extraction.experience_entities) + + # Save updated state + await self._application_state_manager.save_state(state) + + self._logger.info("Successfully injected CV data into agent states for user %s, session %s", user_id, session_id) + return True + + except Exception as e: + self._logger.error("Failed to inject CV data into agent states: %s", e) + return False + + def _inject_to_collect_experiences_agent( + self, + state: ApplicationState, + collected_data: list[CollectedData] + ): + """Inject data into CollectExperiencesAgent state without disrupting existing flow.""" + + # Add new collected data to existing state + state.collect_experience_state.collected_data.extend(collected_data) + + # Mark that experiences have been collected from CV + state.collect_experience_state.first_time_visit = False + + self._logger.debug("Injected %d collected data items into CollectExperiencesAgent state", len(collected_data)) + + def _inject_to_explore_experiences_agent( + self, + state: ApplicationState, + experience_entities: list[ExperienceEntity] + ): + """ + Inject ExperienceEntity objects into ExploreExperiencesAgent state. + + This allows the existing skills processing pipeline to handle + the experiences through normal Compass flow. + """ + + # Add experiences to the experiences_state dict + for experience in experience_entities: + responsibilities_count = self._count_responsibilities(experience) + has_responsibilities = responsibilities_count > 0 + + self._logger.info( + "Injection check for experience {title=%s, uuid=%s, responsibilities=%d}", + getattr(experience, "experience_title", None), + getattr(experience, "uuid", None), + responsibilities_count, + ) + + _existing_key, existing_state = self._find_existing_experience(state, experience) + + if existing_state: + if has_responsibilities: + responsibilities_bullets = "\n".join( + f"• {resp.strip()}" for resp in experience.responsibilities.responsibilities if resp.strip() + ) + if responsibilities_bullets: + justification_question = ( + "These responsibilities were captured from your CV upload. Please confirm they look right." + ) + justification_answer = responsibilities_bullets + existing_state.experience.questions_and_answers = list(existing_state.experience.questions_and_answers) + if (justification_question, justification_answer) not in existing_state.experience.questions_and_answers: + existing_state.experience.questions_and_answers.append((justification_question, justification_answer)) + # Update responsibilities from CV extraction (CV is the authoritative source) + existing_state.experience.responsibilities = experience.responsibilities + # The summary is generated by the SkillsExplorerAgent, so we don't need to inject it + existing_state.experience.summary = None + # Reset to NOT_STARTED so agent director can decide the flow based on responsibilities + if existing_state.dive_in_phase != DiveInPhase.PROCESSED: + existing_state.dive_in_phase = DiveInPhase.NOT_STARTED + continue + + # Ensure questions_and_answers captures CV-derived responsibilities as justification + if has_responsibilities: + responsibilities_bullets = "\n".join( + f"• {resp.strip()}" for resp in experience.responsibilities.responsibilities if resp.strip() + ) + if responsibilities_bullets: + justification_question = ( + "These responsibilities were captured from your CV upload. Please confirm they look right." + ) + justification_answer = responsibilities_bullets + experience.questions_and_answers = list(experience.questions_and_answers) + experience.questions_and_answers.append((justification_question, justification_answer)) + + # The summary is generated by the SkillsExplorerAgent, so we don't need to inject it + experience.summary = None + + # Store with NOT_STARTED; we will let the normal flow advance sub-phases + experience_state = ExperienceState( + dive_in_phase=DiveInPhase.NOT_STARTED, + experience=experience + ) + + state.explore_experiences_director_state.experiences_state[experience.uuid] = experience_state + + self._logger.debug("Injected %d experience entities into ExploreExperiencesAgent state", len(experience_entities)) + + def _inject_to_skills_explorer_agent( + self, + state: ApplicationState, + experience_entities: list[ExperienceEntity] + ): + """ + Inject experience entities into SkillsExplorerAgent state. + + Agent director will decide the flow based on responsibilities, so we treat + all CV-injected experiences as fresh (first-time) for the SkillsExplorerAgent. + """ + + for experience in experience_entities: + # Treat CV-injected experiences as fresh - agent director will decide flow + state.skills_explorer_agent_state.first_time_for_experience.pop(experience.uuid, None) + + structured_summary = ExperienceEntity.get_structured_summary( + experience_title=experience.experience_title, + company=experience.company, + location=experience.location, + work_type=experience.work_type.name if experience.work_type else None, + start_date=experience.timeline.start if experience.timeline else None, + end_date=experience.timeline.end if experience.timeline else None + ) + + # Remove from experiences_explored if present, so it's treated as fresh + try: + state.skills_explorer_agent_state.experiences_explored.remove(structured_summary) + except ValueError: + pass + + self._logger.debug("Injected %d experience entities into SkillsExplorerAgent state", len(experience_entities)) + + def _find_existing_experience(self, state: ApplicationState, experience: ExperienceEntity) -> tuple[str, ExperienceState] | tuple[None, None]: + def _normalize(value: str | None) -> str: + return (value or "").strip().lower() + + target = ( + _normalize(experience.experience_title), + _normalize(experience.company), + _normalize(experience.location), + ) + + for key, existing in state.explore_experiences_director_state.experiences_state.items(): + candidate = ( + _normalize(existing.experience.experience_title), + _normalize(existing.experience.company), + _normalize(existing.experience.location), + ) + if candidate == target: + return key, existing + + return None, None diff --git a/backend/app/users/cv/storage.py b/backend/app/users/cv/storage.py index 088a49a71..975898c56 100644 --- a/backend/app/users/cv/storage.py +++ b/backend/app/users/cv/storage.py @@ -21,6 +21,11 @@ def upload_cv(self, original_bytes: bytes) -> None: raise NotImplementedError() + @abstractmethod + def download_markdown(self, *, object_path: str) -> str: + """Retrieve the stored markdown text for a previously uploaded CV.""" + raise NotImplementedError() + _cv_storage_service_lock = asyncio.Lock() _cv_storage_service_singleton: ICVCloudStorageService | None = None @@ -102,4 +107,8 @@ def upload_cv(self, # If google cloud storage is not available, we continue without GCS upload # we will log a error and continue without GCS upload # The database record will still be saved so polling works - # TODO: Remember to add raise \ No newline at end of file + # TODO: Remember to add raise + + def download_markdown(self, *, object_path: str) -> str: + blob = self._bucket.blob(object_path) + return blob.download_as_text(encoding="utf-8") \ No newline at end of file diff --git a/backend/app/users/cv/test_repository.py b/backend/app/users/cv/test_repository.py index 6e32160b4..97bcf7a68 100644 --- a/backend/app/users/cv/test_repository.py +++ b/backend/app/users/cv/test_repository.py @@ -313,27 +313,31 @@ async def test_atomic_state_transition_handles_concurrent_updates(self, get_user final_doc = await repository.get_upload_by_id(user_id, upload.upload_id) assert final_doc["upload_process_state"] == UploadProcessState.CONVERTING + @pytest.mark.asyncio - async def test_list_uploads_for_user_returns_all_uploads(self, get_user_cv_repository: Awaitable[UserCVRepository]): + async def test_get_user_uploads_returns_only_completed_sorted(self, get_user_cv_repository: Awaitable[UserCVRepository]): repository = await get_user_cv_repository + user_id = "user-list" now = datetime.now(timezone.utc) - user_id = "user-1" - # GIVEN multiple uploads for the user - uploads = [ - _get_upload(user_id=user_id, created_at=now - timedelta(minutes=i), suffix=str(i), md5_hash=f"hash_{i}") - for i in range(5) - ] - for upload in uploads: - upload.upload_process_state = UploadProcessState.COMPLETED - await repository.insert_upload(upload) - - # WHEN listing uploads - results = await repository.get_user_uploads(user_id=user_id) - - # THEN all uploads are returned, sorted by created_at descending - assert len(results) == 5 - sorted_uploads = sorted(uploads, key=lambda u: u.created_at, reverse=True) - for result, expected in zip(results, sorted_uploads): - assert result.upload_id == expected.upload_id - assert result.filename == expected.filename + # GIVEN uploads in different states and times + u_old_completed = _get_upload(user_id=user_id, created_at=now - timedelta(minutes=10), suffix="a", md5_hash="h1") + u_old_completed.upload_process_state = UploadProcessState.COMPLETED + + u_recent_completed = _get_upload(user_id=user_id, created_at=now - timedelta(minutes=1), suffix="b", md5_hash="h2") + u_recent_completed.upload_process_state = UploadProcessState.COMPLETED + + u_recent_failed = _get_upload(user_id=user_id, created_at=now - timedelta(minutes=2), suffix="c", md5_hash="h3") + u_recent_failed.upload_process_state = UploadProcessState.FAILED + + await repository.insert_upload(u_old_completed) + await repository.insert_upload(u_recent_completed) + await repository.insert_upload(u_recent_failed) + + # WHEN getting user uploads (completed only) + items = await repository.get_user_uploads(user_id=user_id) + + # THEN only completed uploads are returned, newest first + assert [it.filename for it in items] == [u_recent_completed.filename, u_old_completed.filename] + + diff --git a/backend/app/users/cv/test_routes.py b/backend/app/users/cv/test_routes.py index 044ba3c72..9738de1ba 100644 --- a/backend/app/users/cv/test_routes.py +++ b/backend/app/users/cv/test_routes.py @@ -19,8 +19,9 @@ from app.users.cv.service import ICVUploadService from app.users.cv.errors import MarkdownTooLongError, EmptyMarkdownError, \ CVLimitExceededError, CVUploadRateLimitExceededError, DuplicateCVUploadError, MarkdownConversionTimeoutError -from app.users.cv.types import UserCVUpload, UploadProcessState +from app.users.cv.types import UploadProcessState, CVUploadListItemResponse, CVUploadStatus from common_libs.test_utilities.mock_auth import MockAuth +from app.users.get_user_preferences_repository import get_user_preferences_repository TestClientWithMocks = tuple[TestClient, ICVUploadService, UserInfo] @@ -28,36 +29,42 @@ @pytest.fixture(scope='function') def client_with_mocks() -> TestClientWithMocks: class MockCVService(ICVUploadService): - async def parse_cv(self, *, user_id: str, file_bytes: bytes, filename: str): + async def parse_cv(self, *, user_id: str, file_bytes: bytes, filename: str, session_id: int | None = None): # Service returns upload_id string per contract return "test-upload-id" async def cancel_upload(self, *, user_id: str, upload_id: str) -> bool: return True - async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[dict]: - return { - "upload_id": upload_id, - "user_id": user_id, - "filename": "test.pdf", - "upload_process_state": "COMPLETED", - "cancel_requested": False, - "created_at": "2025-01-01T00:00:00Z", - "last_activity_at": "2025-01-01T00:00:00Z", - } - - async def get_user_cvs(self, *, user_id: str) -> list[dict]: + async def get_upload_status(self, *, user_id: str, upload_id: str) -> Optional[CVUploadStatus]: + return CVUploadStatus( + upload_id=upload_id, + user_id=user_id, + filename="test.pdf", + upload_process_state=UploadProcessState.COMPLETED, + cancel_requested=False, + created_at=datetime.fromisoformat("2025-01-01T00:00:00+00:00"), + last_activity_at=datetime.fromisoformat("2025-01-01T00:00:00+00:00"), + experience_bullets=None, + ) + + async def get_user_cvs(self, *, user_id: str) -> list[CVUploadListItemResponse]: return [ - { - "upload_id": "upload-1", - "filename": "cv1.pdf", - "uploaded_at": "2025-01-01T00:00:00Z", - "upload_process_state": "COMPLETED", - "experiences_data": ["Experience 1", "Experience 2"], - }, - + CVUploadListItemResponse( + upload_id="upload-1", + filename="cv1.pdf", + uploaded_at="2025-01-01T00:00:00Z", + upload_process_state=UploadProcessState.COMPLETED, + ), ] + async def reinject_upload(self, *, user_id: str, upload_id: str, session_id: int | None = None) -> dict: + return {"state_injected": True, "experience_bullets": None} + + class MockUserPreferencesRepo: + def get_user_preference_by_user_id(self, _user_id: str): + return SimpleNamespace(sessions=[987]) + _instance_cv_service = MockCVService() def _mocked_get_cv_service() -> ICVUploadService: @@ -68,6 +75,7 @@ def _mocked_get_cv_service() -> ICVUploadService: api_router = APIRouter() app = FastAPI() app.dependency_overrides[get_cv_service] = _mocked_get_cv_service + app.dependency_overrides[get_user_preferences_repository] = lambda: MockUserPreferencesRepo() add_user_cv_routes(api_router, auth=_instance_auth) app.include_router(api_router) @@ -328,6 +336,37 @@ async def test_cancel_upload_service_exception_maps_to_500(self, client_with_moc assert resp.status_code == HTTPStatus.INTERNAL_SERVER_ERROR +class TestReinjectCVUpload: + @pytest.mark.asyncio + async def test_reinject_success(self, client_with_mocks: TestClientWithMocks, mocker: pytest_mock.MockerFixture): + client, mocked_service, mocked_user = client_with_mocks + mocker.patch.object(mocked_service, "reinject_upload", return_value={"state_injected": True, "experience_bullets": None}) + + resp = client.post(f"/{mocked_user.user_id}/cv/test-upload-id/inject") + + assert resp.status_code == HTTPStatus.OK + assert resp.json()["state_injected"] is True + + @pytest.mark.asyncio + async def test_reinject_returns_false(self, client_with_mocks: TestClientWithMocks, + mocker: pytest_mock.MockerFixture): + client, mocked_service, mocked_user = client_with_mocks + mocker.patch.object(mocked_service, "reinject_upload", return_value={"state_injected": False, "experience_bullets": None}) + + resp = client.post(f"/{mocked_user.user_id}/cv/test-upload-id/inject") + + assert resp.status_code == HTTPStatus.OK + assert resp.json()["state_injected"] is False + + @pytest.mark.asyncio + async def test_reinject_forbidden_other_user(self, client_with_mocks: TestClientWithMocks): + client, _, mocked_user = client_with_mocks + + resp = client.post(f"/{mocked_user.user_id}_other/cv/test-upload-id/inject") + + assert resp.status_code == HTTPStatus.FORBIDDEN + + class TestGetUploadStatus: @pytest.mark.asyncio async def test_get_upload_status_success(self, client_with_mocks: TestClientWithMocks): @@ -355,7 +394,8 @@ async def test_get_upload_status_forbidden_other_user(self, client_with_mocks: T assert resp.status_code == HTTPStatus.FORBIDDEN @pytest.mark.asyncio - async def test_get_upload_status_not_found(self, client_with_mocks: TestClientWithMocks, mocker: pytest_mock.MockerFixture): + async def test_get_upload_status_not_found(self, client_with_mocks: TestClientWithMocks, + mocker: pytest_mock.MockerFixture): client, mocked_service, mocked_user = client_with_mocks # GIVEN service returns None (upload not found) mocker.patch.object(mocked_service, "get_upload_status", return_value=None) @@ -368,7 +408,8 @@ async def test_get_upload_status_not_found(self, client_with_mocks: TestClientWi assert "Upload not found" in resp.json()["detail"] @pytest.mark.asyncio - async def test_get_upload_status_service_exception_maps_to_500(self, client_with_mocks: TestClientWithMocks, mocker: pytest_mock.MockerFixture): + async def test_get_upload_status_service_exception_maps_to_500(self, client_with_mocks: TestClientWithMocks, + mocker: pytest_mock.MockerFixture): client, mocked_service, mocked_user = client_with_mocks # GIVEN service raises unexpected exception mocker.patch.object(mocked_service, "get_upload_status", side_effect=Exception("boom")) @@ -379,6 +420,7 @@ async def test_get_upload_status_service_exception_maps_to_500(self, client_with # THEN 500 Internal Server Error assert resp.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + class TestGetUploadedCVs: @pytest.mark.asyncio async def test_get_uploaded_cvs_success(self, client_with_mocks: TestClientWithMocks, @@ -387,19 +429,17 @@ async def test_get_uploaded_cvs_success(self, client_with_mocks: TestClientWithM # GIVEN service returns a list of uploads uploads = [ - SimpleNamespace( + CVUploadListItemResponse( upload_id="upload-1", filename="cv1.pdf", - created_at=datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc), - upload_process_state="COMPLETED", - experience_bullets=["Experience 1", "Experience 2"], + uploaded_at="2025-01-01T00:00:00Z", + upload_process_state=UploadProcessState.COMPLETED, ), - SimpleNamespace( + CVUploadListItemResponse( upload_id="upload-2", filename="cv2.docx", - created_at=datetime(2025, 1, 2, 0, 0, 0, tzinfo=timezone.utc), - upload_process_state="COMPLETED", - experience_bullets=["Experience 1"], + uploaded_at="2025-01-02T00:00:00Z", + upload_process_state=UploadProcessState.COMPLETED, ), ] mock_get_user_cvs = mocker.patch.object( @@ -422,13 +462,12 @@ async def test_get_uploaded_cvs_success(self, client_with_mocks: TestClientWithM for item, expected in zip(body, uploads): assert item["upload_id"] == expected.upload_id assert item["filename"] == expected.filename - returned_dt = datetime.fromisoformat(item["uploaded_at"].replace("Z", "+00:00")) - assert returned_dt == expected.created_at - assert item["upload_process_state"] == expected.upload_process_state - assert item["experiences_data"] == expected.experience_bullets + assert item["uploaded_at"] == expected.uploaded_at + assert item["upload_process_state"] == expected.upload_process_state.value @pytest.mark.asyncio - async def test_get_uploaded_cvs_forbidden_other_user(self, client_with_mocks: TestClientWithMocks, mocker: pytest_mock.MockerFixture): + async def test_get_uploaded_cvs_forbidden_other_user(self, client_with_mocks: TestClientWithMocks, + mocker: pytest_mock.MockerFixture): client, mocked_service, mocked_user = client_with_mocks # GIVEN a mocked service get_user_cvs_mock = mocker.patch.object(mocked_service, "get_user_cvs", mocker.AsyncMock()) @@ -443,7 +482,7 @@ async def test_get_uploaded_cvs_forbidden_other_user(self, client_with_mocks: Te @pytest.mark.asyncio async def test_get_uploaded_cvs_service_exception_maps_to_500(self, client_with_mocks: TestClientWithMocks, - mocker: pytest_mock.MockerFixture): + mocker: pytest_mock.MockerFixture): client, mocked_service, mocked_user = client_with_mocks # GIVEN service raises unexpected exception mocker.patch.object(mocked_service, "get_user_cvs", side_effect=Exception("boom")) @@ -453,3 +492,40 @@ async def test_get_uploaded_cvs_service_exception_maps_to_500(self, client_with_ # THEN 500 Internal Server Error assert resp.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + + +class TestCVUploadSessionExtraction: + """Tests for CV upload route session_id extraction from user preferences""" + + @pytest.mark.asyncio + async def test_upload_calls_service_with_session_when_user_has_sessions(self, + client_with_mocks: TestClientWithMocks, + mocker: pytest_mock.MockerFixture): + """Test that upload succeeds when user has sessions (integration via service mock)""" + # GIVEN a test client with mocked service + client, mocked_service, mocked_user = client_with_mocks + + # AND a tracker to capture session_id parameter passed to service + captured_session_ids = [] + original_parse_cv = mocked_service.parse_cv + + async def parse_cv_tracker(**kwargs): + captured_session_ids.append(kwargs.get("session_id")) + return await original_parse_cv(**kwargs) + + mocker.patch.object(mocked_service, "parse_cv", side_effect=parse_cv_tracker) + + # AND valid CV file data + given_mime_type = next(iter(ALLOWED_MIME_TYPES)) + given_extension = next(iter(ALLOWED_EXTENSIONS)) + given_file_content = b"hello" + given_headers = {"Content-Type": given_mime_type, "x-filename": f"cv{given_extension}"} + + # WHEN uploading CV (session_id will come from route's user preferences lookup) + response = client.post(f"/{mocked_user.user_id}/cv", data=given_file_content, headers=given_headers) + + # THEN request succeeds + assert response.status_code == HTTPStatus.OK + + # AND service was called (session_id may be None if user prefs not mocked in this simple test) + assert len(captured_session_ids) > 0 diff --git a/backend/app/users/cv/test_service.py b/backend/app/users/cv/test_service.py index 7b02c71c3..db5d741cb 100644 --- a/backend/app/users/cv/test_service.py +++ b/backend/app/users/cv/test_service.py @@ -1,10 +1,8 @@ -from datetime import datetime, timezone - import pytest import asyncio from app.users.cv.service import CVUploadService -from app.users.cv.types import CVUploadErrorCode, CVUploadResponseListItem, UploadProcessState +from app.users.cv.types import CVUploadErrorCode, UserCVUpload, CVStructuredExtraction from app.users.cv.errors import CVLimitExceededError, CVUploadRateLimitExceededError, DuplicateCVUploadError from app.users.cv.repository import IUserCVRepository from app.users.cv.storage import ICVCloudStorageService @@ -45,11 +43,14 @@ async def mark_cancelled(self, user_id: str, upload_id: str) -> bool: async def mark_failed(self, user_id: str, upload_id: str, *, error_code: str, error_detail: str) -> bool: return True - async def store_experiences(self, user_id: str, upload_id: str, *, experiences: list[str]) -> bool: + async def store_structured_extraction(self, user_id: str, upload_id: str, *, structured_extraction: CVStructuredExtraction) -> bool: + return True + + async def mark_state_injected(self, user_id: str, upload_id: str) -> bool: return True - async def get_user_uploads(self, *, user_id: str) -> list[CVUploadResponseListItem]: - return [] + async def mark_injection_failed(self, user_id: str, upload_id: str, *, error: str) -> bool: + return True class MockCVCloudStorageService(ICVCloudStorageService): @@ -59,6 +60,14 @@ def upload_cv(self, *, document: UserCVUpload, markdown_text: str, original_bytes: bytes) -> None: # Noncompliant - we keep this method empty cause its a mock for a test pass + def download_markdown(self, *, object_path: str) -> str: # pragma: no cover - test helper + return "# mock markdown" + + +class DummyStructuredExtractor: + def extract_structured_experiences(self, markdown_cv: str) -> CVStructuredExtraction: + return CVStructuredExtraction(collected_data=[], experience_entities=[], extraction_metadata={}) + class TestCVUploadService: @pytest.mark.asyncio @@ -68,7 +77,8 @@ async def test_parse_cv_returns_upload_id_and_empty_experiences_immediately(self given_filename = "resume.pdf" # WHEN parsing the CV in the service (immediate response design) - service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_max_uploads_per_user": 999, "cv_rate_limit_per_minute": 999, @@ -89,7 +99,8 @@ class RepoSpy(MockCVRepository): pass repo = RepoSpy() - service = CVUploadService(repository=repo, cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=repo, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_max_uploads_per_user": 999, "cv_rate_limit_per_minute": 999, @@ -104,7 +115,6 @@ class RepoSpy(MockCVRepository): assert isinstance(res, str) # Let the background task run await asyncio.sleep(0.1) - # THEN assert mark_failed_spy.called _, kwargs = mark_failed_spy.call_args @@ -113,7 +123,8 @@ class RepoSpy(MockCVRepository): @pytest.mark.asyncio async def test_parse_cv_does_not_validate_markdown_immediately(self, mocker): # GIVEN service under test - service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_max_uploads_per_user": 999, "cv_rate_limit_per_minute": 999, @@ -125,7 +136,8 @@ async def test_parse_cv_does_not_validate_markdown_immediately(self, mocker): @pytest.mark.asyncio async def test_parse_cv_returns_empty_experiences_immediately(self, mocker): - service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_max_uploads_per_user": 999, "cv_rate_limit_per_minute": 999, @@ -135,7 +147,8 @@ async def test_parse_cv_returns_empty_experiences_immediately(self, mocker): @pytest.mark.asyncio async def test_parse_cv_does_not_raise_on_empty_markdown_immediately(self, mocker): - service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=MockCVRepository(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_max_uploads_per_user": 999, "cv_rate_limit_per_minute": 999, @@ -148,8 +161,10 @@ async def test_blocks_when_total_limit_reached(self, mocker): # GIVEN converter returns valid markdown mocker.patch("app.users.cv.service.convert_cv_bytes_to_markdown", mocker.Mock(return_value="# md")) extractor_instance = mocker.Mock() - extractor_instance.extract_experiences = mocker.AsyncMock(return_value=["x"]) - mocker.patch("app.users.cv.service.CVExperienceExtractor", mocker.Mock(return_value=extractor_instance)) + extractor_instance.extract_structured_experiences = mocker.AsyncMock( + return_value=mocker.Mock(extraction_metadata={"total_experiences": 1})) + mocker.patch("app.users.cv.service.CVStructuredExperienceExtractor", + mocker.Mock(return_value=extractor_instance)) # AND a custom repository that returns count of 3 (exceeds limit) class CustomRepoMock(MockCVRepository): @@ -157,7 +172,8 @@ async def count_uploads_for_user(self, user_id: str) -> int: return 3 # AND application config with max uploads limit of 3 - service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_storage_bucket": "bucket", "cv_max_uploads_per_user": 3, @@ -174,8 +190,10 @@ async def test_blocks_when_rate_limit_reached(self, mocker): # GIVEN converter returns valid markdown mocker.patch("app.users.cv.service.convert_cv_bytes_to_markdown", mocker.Mock(return_value="# md")) extractor_instance = mocker.Mock() - extractor_instance.extract_experiences = mocker.AsyncMock(return_value=["x"]) - mocker.patch("app.users.cv.service.CVExperienceExtractor", mocker.Mock(return_value=extractor_instance)) + extractor_instance.extract_structured_experiences = mocker.AsyncMock( + return_value=mocker.Mock(extraction_metadata={"total_experiences": 1})) + mocker.patch("app.users.cv.service.CVStructuredExperienceExtractor", + mocker.Mock(return_value=extractor_instance)) # AND a custom repository that returns rate limit count of 5 (exceeds limit) class CustomRepoMock(MockCVRepository): @@ -183,7 +201,8 @@ async def count_uploads_for_user_in_window(self, user_id: str, *, minutes: int) return 5 # AND application config with rate limit of 5 per minute - service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_storage_bucket": "bucket", "cv_max_uploads_per_user": 100, @@ -200,8 +219,10 @@ async def test_allows_when_under_limits(self, mocker): # GIVEN converter returns valid markdown mocker.patch("app.users.cv.service.convert_cv_bytes_to_markdown", mocker.Mock(return_value="# md")) extractor_instance = mocker.Mock() - extractor_instance.extract_experiences = mocker.AsyncMock(return_value=["x"]) - mocker.patch("app.users.cv.service.CVExperienceExtractor", mocker.Mock(return_value=extractor_instance)) + extractor_instance.extract_structured_experiences = mocker.AsyncMock( + return_value=mocker.Mock(extraction_metadata={"total_experiences": 1})) + mocker.patch("app.users.cv.service.CVStructuredExperienceExtractor", + mocker.Mock(return_value=extractor_instance)) # AND a custom repository that returns count of 1 (under limit) class CustomRepoMock(MockCVRepository): @@ -209,7 +230,8 @@ async def count_uploads_for_user(self, user_id: str) -> int: return 1 # AND application config with limits that allow the upload - service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_storage_bucket": "bucket", "cv_max_uploads_per_user": 3, @@ -227,8 +249,10 @@ async def test_raises_duplicate_cv_upload_error(self, mocker): # GIVEN converter returns valid markdown mocker.patch("app.users.cv.service.convert_cv_bytes_to_markdown", mocker.Mock(return_value="# md")) extractor_instance = mocker.Mock() - extractor_instance.extract_experiences = mocker.AsyncMock(return_value=["x"]) - mocker.patch("app.users.cv.service.CVExperienceExtractor", mocker.Mock(return_value=extractor_instance)) + extractor_instance.extract_structured_experiences = mocker.AsyncMock( + return_value=mocker.Mock(extraction_metadata={"total_experiences": 1})) + mocker.patch("app.users.cv.service.CVStructuredExperienceExtractor", + mocker.Mock(return_value=extractor_instance)) # AND a custom repository that raises DuplicateCVUploadError class CustomRepoMock(MockCVRepository): @@ -237,7 +261,8 @@ async def insert_upload(self, upload: UserCVUpload) -> str: raise DuplicateCVUploadError("duplicate_hash_123") # AND application config with limits that allow the upload - service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=CustomRepoMock(), cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { "cv_storage_bucket": "bucket", "cv_max_uploads_per_user": 3, @@ -255,7 +280,8 @@ async def insert_upload(self, upload: UserCVUpload) -> str: async def test_cancel_upload_success(self, mocker): # GIVEN a service with a mock repository mock_repository = MockCVRepository() - service = CVUploadService(repository=mock_repository, cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=mock_repository, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) # AND the repository returns True for successful cancellation mock_cancel = mocker.patch.object(mock_repository, "request_cancellation", return_value=True) @@ -271,7 +297,8 @@ async def test_cancel_upload_success(self, mocker): async def test_cancel_upload_not_found(self, mocker): # GIVEN a service with a mock repository mock_repository = MockCVRepository() - service = CVUploadService(repository=mock_repository, cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=mock_repository, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) # AND the repository returns False (upload not found) mock_cancel = mocker.patch.object(mock_repository, "request_cancellation", return_value=False) @@ -287,7 +314,8 @@ async def test_cancel_upload_not_found(self, mocker): async def test_cancel_upload_repository_exception(self, mocker): # GIVEN a service with a mock repository mock_repository = MockCVRepository() - service = CVUploadService(repository=mock_repository, cv_cloud_storage_service=MockCVCloudStorageService()) + service = CVUploadService(repository=mock_repository, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor()) # AND the repository raises an exception mock_cancel = mocker.patch.object(mock_repository, "request_cancellation", @@ -300,53 +328,156 @@ async def test_cancel_upload_repository_exception(self, mocker): assert result is False mock_cancel.assert_called_once_with("user123", "upload456") + @pytest.mark.asyncio + async def test_pipeline_injects_state_when_session_id_provided(self, mocker): + # GIVEN a repo we can spy on + class RepoSpy(MockCVRepository): + pass + + repo = RepoSpy() + mark_injected_spy = mocker.spy(repo, "mark_state_injected") + + # AND a minimal application state manager + class _InMemoryStateManager: + def get_state(self, session_id: int): + from app.application_state import ApplicationState + return ApplicationState.new_state(session_id=session_id) + + def save_state(self, state): + return None + + def delete_state(self, session_id: int): + return None + + def get_all_session_ids(self): + return None + + # AND a service with the manager + service = CVUploadService(repository=repo, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor(), + application_state_manager=_InMemoryStateManager()) + mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { + "cv_max_uploads_per_user": 999, + "cv_rate_limit_per_minute": 999, + })())) + # AND stub structured extractor to return minimal structured data + mocker.patch.object(service._structured_extractor, "extract_structured_experiences", + mocker.AsyncMock( + return_value=CVStructuredExtraction(collected_data=[], experience_entities=[], + extraction_metadata={}))) + # AND stub storage upload (called via to_thread) by patching method to no-op + mocker.patch.object(service._cv_cloud_storage_service, "upload_cv", return_value=None) + # AND spy on injection service to verify it is invoked + inj_spy = mocker.patch("app.users.cv.service.StateInjectionService.inject_cv_data", + new=mocker.AsyncMock(return_value=True)) + + # WHEN the parsing is run with a session_id + upload_id = await service.parse_cv(user_id="u", file_bytes=b"x", filename="cv.pdf", session_id=123) + assert isinstance(upload_id, str) + await asyncio.sleep(0.1) + + # THEN injection is attempted and success is recorded + assert mark_injected_spy.called + inj_spy.assert_called() @pytest.mark.asyncio - async def test_list_user_uploads_returns_multiple_uploads(self, mocker): - # GIVEN a repository instance - repo = MockCVRepository() - # AND mocked get_user_uploads to return uploads - uploaded1 = UserCVUpload( - user_id="user123", - upload_id="upload1", - filename="cv1.pdf", - created_at=datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc), - content_type="application/pdf", - object_path="path/to/cv1.pdf", - markdown_object_path="path/to/cv1.md", - markdown_char_len=10, - md5_hash="hash1", - upload_process_state=UploadProcessState.COMPLETED, - experience_bullets=["Experience 1", "Experience 2"] - ) - uploaded2 = UserCVUpload( - user_id="user123", - upload_id="upload2", - filename="cv2.pdf", - created_at=datetime(2024, 5, 1, 12, 0, 0, tzinfo=timezone.utc), - content_type="application/pdf", - object_path="path/to/cv2.pdf", - markdown_object_path="path/to/cv2.md", - markdown_char_len=20, - md5_hash="hash2", - upload_process_state=UploadProcessState.COMPLETED, - experience_bullets=["Experience 1"] - ) - mock_get_user_uploads = mocker.patch.object( - repo, "get_user_uploads", mocker.AsyncMock(return_value=[uploaded1, uploaded2]) - ) - service = CVUploadService(repository=repo, cv_cloud_storage_service=MockCVCloudStorageService()) + async def test_pipeline_marks_injection_failed_when_injection_returns_false(self, mocker): + # GIVEN a repo we can spy on + class RepoSpy(MockCVRepository): + pass + + repo = RepoSpy() + mark_injected_spy = mocker.spy(repo, "mark_state_injected") + mark_injection_failed_spy = mocker.spy(repo, "mark_injection_failed") + + # AND a minimal application state manager + class _InMemoryStateManager: + def get_state(self, session_id: int): + from app.application_state import ApplicationState + return ApplicationState.new_state(session_id=session_id) + + def save_state(self, state): + return None + + def delete_state(self, session_id: int): + return None + + def get_all_session_ids(self): + return None + + service = CVUploadService(repository=repo, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor(), + application_state_manager=_InMemoryStateManager()) + mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { + "cv_max_uploads_per_user": 999, + "cv_rate_limit_per_minute": 999, + })())) + # Stub extractor and storage + mocker.patch.object(service._structured_extractor, "extract_structured_experiences", + mocker.AsyncMock( + return_value=CVStructuredExtraction(collected_data=[], experience_entities=[], + extraction_metadata={}))) + mocker.patch.object(service._cv_cloud_storage_service, "upload_cv", return_value=None) + # Force injection to return False + inj_spy = mocker.patch("app.users.cv.service.StateInjectionService.inject_cv_data", + new=mocker.AsyncMock(return_value=False)) + + # WHEN + upload_id = await service.parse_cv(user_id="u", file_bytes=b"x", filename="cv.pdf", session_id=456) + assert isinstance(upload_id, str) + await asyncio.sleep(0.1) + + # THEN injection attempted but mark_injection_failed called, not mark_state_injected + inj_spy.assert_called() + assert mark_injected_spy.called is False + assert mark_injection_failed_spy.called is True + + @pytest.mark.asyncio + async def test_pipeline_handles_cancel_request_without_marking_injected(self, mocker): + # GIVEN a repo where cancellation can be requested + class RepoSpy(MockCVRepository): + def __init__(self): + self._cancel_requested = False + + async def request_cancellation(self, user_id: str, upload_id: str) -> bool: + self._cancel_requested = True + return True + + repo = RepoSpy() + mocker.spy(repo, "mark_state_injected") + + class _InMemoryStateManager: + def get_state(self, session_id: int): + from app.application_state import ApplicationState + return ApplicationState.new_state(session_id=session_id) - # WHEN listing uploads for a user - result = await service.get_user_cvs(user_id="user123") + def save_state(self, state): + return None - # THEN repository is called with correct user_id - mock_get_user_uploads.assert_called_once_with(user_id="user123") + def delete_state(self, session_id: int): + return None - # AND it returns the expected uploads - assert isinstance(result, list) - assert len(result) == 2 - assert result[0].upload_id == "upload1" - assert result[1].upload_id == "upload2" - assert result[0].experience_bullets == ["Experience 1", "Experience 2"] - assert result[1].experience_bullets == ["Experience 1"] + def get_all_session_ids(self): + return None + + service = CVUploadService(repository=repo, cv_cloud_storage_service=MockCVCloudStorageService(), + structured_extractor=DummyStructuredExtractor(), + application_state_manager=_InMemoryStateManager()) + mocker.patch("app.users.cv.service.get_application_config", mocker.Mock(return_value=type("C", (), { + "cv_max_uploads_per_user": 999, + "cv_rate_limit_per_minute": 999, + })())) + + # Slow extractor so we can cancel during pipeline + async def slow_extract(*args, **kwargs): + await asyncio.sleep(0.05) + return CVStructuredExtraction(collected_data=[], experience_entities=[], extraction_metadata={}) + + mocker.patch.object(service._structured_extractor, "extract_structured_experiences", + mocker.AsyncMock(side_effect=slow_extract)) + mocker.patch.object(service._cv_cloud_storage_service, "upload_cv", return_value=None) + + # WHEN start parse to get upload id, then immediately cancel + upload_id = await service.parse_cv(user_id="u", file_bytes=b"x", filename="cv.pdf", session_id=789) + await service.cancel_upload(user_id="u", upload_id=upload_id) + await asyncio.sleep(0.1) diff --git a/backend/app/users/cv/test_state_injection_integration.py b/backend/app/users/cv/test_state_injection_integration.py new file mode 100644 index 000000000..8c05a7053 --- /dev/null +++ b/backend/app/users/cv/test_state_injection_integration.py @@ -0,0 +1,284 @@ +""" +Integration tests for CV upload with state injection. + +These tests verify that the full pipeline works: +1. CV upload extracts session_id from user preferences +2. Structured extraction produces data +3. State injection populates all agent states correctly +4. State persists and can be retrieved +""" +import asyncio + +import pytest +from unittest.mock import AsyncMock + +from app.application_state import ApplicationState +from app.agent.collect_experiences_agent._types import CollectedData +from app.agent.experience.experience_entity import ExperienceEntity, ResponsibilitiesData +from app.agent.experience.timeline import Timeline +from app.agent.experience.work_type import WorkType +from app.users.cv.service import CVUploadService +from app.users.cv.types import CVStructuredExtraction, UploadProcessState, UserCVUpload +from app.users.cv.test_service import MockCVRepository, MockCVCloudStorageService + + +class InMemoryCVRepository(MockCVRepository): + """In-memory repository that tracks injection status""" + + def __init__(self): + super().__init__() + self._uploads = {} + self._injected_upload_ids = set() + self._failed_injection_errors = {} + + async def insert_upload(self, upload: UserCVUpload) -> str: + self._uploads[upload.upload_id] = upload + return upload.upload_id + + async def get_upload_by_id(self, user_id: str, upload_id: str): + upload = self._uploads.get(upload_id) + if upload and upload.user_id == user_id: + return upload.model_dump() + return None + + async def get_upload_by_upload_id(self, upload_id: str): + upload = self._uploads.get(upload_id) + return upload.model_dump() if upload else None + + async def update_state(self, user_id: str, upload_id: str, *, to_state: UploadProcessState): + upload = self._uploads.get(upload_id) + if upload: + upload.upload_process_state = to_state + + async def mark_state_injected(self, user_id: str, upload_id: str): + self._injected_upload_ids.add(upload_id) + upload = self._uploads.get(upload_id) + if upload: + upload.state_injected = True + + async def mark_injection_failed(self, user_id: str, upload_id: str, error: str): + self._failed_injection_errors[upload_id] = error + upload = self._uploads.get(upload_id) + if upload: + upload.state_injected = False + upload.injection_error = error + + @property + def injected_uploads(self): + return self._injected_upload_ids + + @property + def failed_injections(self): + return self._failed_injection_errors + + +class InMemoryStateManager: + """In-memory state manager for testing""" + + def __init__(self): + self._states = {} + + async def get_state(self, session_id: int) -> ApplicationState: + if session_id not in self._states: + self._states[session_id] = ApplicationState.new_state(session_id=session_id) + return self._states[session_id] + + async def save_state(self, state: ApplicationState): + self._states[state.session_id] = state + + async def delete_state(self, session_id: int): + self._states.pop(session_id, None) + + +class TestCVStateInjectionIntegration: + """Integration tests for CV upload with state injection""" + + @pytest.mark.asyncio + async def test_full_pipeline_injects_state_correctly(self, mocker): + """Test that the full pipeline extracts, injects, and persists state""" + # GIVEN a session and user + given_session_id = 12345 + given_user_id = "test-user" + given_file_bytes = b"fake pdf content" + given_filename = "test.pdf" + + # AND an in-memory repository and storage service + given_repository = InMemoryCVRepository() + given_storage_service = MockCVCloudStorageService() + + # AND an in-memory application state manager + given_state_manager = InMemoryStateManager() + + # AND a CV upload service + class DummyStructuredExtractor: + def extract_structured_experiences(self, markdown_cv: str) -> CVStructuredExtraction: + return CVStructuredExtraction(collected_data=[], experience_entities=[], extraction_metadata={}) + cv_upload_service = CVUploadService( + repository=given_repository, + cv_cloud_storage_service=given_storage_service, + structured_extractor=DummyStructuredExtractor(), + application_state_manager=given_state_manager + ) + + # AND application config with high limits + mocker.patch("app.users.cv.service.get_application_config", return_value=type("C", (), { + "cv_max_uploads_per_user": 999, + "cv_rate_limit_per_minute": 999, + })()) + + # AND structured extraction data + given_experience_title = "Test Job" + given_company = "Test Corp" + given_location = "Test City" + given_start_date = "2020-01-01" + given_end_date = "2022-12-31" + given_experience_uuid = "test-exp-1" + + given_structured_extraction = CVStructuredExtraction( + collected_data=[ + CollectedData( + index=0, + experience_title=given_experience_title, + company=given_company, + location=given_location, + start_date=given_start_date, + end_date=given_end_date, + paid_work=True, + work_type="waged-employee" + ) + ], + experience_entities=[ + ExperienceEntity( + uuid=given_experience_uuid, + experience_title=given_experience_title, + company=given_company, + location=given_location, + timeline=Timeline(start=given_start_date, end=given_end_date), + work_type=WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT, + responsibilities=ResponsibilitiesData(responsibilities=[ + "Coordinated cross-functional planning", + "Implemented monitoring dashboards", + "Facilitated weekly stakeholder reviews", + ]) + ) + ], + extraction_metadata={"total_experiences": 1} + ) + + # AND mocked structured extractor to return test data + mocker.patch.object(cv_upload_service._structured_extractor, "extract_structured_experiences", + new=AsyncMock(return_value=given_structured_extraction)) + + # AND mocked markdown conversion + given_markdown = "# Test CV\n\nTest Job at Test Corp" + mocker.patch("app.users.cv.service.convert_cv_bytes_to_markdown", + new=AsyncMock(return_value=given_markdown)) + + # WHEN uploading a CV with a session_id + returned_upload_id = await cv_upload_service.parse_cv( + user_id=given_user_id, + file_bytes=given_file_bytes, + filename=given_filename, + session_id=given_session_id + ) + + # Wait for background pipeline to complete + await asyncio.sleep(0.5) + + # THEN upload should be marked as completed + actual_upload = await given_repository.get_upload_by_id(given_user_id, returned_upload_id) + assert actual_upload is not None + assert actual_upload["upload_process_state"] == UploadProcessState.COMPLETED.value + + # AND state should be injected + assert returned_upload_id in given_repository.injected_uploads + + # AND application state should have the injected data + actual_application_state = await given_state_manager.get_state(given_session_id) + + # AND CollectExperiencesAgent state should have collected data + assert len(actual_application_state.collect_experience_state.collected_data) > 0 + assert any(cd.experience_title == given_experience_title for cd in actual_application_state.collect_experience_state.collected_data) + assert actual_application_state.collect_experience_state.first_time_visit is False + + # AND ExploreExperiencesAgent state should have experience entities + assert len(actual_application_state.explore_experiences_director_state.experiences_state) > 0 + assert any(given_experience_uuid in key for key in actual_application_state.explore_experiences_director_state.experiences_state.keys()) + + # AND SkillsExplorerAgent state should treat CV-injected experiences as fresh + # (They will be added to experiences_explored after going through the normal flow) + # The experience should NOT be in experiences_explored yet since it needs to go through exploration + # The first_time_for_experience entry should be removed (or not present) so it's treated as fresh + assert given_experience_uuid not in actual_application_state.skills_explorer_agent_state.first_time_for_experience + # The experience should be in the director's state ready to be processed + assert given_experience_uuid in actual_application_state.explore_experiences_director_state.experiences_state + + @pytest.mark.asyncio + async def test_pipeline_handles_injection_failure_gracefully(self, mocker): + """Test that pipeline continues even if injection fails""" + # GIVEN a session and user + given_session_id = 12345 + given_user_id = "test-user" + given_file_bytes = b"fake pdf content" + given_filename = "test.pdf" + + # AND an in-memory repository and storage service + given_repository = InMemoryCVRepository() + given_storage_service = MockCVCloudStorageService() + + # AND a state manager that fails on get_state + failing_state_manager = AsyncMock() + failing_state_manager.get_state = AsyncMock(side_effect=Exception("State fetch failed")) + failing_state_manager.save_state = AsyncMock() + + # AND a CV upload service + class DummyStructuredExtractor: + def extract_structured_experiences(self, markdown_cv: str) -> CVStructuredExtraction: + return CVStructuredExtraction(collected_data=[], experience_entities=[], extraction_metadata={}) + cv_upload_service = CVUploadService( + repository=given_repository, + cv_cloud_storage_service=given_storage_service, + structured_extractor=DummyStructuredExtractor(), + application_state_manager=failing_state_manager + ) + + # AND application config with high limits + mocker.patch("app.users.cv.service.get_application_config", return_value=type("C", (), { + "cv_max_uploads_per_user": 999, + "cv_rate_limit_per_minute": 999, + })()) + + # AND empty structured extraction data + given_empty_extraction = CVStructuredExtraction( + collected_data=[], + experience_entities=[], + extraction_metadata={} + ) + mocker.patch.object(cv_upload_service._structured_extractor, "extract_structured_experiences", + new=AsyncMock(return_value=given_empty_extraction)) + + # AND mocked markdown conversion + given_markdown = "# Test CV" + mocker.patch("app.users.cv.service.convert_cv_bytes_to_markdown", + new=AsyncMock(return_value=given_markdown)) + + # WHEN uploading with a session_id that will fail injection + returned_upload_id = await cv_upload_service.parse_cv( + user_id=given_user_id, + file_bytes=given_file_bytes, + filename=given_filename, + session_id=given_session_id + ) + + # Wait for pipeline to complete + await asyncio.sleep(0.5) + + # THEN upload should still complete + actual_upload = await given_repository.get_upload_by_id(given_user_id, returned_upload_id) + assert actual_upload is not None + assert actual_upload["upload_process_state"] == UploadProcessState.COMPLETED.value + + # AND injection failure should be recorded + assert returned_upload_id in given_repository.failed_injections + assert actual_upload.get("injection_error") is not None + diff --git a/backend/app/users/cv/test_state_injection_service.py b/backend/app/users/cv/test_state_injection_service.py new file mode 100644 index 000000000..a565e3985 --- /dev/null +++ b/backend/app/users/cv/test_state_injection_service.py @@ -0,0 +1,309 @@ +import pytest +from unittest.mock import AsyncMock + +from app.application_state import ApplicationState +from app.agent.collect_experiences_agent._types import CollectedData +from app.agent.explore_experiences_agent_director import ExperienceState, DiveInPhase +from app.agent.experience.experience_entity import ExperienceEntity, ResponsibilitiesData +from app.agent.experience.timeline import Timeline +from app.agent.experience.work_type import WorkType +from app.users.cv.services.state_injection_service import StateInjectionService +from app.users.cv.types import CVStructuredExtraction + + +def _create_test_collected_data(*, experience_title: str = "Software Engineer", + company: str = "Tech Corp", + location: str = "San Francisco", + start_date: str = "2020-01-01", + end_date: str = "2022-12-31", + index: int = 0) -> CollectedData: + """Helper to create test CollectedData""" + return CollectedData( + index=index, + experience_title=experience_title, + company=company, + location=location, + start_date=start_date, + end_date=end_date, + paid_work=True, + work_type="waged-employee" + ) + + +def _create_test_experience_entity(*, uuid: str = "exp1", + experience_title: str = "Software Engineer", + company: str = "Tech Corp", + location: str = "San Francisco", + start_date: str = "2020-01-01", + end_date: str = "2022-12-31", + responsibilities: list[str] | None = None) -> ExperienceEntity: + """Helper to create test ExperienceEntity""" + # Use default responsibilities only if None is explicitly passed (not if empty list) + default_responsibilities = responsibilities if responsibilities is not None else ["Developed web applications"] + return ExperienceEntity( + uuid=uuid, + experience_title=experience_title, + company=company, + location=location, + timeline=Timeline(start=start_date, end=end_date), + work_type=WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT, + responsibilities=ResponsibilitiesData( + responsibilities=default_responsibilities + ) + ) + + +class TestStateInjectionService: + """Tests for StateInjectionService""" + + @pytest.mark.asyncio + async def test_inject_cv_data_populates_collect_explore_and_skills_when_responsibilities_sufficient(self): + """Test that inject_cv_data properly populates all three agent states when data is complete.""" + # GIVEN a session and user + given_session_id = 123 + given_user_id = "user123" + given_application_state = ApplicationState.new_state(session_id=given_session_id) + + # AND a mock application state manager with the fresh state + mock_state_manager = AsyncMock() + mock_state_manager.get_state = AsyncMock(return_value=given_application_state) + mock_state_manager.save_state = AsyncMock() + + # AND a state injection service + injection_service = StateInjectionService(application_state_manager=mock_state_manager) + + # AND structured extraction data with collected data and experience entities + given_collected_data = [ + _create_test_collected_data(experience_title="Software Engineer", index=0) + ] + + given_experience_entity = _create_test_experience_entity( + uuid="exp1", + experience_title="Software Engineer", + responsibilities=[ + "Designed scalable web applications", + "Led code reviews across the team", + "Optimized deployment pipelines", + ], + ) + + given_structured_extraction = CVStructuredExtraction( + collected_data=given_collected_data, + experience_entities=[given_experience_entity], + extraction_metadata={} + ) + + # WHEN injecting CV data + injection_result = await injection_service.inject_cv_data( + user_id=given_user_id, + session_id=given_session_id, + structured_extraction=given_structured_extraction + ) + + # THEN injection should succeed + assert injection_result is True + + # AND state manager should be called correctly + mock_state_manager.get_state.assert_called_once_with(given_session_id) + mock_state_manager.save_state.assert_called_once_with(given_application_state) + + # AND CollectExperiencesAgent state should have collected data + assert len(given_application_state.collect_experience_state.collected_data) == 1 + assert given_application_state.collect_experience_state.collected_data[0].experience_title == "Software Engineer" + assert given_application_state.collect_experience_state.first_time_visit is False + + # AND ExploreExperiencesAgent state should have experience entities + assert len(given_application_state.explore_experiences_director_state.experiences_state) == 1 + assert "exp1" in given_application_state.explore_experiences_director_state.experiences_state + injected_experience_state = given_application_state.explore_experiences_director_state.experiences_state["exp1"] + assert injected_experience_state.experience.experience_title == "Software Engineer" + assert injected_experience_state.experience.uuid == "exp1" + # State injection service sets NOT_STARTED; agent director will decide flow based on responsibilities + assert injected_experience_state.dive_in_phase.name == "NOT_STARTED" + assert injected_experience_state.experience.questions_and_answers + question, answer = injected_experience_state.experience.questions_and_answers[-1] + assert "captured from your CV" in question + assert "• Designed scalable web applications" in answer + + # SkillsExplorerAgent treats CV-injected experiences as fresh (agent director will decide flow) + assert "exp1" not in given_application_state.skills_explorer_agent_state.first_time_for_experience + + @pytest.mark.asyncio + async def test_inject_cv_data_resets_existing_state_to_not_started(self): + """Existing experiences are reset to NOT_STARTED so agent director can decide flow based on responsibilities.""" + + given_session_id = 321 + given_user_id = "user321" + given_application_state = ApplicationState.new_state(session_id=given_session_id) + + # Seed an existing experience in EXPLORING_SKILLS phase with no responsibilities + seeded_experience = _create_test_experience_entity( + uuid="exp-existing", + experience_title="Project Manager", + responsibilities=[] + ) + given_application_state.explore_experiences_director_state.experiences_state["exp-existing"] = ExperienceState( + dive_in_phase=DiveInPhase.EXPLORING_SKILLS, + experience=seeded_experience + ) + + mock_state_manager = AsyncMock() + mock_state_manager.get_state = AsyncMock(return_value=given_application_state) + mock_state_manager.save_state = AsyncMock() + + injection_service = StateInjectionService(application_state_manager=mock_state_manager) + + enriched_experience = _create_test_experience_entity( + uuid="exp-existing", + experience_title="Project Manager", + responsibilities=[ + "Coordinated cross-team delivery timelines", + "Managed sprint planning ceremonies", + "Tracked project risks and mitigation plans", + ] + ) + + given_structured_extraction = CVStructuredExtraction( + collected_data=[_create_test_collected_data(index=0, experience_title="Project Manager")], + experience_entities=[enriched_experience], + extraction_metadata={} + ) + + injection_result = await injection_service.inject_cv_data( + user_id=given_user_id, + session_id=given_session_id, + structured_extraction=given_structured_extraction + ) + + assert injection_result is True + + updated_state = given_application_state.explore_experiences_director_state.experiences_state["exp-existing"] + # State injection service resets to NOT_STARTED; agent director will decide flow + assert updated_state.dive_in_phase == DiveInPhase.NOT_STARTED + assert updated_state.experience.responsibilities.responsibilities == [ + "Coordinated cross-team delivery timelines", + "Managed sprint planning ceremonies", + "Tracked project risks and mitigation plans", + ] + + question, answer = updated_state.experience.questions_and_answers[-1] + assert "captured from your CV" in question + assert "• Coordinated cross-team delivery timelines" in answer + + # SkillsExplorerAgent treats CV-injected experiences as fresh + assert "exp-existing" not in given_application_state.skills_explorer_agent_state.first_time_for_experience + + @pytest.mark.asyncio + async def test_inject_cv_data_leaves_sparse_responsibilities_for_conversation(self): + """Experiences with few responsibilities should remain in exploratory mode.""" + + given_session_id = 789 + given_user_id = "user789" + given_application_state = ApplicationState.new_state(session_id=given_session_id) + + mock_state_manager = AsyncMock() + mock_state_manager.get_state = AsyncMock(return_value=given_application_state) + mock_state_manager.save_state = AsyncMock() + + injection_service = StateInjectionService(application_state_manager=mock_state_manager) + + sparse_experience = _create_test_experience_entity( + uuid="exp-sparse", + experience_title="Analyst", + responsibilities=["Prepared weekly status reports"], + ) + + given_structured_extraction = CVStructuredExtraction( + collected_data=[_create_test_collected_data(index=0, experience_title="Analyst")], + experience_entities=[sparse_experience], + extraction_metadata={} + ) + + injection_result = await injection_service.inject_cv_data( + user_id=given_user_id, + session_id=given_session_id, + structured_extraction=given_structured_extraction + ) + + assert injection_result is True + + experience_state = given_application_state.explore_experiences_director_state.experiences_state["exp-sparse"] + assert experience_state.dive_in_phase.name == "NOT_STARTED" + assert experience_state.experience.questions_and_answers + assert given_application_state.skills_explorer_agent_state.first_time_for_experience.get("exp-sparse") is None + assert all( + "Analyst" not in summary + for summary in given_application_state.skills_explorer_agent_state.experiences_explored + ) + + @pytest.mark.asyncio + async def test_inject_cv_data_preserves_conversation_when_no_responsibilities(self): + """Experiences without responsibilities should still go through the normal dive-in flow.""" + given_session_id = 456 + given_user_id = "user456" + given_application_state = ApplicationState.new_state(session_id=given_session_id) + + mock_state_manager = AsyncMock() + mock_state_manager.get_state = AsyncMock(return_value=given_application_state) + mock_state_manager.save_state = AsyncMock() + + injection_service = StateInjectionService(application_state_manager=mock_state_manager) + + experience_without_responsibilities = ExperienceEntity( + uuid="exp-empty", + experience_title="Assistant", + responsibilities=ResponsibilitiesData(responsibilities=[]) + ) + + given_structured_extraction = CVStructuredExtraction( + collected_data=[_create_test_collected_data(index=0, experience_title="Assistant")], + experience_entities=[experience_without_responsibilities], + extraction_metadata={} + ) + + injection_result = await injection_service.inject_cv_data( + user_id=given_user_id, + session_id=given_session_id, + structured_extraction=given_structured_extraction + ) + + assert injection_result is True + + injected_state = given_application_state.explore_experiences_director_state.experiences_state["exp-empty"] + assert injected_state.dive_in_phase.name == "NOT_STARTED" + assert injected_state.experience.questions_and_answers == [] + assert "exp-empty" not in given_application_state.skills_explorer_agent_state.first_time_for_experience + + @pytest.mark.asyncio + async def test_inject_cv_data_handles_state_manager_error(self): + """Test that injection handles errors from state manager gracefully""" + # GIVEN a session and user + given_session_id = 999 + given_user_id = "user999" + + # AND a state manager that raises an error on get_state + failing_state_manager = AsyncMock() + failing_state_manager.get_state = AsyncMock(side_effect=Exception("State fetch failed")) + + # AND a state injection service + injection_service = StateInjectionService(application_state_manager=failing_state_manager) + + # AND structured extraction data + given_structured_extraction = CVStructuredExtraction( + collected_data=[_create_test_collected_data(index=0, experience_title="Test")], + experience_entities=[], + extraction_metadata={} + ) + + # WHEN injecting data and state manager fails + injection_result = await injection_service.inject_cv_data( + user_id=given_user_id, + session_id=given_session_id, + structured_extraction=given_structured_extraction + ) + + # THEN injection should return False + assert injection_result is False + + # AND save_state should not be called + failing_state_manager.save_state.assert_not_called() diff --git a/backend/app/users/cv/types.py b/backend/app/users/cv/types.py index 1616d8725..d4c8885aa 100644 --- a/backend/app/users/cv/types.py +++ b/backend/app/users/cv/types.py @@ -1,9 +1,11 @@ -import uuid from datetime import datetime, timezone from enum import Enum +import uuid from typing import Optional from pydantic import BaseModel, Field +from app.agent.collect_experiences_agent._types import CollectedData +from app.agent.experience.experience_entity import ExperienceEntity class CVUploadStateResponse(BaseModel): @@ -27,7 +29,15 @@ class ParsedCV(BaseModel): upload_id: str -class CVUploadStatusResponse(BaseModel): +class CVUploadListItemResponse(BaseModel): + """Response model for a single CV upload in the list endpoint""" + upload_id: str + filename: str + uploaded_at: str + upload_process_state: UploadProcessState + + +class CVUploadStatus(BaseModel): upload_id: str user_id: str filename: str @@ -37,17 +47,11 @@ class CVUploadStatusResponse(BaseModel): last_activity_at: datetime error_code: Optional['CVUploadErrorCode'] = None error_detail: str | None = None + state_injected: bool | None = None + injection_error: str | None = None experience_bullets: list[str] | None = None -class CVUploadResponseListItem(BaseModel): - upload_id: str - filename: str - uploaded_at: datetime - upload_process_state: UploadProcessState - experiences_data: Optional[list[str]] = None - - class CVUploadErrorCode(str, Enum): DUPLICATE_CV_UPLOAD = "DUPLICATE_CV_UPLOAD" MARKDOWN_TOO_LONG = "MARKDOWN_TOO_LONG" @@ -81,6 +85,16 @@ class UserCVUpload(BaseModel): # Optional error fields populated when FAILED error_code: str | None = Field(default=None, description="Machine-readable error code for failed uploads") error_detail: str | None = Field(default=None, description="Human-readable error detail for failed uploads") - # Optional experiences populated when COMPLETED - experience_bullets: list[str] | None = Field(default=None, - description="Extracted experiences bullets when available") + # State injection reporting + state_injected: bool = Field(default=False, description="Whether state was successfully injected") + injection_error: str | None = Field(default=None, description="Error message if injection failed") + # Structured extraction data stored when COMPLETED + structured_extraction: 'CVStructuredExtraction | None' = Field(default=None, + description="Structured extraction data for reinjection") + + +class CVStructuredExtraction(BaseModel): + """Structured extraction result compatible with agent states""" + collected_data: list[CollectedData] + experience_entities: list[ExperienceEntity] + extraction_metadata: dict diff --git a/backend/app/users/cv/utils/cv_responsibilities_extractor.py b/backend/app/users/cv/utils/cv_responsibilities_extractor.py new file mode 100644 index 000000000..6771fe65d --- /dev/null +++ b/backend/app/users/cv/utils/cv_responsibilities_extractor.py @@ -0,0 +1,58 @@ +import logging +from types import SimpleNamespace + +from app.agent.agent_types import AgentInput, AgentOutput +from app.agent.experience.experience_entity import ResponsibilitiesData +from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool +from app.conversation_memory.conversation_memory_types import ConversationContext, ConversationHistory, ConversationTurn + + +class CVResponsibilitiesExtractor: + """ + Thin wrapper around the existing responsibilities extraction logic + to support extracting responsibilities from CV text snippets. + """ + + def __init__(self, logger: logging.Logger | None, tool: _ResponsibilitiesExtractionTool): + self._logger = logger or logging.getLogger(self.__class__.__name__) + self._tool = tool + # expose underlying llm for tests that want to mock it directly + self._responsibilities_llm = self._tool._responsibilities_extraction_llm # noqa: SLF001 (intentional for tests) + + async def extract_responsibilities(self, experience_text: str) -> ResponsibilitiesData: + """Extract responsibilities given a single CV experience snippet.""" + # Build a minimal ConversationContext compatible with the tool + context = ConversationContext( + all_history=ConversationHistory(), + history=ConversationHistory(), + summary="", + ) + user_input = AgentInput(message=experience_text, is_artificial=True) + # provide a minimal agent output to construct a turn + agent_output = AgentOutput( + message_for_user="(cv responsibilities extraction)", + finished=True, + agent_type=None, + agent_response_time_in_sec=0, + llm_stats=[], + ) + context.all_history.turns.append(ConversationTurn(index=0, input=user_input, output=agent_output)) + + responsibilities, _stats = await self._tool.execute(user_input=user_input, context=context) + return responsibilities + + def _create_cv_context(self, experience_text: str): + """ + Create a light-weight context object for tests that mimics conversation context + with attributes accessed in tests (current_turn_index, user_input, agent_output). + """ + # Create a fake turn with user_input/agent_output attribute names as used in tests + turn = SimpleNamespace( + user_input=AgentInput(message=experience_text, is_artificial=True), + agent_output=SimpleNamespace(agent_type="cv_extractor"), + ) + history = SimpleNamespace(turns=[turn]) + fake_context = SimpleNamespace(all_history=history, current_turn_index=0) + return fake_context + + diff --git a/backend/app/users/cv/utils/cv_structured_extractor.py b/backend/app/users/cv/utils/cv_structured_extractor.py new file mode 100644 index 000000000..84841258e --- /dev/null +++ b/backend/app/users/cv/utils/cv_structured_extractor.py @@ -0,0 +1,307 @@ +import logging +from typing import List +from pydantic import BaseModel, Field + +from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor +from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool +from app.users.cv.types import CVStructuredExtraction +from app.agent.collect_experiences_agent._types import CollectedData +from app.agent.experience.experience_entity import ExperienceEntity, ResponsibilitiesData +from app.agent.experience.timeline import Timeline +from app.agent.experience.work_type import WorkType +from app.agent.llm_caller import LLMCaller +from app.agent.prompt_template import sanitize_input +from common_libs.llm.generative_models import GeminiGenerativeLLM +from common_libs.llm.models_utils import LLMConfig, JSON_GENERATION_CONFIG, get_config_variation +from common_libs.retry import Retry +from app.agent.penalty import get_penalty + +_TAGS_TO_FILTER = [ + "CV Markdown", + "System Instructions", + "User's Last Input", + "Conversation History", +] + + +class CVStructuredExperience(BaseModel): + """Structured experience data extracted from CV.""" + experience_title: str = Field(description="Job title or role") + company: str | None = Field(default=None, description="Company or organization name") + location: str | None = Field(default=None, description="Work location") + start_date: str | None = Field(default=None, description="Start date") + end_date: str | None = Field(default=None, description="End date") + work_type: str | None = Field(default=None, description="Type of work (paid, volunteer, etc.)") + description: str | None = Field(default=None, description="Experience description") + experience_markdown: str | None = Field( + default=None, + description="Raw markdown snippet for this experience, including title and bullets" + ) + + +class CVStructuredExtractionResponse(BaseModel): + """Response from enhanced CV extraction LLM.""" + experiences: list[CVStructuredExperience] = Field(default_factory=list) + + +class CVStructuredExperienceExtractor: + """CV structured extractor that extracts experience data and creates agent-compatible objects.""" + + def __init__(self, logger: logging.Logger, responsibilities_extractor: CVResponsibilitiesExtractor): + self._logger = logger + self._llm_caller: LLMCaller[CVStructuredExtractionResponse] = LLMCaller[CVStructuredExtractionResponse]( + model_response_type=CVStructuredExtractionResponse + ) + self._responsibilities_extractor = responsibilities_extractor + self._penalty_level = 1 + + async def extract_structured_experiences(self, markdown_cv: str) -> CVStructuredExtraction: + """ + Enhanced extraction: structured JSON extraction + parallel responsibilities extraction. + + :param markdown_cv: The CV content in markdown format + :return: Structured extraction result with collected data and experience entities + """ + + self._logger.info("Starting enhanced CV extraction pipeline") + + # Stage 1: Extract structured experience data using LLM + self._logger.debug("Stage 1: Extracting structured experiences") + structured_experiences = await self._extract_structured_experiences(markdown_cv) + self._logger.info("Extracted %d structured experiences", len(structured_experiences)) + + # Stage 2: Extract responsibilities for each experience (parallel processing) + self._logger.debug("Stage 2: Extracting responsibilities in parallel") + experience_entities = [] + for i, experience in enumerate(structured_experiences): + self._logger.debug("Processing experience %d: %s", i + 1, experience.experience_title) + self._logger.debug( + "Extracted dates for '%s': start_date=%s, end_date=%s", + experience.experience_title, + experience.start_date, + experience.end_date + ) + + # Extract responsibilities using existing responsibilities extraction logic + responsibilities_input = ( + experience.experience_markdown + or experience.description + or f"{experience.experience_title} at {experience.company or 'Unknown'}" + ) + responsibilities_data = await self._responsibilities_extractor.extract_responsibilities( + responsibilities_input + ) + self._logger.info( + "Responsibilities extracted {title=%s, company=%s, count=%d}", + experience.experience_title, + experience.company, + len(responsibilities_data.responsibilities), + ) + if responsibilities_data.responsibilities: + self._logger.debug( + "Responsibilities sample for '%s': %s", + experience.experience_title, + "; ".join(responsibilities_data.responsibilities[:5]) + ) + + # Create ExperienceEntity with extracted data + experience_entity = self._create_experience_entity(experience, responsibilities_data) + experience_entities.append(experience_entity) + + self._logger.info( + "ExperienceEntity built {title=%s, responsibilities=%d, timeline=%s}", + experience.experience_title, + len(responsibilities_data.responsibilities), + f"start={experience_entity.timeline.start if experience_entity.timeline else None}, end={experience_entity.timeline.end if experience_entity.timeline else None}" + ) + + # Convert to CollectedData format for CollectExperiencesAgent + self._logger.debug("Stage 3: Converting to CollectedData format") + collected_data = self._convert_to_collected_data(experience_entities) + + self._logger.info("Enhanced extraction completed: %d experiences, %d collected data items", + len(experience_entities), len(collected_data)) + + return CVStructuredExtraction( + collected_data=collected_data, + experience_entities=experience_entities, + extraction_metadata={"total_experiences": len(experience_entities)} + ) + + def _convert_to_collected_data(self, experience_entities: List[ExperienceEntity]) -> List[CollectedData]: + """ + Convert ExperienceEntity objects to CollectedData format for CollectExperiencesAgent compatibility. + + :param experience_entities: List of ExperienceEntity objects to convert + :return: List of CollectedData objects compatible with CollectExperiencesAgent + """ + collected_data = [] + + for i, experience in enumerate(experience_entities): + # Extract basic info from ExperienceEntity + collected_item = CollectedData( + index=i, + experience_title=experience.experience_title, + company=experience.company, + location=experience.location, + start_date=experience.timeline.start if experience.timeline else None, + end_date=experience.timeline.end if experience.timeline else None, + paid_work=None, # Will be determined by existing conversation flow + work_type=experience.work_type.name if experience.work_type else None + ) + collected_data.append(collected_item) + + return collected_data + + async def _extract_structured_experiences(self, markdown_cv: str) -> list[CVStructuredExperience]: + """Extract structured experience data using LLM.""" + + self._logger.info("Extracting structured experiences from markdown {md_length_chars=%s}", len(markdown_cv or "")) + prompt = self._create_prompt((markdown_cv or "").strip()) + self._logger.debug("Prompt preview: %s", prompt[:200].replace("\n", " ")) + + async def _callback(attempt: int, max_retries: int) -> tuple[list[CVStructuredExperience], float, BaseException | None]: + # Vary temperature/top_p slightly across retries to escape bad local minima + temperature_cfg = get_config_variation(start_temperature=0.0, end_temperature=0.3, + start_top_p=0.9, end_top_p=1.0, + attempt=attempt, max_retries=max_retries) + llm = GeminiGenerativeLLM( + system_instructions=self._create_system_instructions(), + config=LLMConfig( + generation_config=temperature_cfg | JSON_GENERATION_CONFIG | { + "max_output_tokens": 3000 + } + ) + ) + try: + model_response, _ = await self._llm_caller.call_llm( + llm=llm, + llm_input=prompt, + logger=self._logger, + ) + except Exception as e: + return [], get_penalty(self._penalty_level), e + + if not model_response: + return [], get_penalty(self._penalty_level), ValueError("LLM returned no model response") + + experiences = model_response.experiences or [] + if not experiences: + return [], get_penalty(self._penalty_level), ValueError("LLM returned empty experiences list") + + # Success + return experiences, 0.0, None + + experiences, _penalty, _error = await Retry[list[CVStructuredExperience]].call_with_penalty( + callback=_callback, logger=self._logger + ) + if experiences: + self._logger.info("Structured experiences extracted {items=%s}", len(experiences)) + self._logger.debug("Extraction preview: %s", "; ".join([exp.experience_title for exp in experiences[:3]])) + else: + self._logger.error("LLM extraction failed to produce structured experiences after retries") + return experiences + + def _create_prompt(self, markdown_cv: str) -> str: + """Create prompt for structured experience extraction.""" + clean_md = sanitize_input(markdown_cv, _TAGS_TO_FILTER) + return f""" + +{clean_md} + +""" + + def _create_system_instructions(self) -> str: + """Create system instructions for structured experience extraction.""" + return """ + +You are an expert CV parser that extracts structured work experience data. + +Task: From the provided content, extract work experiences as structured JSON data. + +JSON Output Schema (must strictly follow): +{ + "experiences": [ + { + "experience_title": "string", + "company": "string or null", + "location": "string or null", + "start_date": "string or null", + "end_date": "string or null", + "work_type": "string or null", + "description": "string or null", + "experience_markdown": "string or null" + } + ] +} + +Rules for extraction: +- Extract ALL work/livelihood experiences from the CV +- Each experience must have at least an experience_title +- Include company name if mentioned +- Include location if mentioned +- Include start_date and end_date if they appear in the CV +- If end date says "Present", "Current", "Ongoing", or similar: use "Present" as end_date +- Include work type if determinable (e.g., "paid", "volunteer", "internship") +- Include description if there are responsibilities/tasks mentioned +- Provide experience_markdown as the raw markdown snippet for the experience (title + company + any bullet lists or sentences) exactly as it appears in the CV +- Do NOT include personal data (names, emails, phone numbers, addresses) +- Do NOT include education unless it's work-related +- Do NOT include skills sections unless they're part of a specific role + +Examples: +- "Software Engineer at Google (2020-2023), Mountain View, CA" → experience_title: "Software Engineer", company: "Google", location: "Mountain View, CA", start_date: "2020", end_date: "2023" +- "Volunteered as tutor at local school" → experience_title: "Tutor", company: "Local School", work_type: "volunteer" +- If the CV contains: + "- Software Engineer, TechCorp (2020-2023)\n - Developed web applications\n - Led team" + then experience_markdown must contain the same lines with the same formatting. + +Respond with JSON only. + +""" + + def _create_experience_entity(self, experience: CVStructuredExperience, responsibilities_data: ResponsibilitiesData) -> ExperienceEntity: + """Create ExperienceEntity from structured experience data and responsibilities.""" + + # Create timeline if dates are available (either start or end date) + timeline = None + if experience.start_date or experience.end_date: + timeline = Timeline( + start=experience.start_date, + end=experience.end_date + ) + + # Determine work type + work_type = self._determine_work_type(experience.work_type) + + return ExperienceEntity( + experience_title=experience.experience_title, + company=experience.company, + location=experience.location, + timeline=timeline, + work_type=work_type, + responsibilities=responsibilities_data, + # Skills will be populated by existing skills processing pipeline + top_skills=[], + remaining_skills=[], + summary=None + ) + + def _determine_work_type(self, work_type_str: str | None) -> WorkType | None: + """Determine WorkType from string.""" + if not work_type_str: + return WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT + + work_type_mapping = { + "volunteer": WorkType.UNSEEN_UNPAID, + "volunteering": WorkType.UNSEEN_UNPAID, + "unpaid": WorkType.UNSEEN_UNPAID, + "internship": WorkType.FORMAL_SECTOR_UNPAID_TRAINEE_WORK, + "trainee": WorkType.FORMAL_SECTOR_UNPAID_TRAINEE_WORK, + "self-employed": WorkType.SELF_EMPLOYMENT, + "self employment": WorkType.SELF_EMPLOYMENT, + "paid": WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT, + "waged": WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT, + } + + return work_type_mapping.get(work_type_str.lower(), WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT) diff --git a/backend/app/users/cv/utils/llm_extractor.py b/backend/app/users/cv/utils/llm_extractor.py deleted file mode 100644 index 8eace4cea..000000000 --- a/backend/app/users/cv/utils/llm_extractor.py +++ /dev/null @@ -1,128 +0,0 @@ -import logging -from textwrap import dedent -from typing import Optional - -from pydantic import BaseModel, Field - -from app.agent.llm_caller import LLMCaller -from app.agent.penalty import get_penalty -from app.agent.prompt_template import sanitize_input -from common_libs.llm.generative_models import GeminiGenerativeLLM -from common_libs.llm.models_utils import LLMConfig, JSON_GENERATION_CONFIG, get_config_variation -from common_libs.retry import Retry - -_TAGS_TO_FILTER = [ - "CV Markdown", - "System Instructions", - "User's Last Input", - "Conversation History", -] - - -class CVExtractionResponse(BaseModel): - experiences: list[str] = Field(default_factory=list) - - -class CVExperienceExtractor: - def __init__(self, logger: Optional[logging.Logger] = None): - self._logger = logger or logging.getLogger(self.__class__.__name__) - self._llm_caller: LLMCaller[CVExtractionResponse] = LLMCaller[CVExtractionResponse]( - model_response_type=CVExtractionResponse - ) - # Since all errors (hard error, no response, empty list) result in an empty reponse - # we treat them all as retryable with the same penalty - self._penalty_level = 1 - - @staticmethod - def _prompt(markdown_cv: str) -> str: - clean_md = sanitize_input(markdown_cv, _TAGS_TO_FILTER) - return dedent( - """ - - {markdown} - - """ - ).format(markdown=clean_md) - - @staticmethod - def _json_system_instructions() -> str: - return dedent( - """ - - You are an expert CV parser. - Task: From the provided content, output ONLY job/livelihood experiences as a JSON object with the schema below. - - JSON Output Schema (must strictly follow): - { - "experiences": ["string", ...] - } - - Rules for experiences: - - Each item must be a single sentence describing a work/livelihood experience. - - Each experience must be captured. Even if two experiences look similar, as long as they are - unique in role/title, location, company, or timeframe - - Skip any experiences that are completely duplicated - - Do not number items and do not add bullets or prefixes. - - An experience typically includes a role/title and usually a company/organization or receiver of work, a timeframe (e.g., from X to Y, since X, Present) and a location. - - Do NOT include standalone responsibilities/tasks unless they belong to a separate role in the same sentence. - - Do NOT include personal data: no person names of the CV owner, no email addresses, no phone numbers, - no street addresses, no personal websites or profile links (LinkedIn, GitHub, etc.). Company/organization names - and city/country locations are allowed. - - Some CVs might have responsibilities linked to an experience, do not include the responsibilities in experiences ('experiences' field). - - Do not include only experience title without other details (at least one more detail). - - Examples (format to emulate; style guidance, not strict): - Worked as a project manager at the University of Oxford, from 2018 to 2020. It was a paid job and you worked remotely. - Co-founded Acme Inc. in 2022, a gen-ai startup based in DC, USA. You owned this business and your role was CEO. - Volunteered as an instructor at Community Center in Berlin, from 2015 to 2017. - - No prose outside the JSON. Respond with JSON only. - - """ - ) - - async def extract_experiences(self, markdown_cv: str) -> list[str]: - self._logger.info("Extracting experiences from markdown {md_length_chars=%s}", len(markdown_cv or "")) - prompt = self._prompt((markdown_cv or "").strip()) - self._logger.debug("Prompt preview: %s", prompt[:200].replace("\n", " ")) - - async def _callback(attempt: int, max_retries: int) -> tuple[list[str], float, BaseException | None]: - # Vary temperature/top_p slightly across retries to escape bad local minima - temperature_cfg = get_config_variation(start_temperature=0.0, end_temperature=0.3, - start_top_p=0.9, end_top_p=1.0, - attempt=attempt, max_retries=max_retries) - llm = GeminiGenerativeLLM( - system_instructions=self._json_system_instructions(), - config=LLMConfig( - generation_config=temperature_cfg | JSON_GENERATION_CONFIG | { - "max_output_tokens": 2048 - } - ) - ) - try: - model_response, _ = await self._llm_caller.call_llm( - llm=llm, - llm_input=prompt, - logger=self._logger, - ) - except Exception as e: - return [], get_penalty(self._penalty_level), e - - if not model_response: - return [], get_penalty(self._penalty_level), ValueError("LLM returned no model response") - - items = model_response.experiences or [] - if not items: - return [], get_penalty(self._penalty_level), ValueError("LLM returned empty experiences list") - - # Success - return items, 0.0, None - - items, _penalty, _error = await Retry[list[str]].call_with_penalty(callback=_callback, logger=self._logger) - if items: - self._logger.info("Experiences extracted {items=%s}", len(items)) - self._logger.debug("Extraction preview: %s", "; ".join(items[:3])) - else: - self._logger.error("LLM extraction failed to produce items after retries") - return items - - diff --git a/backend/evaluation_tests/app_conversation_cv_upload_e2e_test.py b/backend/evaluation_tests/app_conversation_cv_upload_e2e_test.py new file mode 100644 index 000000000..58a8fd846 --- /dev/null +++ b/backend/evaluation_tests/app_conversation_cv_upload_e2e_test.py @@ -0,0 +1,251 @@ +import logging.config +import asyncio +from pathlib import Path +from typing import Awaitable + +import pytest +from tqdm import tqdm + +from app.agent.linking_and_ranking_pipeline import ExperiencePipelineConfig +from app.application_state import ApplicationStateManager, ApplicationState +from app.store.database_application_state_store import DatabaseApplicationStateStore +from app.server_dependencies.db_dependencies import CompassDBProvider +from app.users.cv.service import CVUploadService +from app.users.cv.repository import UserCVRepository +from app.users.cv.test_service import MockCVCloudStorageService +from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor +from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor +from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool +from app.vector_search.vector_search_dependencies import SearchServices +from common_libs.test_utilities import get_random_session_id +from evaluation_tests.conversation_libs import conversation_generator +from evaluation_tests.conversation_libs.conversation_test_function import LLMSimulatedUser +from evaluation_tests.conversation_libs.evaluators.evaluation_result import ConversationEvaluationRecord +from evaluation_tests.conversation_libs.evaluators.evaluator_builder import create_evaluator +from evaluation_tests.conversation_libs.fake_conversation_context import save_conversation +from evaluation_tests.core_e2e_tests_cases import cv_upload_test_cases, CVUploadE2ETestCase +from evaluation_tests.e2e_chat_executor import E2EChatExecutor +from evaluation_tests.experience_summarizer.experience_summarizer_evaluator import ExperienceSummarizerEvaluator +from evaluation_tests.get_test_cases_to_run_func import get_test_cases_to_run + + +@pytest.fixture(scope="function") +def current_cv_upload_test_case(request) -> CVUploadE2ETestCase: + return request.param + + +@pytest.mark.asyncio +@pytest.mark.evaluation_test("gemini-2.0-flash-001/") +@pytest.mark.repeat(3) +@pytest.mark.parametrize('current_cv_upload_test_case', get_test_cases_to_run(cv_upload_test_cases), + ids=[case.name for case in get_test_cases_to_run(cv_upload_test_cases)]) +async def test_cv_upload_app_chat( + max_iterations: int, + current_cv_upload_test_case: CVUploadE2ETestCase, + common_folder_path: str, + setup_search_services: Awaitable[SearchServices], + setup_application_config +): + """ + E2E conversation test with CV upload, based on the test cases specified above. + It uploads a CV first (which injects state), then runs the conversation. + It calls the same endpoint as the frontend would call and does not mock any of the tested components. + """ + logger = logging.getLogger() + logger.info(f"Running CV upload test case {current_cv_upload_test_case.name}") + + # Skip if no CV file specified + if not current_cv_upload_test_case.cv_file_path: + pytest.skip(f"Test case {current_cv_upload_test_case.name} has no CV file specified") + + session_id = get_random_session_id() + user_id = f"test-user-{session_id}" + + # Load CV file + cv_base_dir = Path(__file__).parent / "cv_parser" / "test_inputs" + cv_file_path = cv_base_dir / current_cv_upload_test_case.cv_file_path + if not cv_file_path.exists(): + pytest.skip(f"CV file not found: {cv_file_path}") + + file_bytes = cv_file_path.read_bytes() + filename = cv_file_path.name + + # Setup search services + search_services = await setup_search_services + experience_pipeline_config = ExperiencePipelineConfig.model_validate( + {"number_of_clusters": current_cv_upload_test_case.given_number_of_clusters, + "number_of_top_skills_to_pick_per_cluster": current_cv_upload_test_case.given_number_of_top_skills_to_pick_per_cluster}) + logger.info(f"Experience pipeline config: {experience_pipeline_config}") + + # Setup ApplicationStateManager for CV upload (uses same DB as conversation will use) + db = await CompassDBProvider.get_application_db() + application_state_manager = ApplicationStateManager( + store=DatabaseApplicationStateStore(db), + default_country_of_user=current_cv_upload_test_case.country_of_user + ) + + # Ensure state exists for this session before CV upload + initial_state = ApplicationState.new_state( + session_id=session_id, + country_of_user=current_cv_upload_test_case.country_of_user + ) + await application_state_manager.save_state(initial_state) + + # Setup CV upload service with real extractors (e2e test uses real LLMs) + user_db = await CompassDBProvider.get_userdata_db() + cv_repository = UserCVRepository(user_db) + cv_storage_service = MockCVCloudStorageService() + cv_logger = logging.getLogger("CVUploadService") + # Wire dependencies explicitly for e2e test + tool = _ResponsibilitiesExtractionTool(cv_logger) + resp_extractor = CVResponsibilitiesExtractor(cv_logger, tool) + structured_extractor = CVStructuredExperienceExtractor(cv_logger, resp_extractor) + cv_upload_service = CVUploadService( + repository=cv_repository, + cv_cloud_storage_service=cv_storage_service, + application_state_manager=application_state_manager, + structured_extractor=structured_extractor + ) + + # Upload CV (this will inject state) + logger.info(f"Uploading CV: {filename}") + upload_id = await cv_upload_service.parse_cv( + user_id=user_id, + file_bytes=file_bytes, + filename=filename, + session_id=session_id # Use same session_id for injection + ) + + # Wait for CV processing pipeline to complete (including state injection) + logger.info(f"Waiting for CV processing to complete (upload_id: {upload_id})") + max_wait_time = 60 # seconds + wait_interval = 0.5 # seconds + waited = 0 + while waited < max_wait_time: + status = await cv_upload_service.get_upload_status(user_id=user_id, upload_id=upload_id) + if status and status.get("upload_process_state") in ["COMPLETED", "FAILED"]: + break + await asyncio.sleep(wait_interval) + waited += wait_interval + + if status and status.get("upload_process_state") == "FAILED": + logger.warning(f"CV upload failed: {status.get('error_detail')}") + + # Now create chat executor - it will create new state, so we need to load injected state + # Load state from manager (which has injected CV data) + injected_state = await application_state_manager.get_state(session_id) + + chat_executor = E2EChatExecutor(session_id=session_id, + default_country_of_user=current_cv_upload_test_case.country_of_user, + search_services=search_services, + experience_pipeline_config=experience_pipeline_config) + + # Replace executor's state with injected state (sync all agent states) + chat_executor._state = injected_state + chat_executor._conversation_memory_manager.set_state(injected_state.conversation_memory_manager_state) + chat_executor._agent_director.set_state(injected_state.agent_director_state) + chat_executor._agent_director.get_welcome_agent().set_state(injected_state.welcome_agent_state) + chat_executor._agent_director.get_explore_experiences_agent().set_state(injected_state.explore_experiences_director_state) + chat_executor._agent_director.get_explore_experiences_agent().get_collect_experiences_agent().set_state( + injected_state.collect_experience_state) + chat_executor._agent_director.get_explore_experiences_agent().get_exploring_skills_agent().set_state( + injected_state.skills_explorer_agent_state) + + evaluation_result = ConversationEvaluationRecord(simulated_user_prompt=current_cv_upload_test_case.simulated_user_prompt, + test_case=current_cv_upload_test_case.name) + failures = [] + try: + evaluation_result.add_conversation_records( + await conversation_generator.generate( + max_iterations=current_cv_upload_test_case.conversation_rounds if current_cv_upload_test_case.conversation_rounds else max_iterations, + execute_simulated_user=LLMSimulatedUser( + system_instructions=current_cv_upload_test_case.simulated_user_prompt), + execute_evaluated_agent=lambda agent_input: chat_executor.send_message(agent_input=agent_input), + is_finished=lambda agent_output: chat_executor.conversation_is_complete(agent_output=agent_output), + )) + actual_experiences_explored = chat_executor.get_experiences_explored() + + # Assert that at least one experience has been explored, + if not chat_executor.get_experiences_explored(): + failures.append("No experiences were explored during the conversation.") + else: + logger.info(f"Experiences successfully explored: {len(actual_experiences_explored)}") + + # Assert that all experiences discovered have been explored + actual_experiences_discovered = chat_executor.get_experiences_discovered() + if not actual_experiences_discovered: + failures.append("No experiences were discovered during the conversation.") + else: + logger.info(f"Experiences successfully discovered: {len(actual_experiences_discovered)}") + + # Assert that the discovered experiences match the explored ones + uuids_discovered = {exp.uuid for exp in actual_experiences_discovered} + uuids_explored = {exp.uuid for exp in actual_experiences_explored} + diff = uuids_discovered.symmetric_difference(uuids_explored) + if diff: + failures.append(f"Discovered experiences {uuids_discovered} do not match explored experiences {uuids_explored}." + f" - Difference: {diff}") + else: + logger.info("Discovered experiences match explored experiences.") + + # Assert that all experiences explored have at least the expected number of top skills explored + expected_top_skills_count = current_cv_upload_test_case.given_number_of_clusters * current_cv_upload_test_case.given_number_of_top_skills_to_pick_per_cluster + _passed_top_skills_count = True + # AND that all experiences explored have a summary + # AND that all experiences explored pass the ExperienceSummarizerEvaluator + _passed_has_summary = True + experience_summarizer_evaluator = ExperienceSummarizerEvaluator(current_cv_upload_test_case.country_of_user) + for experience in actual_experiences_explored: + if not experience.summary: + _passed_has_summary = False + failures.append(f"Experience {experience.experience_title} has no summary.") + if not experience.top_skills: + _passed_top_skills_count = False + failures.append(f"Experience {experience.experience_title} has no skills explored.") + elif len(experience.top_skills) < expected_top_skills_count: + _passed_top_skills_count = False + failures.append(f"Experience {experience.experience_title} " + f"has less than {expected_top_skills_count} skills explored: {len(experience.top_skills)}") + eval_result = await experience_summarizer_evaluator.evaluate( + experience_title=experience.experience_title, + company=experience.company, + work_type=experience.work_type, + responsibilities=experience.responsibilities.responsibilities, + top_skills=experience.top_skills, + questions_and_answers=experience.questions_and_answers, + llm_summary=experience.summary + ) + evaluation_result.add_evaluation_result(eval_result) + logger.info(f'Evaluation for {eval_result.evaluator_name}: {eval_result.score} {eval_result.reasoning}') + if not eval_result.meets_requirements: + failures.append(f"Experience {experience.experience_title} failed the summarization evaluation: " + f"{eval_result.reasoning}") + + if _passed_has_summary: + logger.info(f"All experiences explored have a summary.") + + if _passed_top_skills_count: + logger.info(f"All experiences explored have at least {expected_top_skills_count} skills explored.") + + for evaluation in tqdm(current_cv_upload_test_case.evaluations, desc='Evaluating'): + output = await create_evaluator(evaluation.type).evaluate(evaluation_result) + evaluation_result.add_evaluation_result(output) + logger.info(f'Evaluation for {output.evaluator_name}: {output.score} {output.reasoning}') + if output.score < evaluation.expected: + failures.append(f"{output.evaluator_name} expected " + f"{evaluation.expected} actual {output.score}") + except Exception as e: + logger.exception(f"Error in test case {current_cv_upload_test_case.name}: {e}", exc_info=True) + failures.append(f"Error in test case {current_cv_upload_test_case.name}: {e}") + finally: + output_folder = common_folder_path + 'e2e_test_cv_upload_' + current_cv_upload_test_case.name + evaluation_result.save_data(folder=output_folder, base_file_name='evaluation_record') + context = await chat_executor.get_conversation_memory_manager().get_conversation_context() + save_conversation(context, title=current_cv_upload_test_case.name, folder_path=output_folder) + + if failures: + failures = "\n - ".join(failures) + pytest.fail(f"Test case {current_cv_upload_test_case.name} failed with errors: {failures}") + else: + logger.info(f"Test case {current_cv_upload_test_case.name} passed") + diff --git a/backend/evaluation_tests/core_e2e_tests_cases.py b/backend/evaluation_tests/core_e2e_tests_cases.py index 6af645c66..7e1d73a49 100644 --- a/backend/evaluation_tests/core_e2e_tests_cases.py +++ b/backend/evaluation_tests/core_e2e_tests_cases.py @@ -1,4 +1,5 @@ from textwrap import dedent +from typing import Optional from pydantic import ConfigDict, BaseModel @@ -324,7 +325,7 @@ class E2ESpecificTestCase(E2ETestCase, DiscoveredExperienceTestCase): """) + system_instruction_prompt, evaluations=[Evaluation(type=EvaluationType.CONCISENESS, expected=60)] ), - + # Comprehensive multi-experience E2E test covering new functionality E2ETestCase( country_of_user=Country.SOUTH_AFRICA, @@ -354,3 +355,41 @@ class E2ESpecificTestCase(E2ETestCase, DiscoveredExperienceTestCase): evaluations=[Evaluation(type=EvaluationType.CONCISENESS, expected=30)] ) ] + + +class CVUploadE2ETestCase(E2ETestCase): + """ + E2E test case that includes CV upload before conversation starts. + """ + cv_file_path: Optional[str] = None + """ + Path to the CV file to upload (relative to evaluation_tests/cv_parser/test_inputs/) + If None, test will skip CV upload + """ + model_config = ConfigDict(extra="forbid") + + +cv_upload_test_cases = [ + CVUploadE2ETestCase( + country_of_user=Country.UNSPECIFIED, + conversation_rounds=50, + name='cv_upload_state_injection_e2e', + cv_file_path="simple-timeline.docx", # Use existing test CV file + simulated_user_prompt=dedent(""" + You are a professional who has uploaded your CV. + If asked if you want to start the conversation, agree to start without mentioning your CV upload. + + You have already uploaded your CV with your experiences: + - Software Developer at TechCorp from 2020 to 2022 + - Web Designer (Freelance) since 2022 + + When the agent asks about your experiences, DO NOT repeat all the information you already provided in your CV. + Instead, acknowledge that you've uploaded your CV and ask if they can see it. If they confirm they can see it, + just provide additional details or clarifications when asked. If they say they don't have access to it, + then provide the information naturally. + + Be concise and don't repeat information unnecessarily. + """) + system_instruction_prompt, + evaluations=[Evaluation(type=EvaluationType.CONCISENESS, expected=60)] + ), +] diff --git a/backend/evaluation_tests/cv_extraction_quality/responsibilities_evaluator.py b/backend/evaluation_tests/cv_extraction_quality/responsibilities_evaluator.py new file mode 100644 index 000000000..4b2c60aa0 --- /dev/null +++ b/backend/evaluation_tests/cv_extraction_quality/responsibilities_evaluator.py @@ -0,0 +1,103 @@ +import logging +from textwrap import dedent + +from pydantic import BaseModel, Field + +from app.agent.llm_caller import LLMCaller +from app.agent.prompt_template import sanitize_input +from common_libs.llm.generative_models import GeminiGenerativeLLM +from common_libs.llm.models_utils import LLMConfig, JSON_GENERATION_CONFIG, ZERO_TEMPERATURE_GENERATION_CONFIG + + +class ResponsibilitiesPrecisionRecallOutput(BaseModel): + precision: float = Field(ge=0.0, le=1.0) + recall: float = Field(ge=0.0, le=1.0) + justification: str + evaluator_name: str = "Responsibilities Precision/Recall Evaluator" + + class Config: + extra = "forbid" + + +class ResponsibilitiesEvaluator: + def __init__(self): + self._logger = logging.getLogger(self.__class__.__name__) + self._llm_caller: LLMCaller[ResponsibilitiesPrecisionRecallOutput] = LLMCaller[ResponsibilitiesPrecisionRecallOutput]( + model_response_type=ResponsibilitiesPrecisionRecallOutput + ) + self._llm = GeminiGenerativeLLM( + system_instructions=self.get_system_instructions(), + config=LLMConfig( + generation_config=ZERO_TEMPERATURE_GENERATION_CONFIG | JSON_GENERATION_CONFIG | { + "max_output_tokens": 1024 + } + ) + ) + + @staticmethod + def get_system_instructions() -> str: + return dedent( + """ + + You are an expert resume reviewer evaluating the quality of extracted responsibilities for a single experience. + Respond ONLY with a compact JSON object matching this schema: + { + "precision": , + "recall": , + "justification": "", + "evaluator_name": "Responsibilities Precision/Recall Evaluator" + } + + """ + ) + + @staticmethod + def get_prompt(*, markdown_cv: str, experience_title: str, company: str | None, responsibilities: list[str]) -> str: + responsibilities_bullets = "\n".join([f"- {s}" for s in responsibilities]) or "(none)" + return dedent( + """ + + + {cv} + + + + Title: {title} + Company: {company} + + + + {resp} + + + Instructions: + - precision: fraction of listed responsibilities directly supported by the CV text for this experience. + - recall: fraction of the key responsibilities in the CV for this experience that appear above. + Respond strictly in JSON. + + """ + ).format( + cv=sanitize_input(markdown_cv, ["System Instructions", "User's Last Input", "Conversation History", "CV Markdown"]), + title=sanitize_input(experience_title, ["System Instructions"]), + company=sanitize_input(company or "Unknown", ["System Instructions"]), + resp=sanitize_input(responsibilities_bullets, ["System Instructions", "CV Markdown"]) + ) + + async def evaluate(self, *, markdown_cv: str, experience_title: str, company: str | None, responsibilities: list[str]) -> ResponsibilitiesPrecisionRecallOutput: + prompt = self.get_prompt( + markdown_cv=markdown_cv, + experience_title=experience_title, + company=company, + responsibilities=responsibilities, + ) + model_response, _ = await self._llm_caller.call_llm( + llm=self._llm, + llm_input=prompt, + logger=self._logger, + ) + if not model_response: + self._logger.warning("Evaluator did not return JSON; returning default failure result") + return ResponsibilitiesPrecisionRecallOutput(precision=0.0, recall=0.0, justification="No response") + return model_response + + diff --git a/backend/evaluation_tests/cv_extraction_quality/test_extraction_quality.py b/backend/evaluation_tests/cv_extraction_quality/test_extraction_quality.py new file mode 100644 index 000000000..99e318688 --- /dev/null +++ b/backend/evaluation_tests/cv_extraction_quality/test_extraction_quality.py @@ -0,0 +1,114 @@ +import os +import logging +from pathlib import Path + +import pytest +from evaluation_tests.conversation_libs.evaluators.evaluation_result import EvaluationRecord, EvaluationResult +from evaluation_tests.cv_extraction_quality.responsibilities_evaluator import ( + ResponsibilitiesEvaluator, + ResponsibilitiesPrecisionRecallOutput, +) + + +def _list_cv_inputs() -> list[Path]: + dataset_dir = Path(__file__).parent + # pdf_dir = dataset_dir.parent / "cv_extraction_quality" / "test_inputs" + pdf_dir = dataset_dir.parent / "cv_parser" / "test_inputs" + cases: list[Path] = [] + if pdf_dir.exists(): + cases.extend(sorted(pdf_dir.glob("*.pdf"))) + return cases + + +CASES = _list_cv_inputs() +assert CASES, "No CV PDFs found under evaluation_tests/cv_parser/test_inputs. Please add at least one .pdf CV." + + +class CVResponsibilitiesEvaluationRecord(EvaluationRecord): + cv_name: str + markdown_cv: str + per_experience_results: list[str] + averages: dict + + def _to_markdown(self) -> str: + lines = "\n".join(self.per_experience_results) or "(no experiences evaluated)" + avg_line = f"precision={self.averages.get('precision', 0):.2f}, recall={self.averages.get('recall', 0):.2f}" + return (f"# Test case: {self.test_case}\n\n" + f"## CV: {self.cv_name}\n\n" + f"## Input CV (markdown)\n{self.markdown_cv}\n\n" + f"## Per-experience results\n{lines}\n\n" + f"## Averages\n{avg_line}\n\n" + f"## Evaluations\n{self._get_evaluations_str()}") + + +@pytest.mark.asyncio +@pytest.mark.evaluation_test("gemini-2.5-pro") +@pytest.mark.repeat(1) +@pytest.mark.parametrize("cv_input_path", CASES, ids=[p.name for p in CASES] if CASES else None) +async def test_cv_extraction_quality_precision_recall(cv_input_path: Path, common_folder_path: str): + from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor + from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor + from app.users.cv.utils.markdown_converter import convert_cv_bytes_to_markdown + from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool + + logger = logging.getLogger("CVExtractionQualityEvaluator") + tool = _ResponsibilitiesExtractionTool(logger) + resp_extractor = CVResponsibilitiesExtractor(logger, tool) + extractor = CVStructuredExperienceExtractor(logger, resp_extractor) + # Load and convert a single input + if cv_input_path.suffix.lower() == ".pdf": + cv_markdown = convert_cv_bytes_to_markdown(cv_input_path.read_bytes(), cv_input_path.name, logger) + else: + cv_markdown = cv_input_path.read_text(encoding="utf-8") + + evaluator = ResponsibilitiesEvaluator() + evaluations: list[ResponsibilitiesPrecisionRecallOutput] = [] + per_exp_lines: list[str] = [] + extraction = await extractor.extract_structured_experiences(cv_markdown) + + # Collect experiences with responsibilities to evaluate + experiences_with_responsibilities = [] + for exp in extraction.experience_entities: + responsibilities = exp.responsibilities.responsibilities or [] + if responsibilities: + experiences_with_responsibilities.append((exp, responsibilities)) + + if not experiences_with_responsibilities: + pytest.skip(f"No responsibilities extracted from any experience in {cv_input_path.name}. Cannot evaluate precision/recall.") + + for exp, responsibilities in experiences_with_responsibilities: + evaluation = await evaluator.evaluate( + markdown_cv=cv_markdown, + experience_title=exp.experience_title, + company=exp.company, + responsibilities=responsibilities, + ) + evaluations.append(evaluation) + per_exp_lines.append( + f"- {exp.experience_title} at {exp.company or 'Unknown'}: precision={evaluation.precision:.2f}, " + f"recall={evaluation.recall:.2f} — {evaluation.justification}" + ) + + assert evaluations, "No evaluations were produced." + avg_prec = sum(e.precision for e in evaluations) / len(evaluations) + avg_rec = sum(e.recall for e in evaluations) / len(evaluations) + + assert avg_prec >= 0.2 or avg_rec >= 0.2, f"Low scores: precision={avg_prec:.2f}, recall={avg_rec:.2f}" + + # Save evaluation record in the standard format + record = CVResponsibilitiesEvaluationRecord( + test_case=f"cv_extraction_quality_{cv_input_path.name}", + cv_name=cv_input_path.name, + markdown_cv=cv_markdown, + per_experience_results=per_exp_lines, + averages={"precision": avg_prec, "recall": avg_rec}, + ) + record.add_evaluation_result(EvaluationResult( + evaluator_name="ResponsibilitiesPrecisionRecall", + score=int(round(100 * max(avg_prec, avg_rec))), + reasoning=f"precision={avg_prec:.2f}, recall={avg_rec:.2f}", + )) + out_folder = os.path.join(common_folder_path, f"cv_extraction_quality_{cv_input_path.stem}") + record.save_data(folder=out_folder, base_file_name="evaluation_record") + + diff --git a/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv1.pdf b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv1.pdf new file mode 100644 index 000000000..93e24a130 Binary files /dev/null and b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv1.pdf differ diff --git a/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv2.pdf b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv2.pdf new file mode 100644 index 000000000..3585fc7c3 Binary files /dev/null and b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv2.pdf differ diff --git a/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv3.pdf b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv3.pdf new file mode 100644 index 000000000..9d3c9e858 Binary files /dev/null and b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv3.pdf differ diff --git a/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv4.pdf b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv4.pdf new file mode 100644 index 000000000..6b254ba33 Binary files /dev/null and b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv4.pdf differ diff --git a/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv5.pdf b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv5.pdf new file mode 100644 index 000000000..d1ddee79a Binary files /dev/null and b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv5.pdf differ diff --git a/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv6.pdf b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv6.pdf new file mode 100644 index 000000000..b6dd6d4b6 Binary files /dev/null and b/backend/evaluation_tests/cv_extraction_quality/test_inputs/cv6.pdf differ diff --git a/backend/evaluation_tests/cv_extraction_quality/test_skills_relevance.py b/backend/evaluation_tests/cv_extraction_quality/test_skills_relevance.py new file mode 100644 index 000000000..f571dec78 --- /dev/null +++ b/backend/evaluation_tests/cv_extraction_quality/test_skills_relevance.py @@ -0,0 +1,174 @@ +import json +import logging +from pathlib import Path +from typing import Optional, Awaitable + +import pytest +from pydantic import BaseModel + +from app.agent.experience.work_type import WorkType +from app.agent.linking_and_ranking_pipeline import ExperiencePipeline, ExperiencePipelineConfig +from app.countries import Country +from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor +from app.users.cv.utils.markdown_converter import convert_cv_bytes_to_markdown +from app.vector_search.vector_search_dependencies import SearchServices +from common_libs.llm.generative_models import GeminiGenerativeLLM +from common_libs.llm.models_utils import LLMConfig +from common_libs.text_formatters import extract_json + + +evaluation_test = pytest.mark.evaluation_test +repeat = pytest.mark.repeat + + +class SkillsRelevanceEvaluation(BaseModel): + relevance: float + justification: str + suggested_skills: list[str] + + +async def _evaluate_skills_relevance( + llm: GeminiGenerativeLLM, + cv_text: str, + experience_title: str, + company: Optional[str], + responsibilities: list[str], + produced_skills: list[str], +) -> SkillsRelevanceEvaluation: + prompt = f""" + You are an expert career evaluator. Assess whether the skills produced by our automated pipeline are an accurate reflection of the responsibilities listed in this CV entry. + + CONTEXT - CV MARKDOWN: + {cv_text} + + EXPERIENCE: + Title: {experience_title} + Company: {company or ""} + Responsibilities: + {json.dumps(responsibilities, indent=2)} + + SKILLS PRODUCED BY PIPELINE: + {json.dumps(produced_skills, indent=2)} + + Provide a JSON with: + - relevance: number between 0 and 1 indicating how well the produced skills align with the CV responsibilities (1 = excellent match, 0 = completely wrong or missing) + - justification: a brief reason referencing the CV/responsibilities and the produced skills + - suggested_skills: up to 5 specific skills that should replace or augment the produced list if anything important is missing (strings) + """ + response = await llm.generate_content(prompt) + return extract_json.extract_json(response.text, SkillsRelevanceEvaluation) + + +def _list_cv_inputs() -> list[Path]: + dataset_dir = Path(__file__).parent + pdf_dir = dataset_dir.parent / "cv_parser" / "test_inputs" + cases: list[Path] = [] + if pdf_dir.exists(): + cases.extend(sorted(pdf_dir.glob("*.pdf"))) + return cases + + +CASES = _list_cv_inputs() +assert CASES, "No CV PDFs found under evaluation_tests/cv_parser/test_inputs. Please add at least one .pdf CV." + + +@pytest.fixture(scope="module", params=[pytest.param(f, id=f.name) for f in CASES]) +def cv_file_path(request) -> Path: + return request.param + + +@pytest.mark.asyncio +@evaluation_test +@repeat(1) +async def test_cv_skills_relevance_from_responsibilities( + cv_file_path: Path, + setup_search_services: Awaitable[SearchServices], +): + from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor + from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool + + logger = logging.getLogger("CVSkillsRelevanceEvaluator") + tool = _ResponsibilitiesExtractionTool(logger) + resp_extractor = CVResponsibilitiesExtractor(logger, tool) + extractor = CVStructuredExperienceExtractor(logger, resp_extractor) + + search_services = await setup_search_services + pipeline = ExperiencePipeline( + config=ExperiencePipelineConfig(), + search_services=search_services, + ) + + if cv_file_path.suffix.lower() == ".pdf": + file_bytes = cv_file_path.read_bytes() + cv_markdown = convert_cv_bytes_to_markdown(file_bytes, cv_file_path.name, logger) + else: + cv_markdown = cv_file_path.read_text(encoding="utf-8") + + extraction = await extractor.extract_structured_experiences(cv_markdown) + + # Use LLM to judge per-experience skills relevance implied by responsibilities + llm = GeminiGenerativeLLM(config=LLMConfig(language_model_name="gemini-2.5-pro")) + evaluations: list[SkillsRelevanceEvaluation] = [] + total_responsibilities = 0 + total_produced_skills = 0 + for exp in extraction.experience_entities: + responsibilities = exp.responsibilities.responsibilities or [] + if not responsibilities: + continue + total_responsibilities += len(responsibilities) + logger.info( + "Experience extracted {title=%s, company=%s, responsibilities=%s}", + exp.experience_title, + exp.company, + json.dumps(responsibilities, ensure_ascii=False), + ) + pipeline_response = await pipeline.execute( + experience_title=exp.experience_title or "", + responsibilities=responsibilities, + company_name=exp.company, + country_of_interest=Country.UNSPECIFIED, + work_type=exp.work_type or WorkType.FORMAL_SECTOR_WAGED_EMPLOYMENT, + ) + produced_skills = [skill.preferredLabel for skill in pipeline_response.top_skills] + if not produced_skills: + logger.warning( + "Pipeline produced no skills {title=%s, company=%s, responsibilities=%s}", + exp.experience_title, + exp.company, + json.dumps(responsibilities, ensure_ascii=False), + ) + continue + total_produced_skills += len(produced_skills) + eval_item = await _evaluate_skills_relevance( + llm=llm, + cv_text=cv_markdown, + experience_title=exp.experience_title, + company=exp.company, + responsibilities=responsibilities, + produced_skills=produced_skills, + ) + evaluations.append(eval_item) + logger.info( + "Evaluation result {title=%s, company=%s, pipeline_skills=%s, relevance=%.2f, suggested_skills=%s, justification=%s}", + exp.experience_title, + exp.company, + json.dumps(produced_skills, ensure_ascii=False), + eval_item.relevance, + json.dumps(eval_item.suggested_skills, ensure_ascii=False), + eval_item.justification, + ) + + # Skip very sparse CVs that do not provide enough responsibilities for a meaningful evaluation + if total_responsibilities < 3: + pytest.skip(f"Skipping {cv_file_path.name}: not enough responsibilities for evaluation (found {total_responsibilities}).") + + if total_produced_skills == 0: + pytest.skip(f"Skipping {cv_file_path.name}: pipeline produced no skills for evaluation.") + + assert evaluations, f"No skills relevance evaluations were produced for {cv_file_path.name}." + avg_relevance = sum(e.relevance for e in evaluations) / len(evaluations) + + # Minimal threshold for now; refine prompt later + assert avg_relevance >= 0.2, f"Low skills relevance for {cv_file_path.name}: {avg_relevance:.2f}" + + diff --git a/backend/evaluation_tests/cv_parser/test_cv_parser.py b/backend/evaluation_tests/cv_parser/test_cv_parser.py index 323e89671..00dc6ced6 100644 --- a/backend/evaluation_tests/cv_parser/test_cv_parser.py +++ b/backend/evaluation_tests/cv_parser/test_cv_parser.py @@ -2,7 +2,10 @@ import pytest -from app.users.cv.utils.llm_extractor import CVExperienceExtractor +import logging +from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor +from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor +from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool from evaluation_tests.conversation_libs.evaluators.evaluation_result import EvaluationResult, EvaluationRecord from evaluation_tests.cv_parser.cv_parser_evaluator import CVParserEvaluator from evaluation_tests.cv_parser.test_cases import test_cases, CVParserTestCase @@ -39,8 +42,24 @@ def _to_markdown(self) -> str: @pytest.mark.repeat(3) @pytest.mark.parametrize("case", test_cases_to_run, ids=[c.name for c in test_cases_to_run]) async def test_cv_parser(case: CVParserTestCase, common_folder_path: str): - extractor = CVExperienceExtractor() - items = await extractor.extract_experiences(case.markdown_cv) + logger = logging.getLogger("CVStructuredExtractorEval") + tool = _ResponsibilitiesExtractionTool(logger) + resp_extractor = CVResponsibilitiesExtractor(logger, tool) + extractor = CVStructuredExperienceExtractor(logger, resp_extractor) + structured = await extractor.extract_structured_experiences(case.markdown_cv) + # Convert structured experiences to simple lines for backward-compatible evaluation + items = [] + for e in structured.experience_entities: + parts = [e.experience_title] + if e.company: + parts.append(f"at {e.company}") + if e.location: + parts.append(e.location) + if e.timeline and e.timeline.start: + parts.append(e.timeline.start) + if e.timeline and e.timeline.end: + parts.append(e.timeline.end) + items.append(" ".join(parts).strip()) # write to an output file for manual inspection write_to_file(folder=common_folder_path + f"cv_parser_{case.name}", diff --git a/backend/evaluation_tests/cv_parser/test_parse_cv_on_files.py b/backend/evaluation_tests/cv_parser/test_parse_cv_on_files.py index facc106f7..012884bb6 100644 --- a/backend/evaluation_tests/cv_parser/test_parse_cv_on_files.py +++ b/backend/evaluation_tests/cv_parser/test_parse_cv_on_files.py @@ -5,7 +5,9 @@ import pytest -from app.users.cv.utils.llm_extractor import CVExperienceExtractor +from app.users.cv.utils.cv_structured_extractor import CVStructuredExperienceExtractor +from app.users.cv.utils.cv_responsibilities_extractor import CVResponsibilitiesExtractor +from app.agent.skill_explorer_agent._responsibilities_extraction_tool import _ResponsibilitiesExtractionTool from app.users.cv.utils.markdown_converter import convert_cv_bytes_to_markdown BASE_DIR = Path(__file__).parent @@ -45,16 +47,44 @@ async def test_parse_cv_on_real_files(input_path: Path | None, attempt: int): pytest.skip("No input files found under evaluation_tests/cv_parser/test_inputs; add files to run this test") logger = logging.getLogger("CVUploadServiceIntegrationTest") - extractor = CVExperienceExtractor(logger=logger) + tool = _ResponsibilitiesExtractionTool(logger) + resp_extractor = CVResponsibilitiesExtractor(logger, tool) + extractor = CVStructuredExperienceExtractor(logger, resp_extractor) file_bytes = input_path.read_bytes() filename = input_path.name # WHEN parsing the CV mark_down = convert_cv_bytes_to_markdown(file_bytes=file_bytes, filename=filename, logger=logger) - experiences_data = await extractor.extract_experiences(mark_down) - logger.info("Parsed experiences: %s", experiences_data or "[]") - experiences = experiences_data or [] + structured = await extractor.extract_structured_experiences(mark_down) + # Convert structured experiences to simple lines for backward-compatible keyword checks + def _extract_year(date_str: str | None) -> str | None: + """Extract year from date string (handles formats like '2019', '09/2019', '2019-09', etc.)""" + if not date_str: + return None + # Try to extract year (last 4 digits or first 4 digits if it looks like YYYY-MM-DD) + import re + # Match 4-digit year + year_match = re.search(r'\b(19|20)\d{2}\b', date_str) + return year_match.group(0) if year_match else date_str + + experiences = [] + for e in structured.experience_entities: + parts = [e.experience_title] + if e.company: + parts.append(f"at {e.company}") + if e.location: + parts.append(e.location) + if e.timeline and e.timeline.start: + year = _extract_year(e.timeline.start) + if year: + parts.append(year) + if e.timeline and e.timeline.end: + year = _extract_year(e.timeline.end) + if year: + parts.append(year) + experiences.append(" ".join(parts).strip()) + logger.info("Parsed experiences: %s", experiences or "[]") # THEN the extracted experiences should match expectations (probabilistic: run multiple times) expectation = _load_expectation_for(input_path) diff --git a/backend/evaluation_tests/experience_summarizer/experience_summarizer_evaluator.py b/backend/evaluation_tests/experience_summarizer/experience_summarizer_evaluator.py index 892dbf729..7b3618040 100644 --- a/backend/evaluation_tests/experience_summarizer/experience_summarizer_evaluator.py +++ b/backend/evaluation_tests/experience_summarizer/experience_summarizer_evaluator.py @@ -151,8 +151,8 @@ def get_prompt(*, responsibilities=', '.join(responsibilities) if responsibilities else "N/A", skills=_skills, questions_and_answers=_questions_and_answers, - llm_summary=llm_summary, - llm_summary_length=str(len(llm_summary.split())) # Count words for length + llm_summary=llm_summary or "", + llm_summary_length=str(len((llm_summary or "").split())) # Count words for length ) async def evaluate( diff --git a/frontend-new/src/CV/CVService/CVService.test.ts b/frontend-new/src/CV/CVService/CVService.test.ts index 62c8263b8..f9ce2a144 100644 --- a/frontend-new/src/CV/CVService/CVService.test.ts +++ b/frontend-new/src/CV/CVService/CVService.test.ts @@ -206,7 +206,6 @@ describe("CVService", () => { filename: "file.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: [], }; const fetchSpy = setupAPIServiceSpy(StatusCodes.OK, expectedResponse, "application/json;charset=UTF-8"); @@ -303,14 +302,12 @@ describe("CVService", () => { filename: "file1.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: [], }, { upload_id: "cv2", filename: "file2.docx", uploaded_at: new Date().toISOString(), upload_process_state: "PROCESSING", - experiences_data: [], }, ]; const fetchSpy = setupAPIServiceSpy(StatusCodes.OK, expectedResponse, "application/json;charset=UTF-8"); diff --git a/frontend-new/src/CV/CVService/CVService.ts b/frontend-new/src/CV/CVService/CVService.ts index c78bdbf9c..11e80cfaf 100644 --- a/frontend-new/src/CV/CVService/CVService.ts +++ b/frontend-new/src/CV/CVService/CVService.ts @@ -120,7 +120,9 @@ export default class CVService { last_activity_at: string; error_code?: string; error_detail?: string; - experience_bullets?: string[]; + state_injected?: boolean; + injection_error?: string | null; + experience_bullets?: string[] | null; }> { const serviceName = "CVService"; const serviceFunction = "getUploadStatus"; @@ -188,4 +190,32 @@ export default class CVService { ); } } + + public async reinjectFromUpload(userId: string, uploadId: string): Promise<{ success: boolean; error?: string; experience_bullets?: string[] | null }>{ + const serviceName = "CVService"; + const serviceFunction = "reinjectFromUpload"; + const method = "POST"; + const constructedUrl = `${this.cvEndpointUrl}/${userId}/cv/${uploadId}/inject`; + + const response = await customFetch(constructedUrl, { + method: method, + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + expectedStatusCode: StatusCodes.OK, + serviceName, + serviceFunction, + failureMessage: `Failed to reinject CV ${uploadId} for user ${userId}`, + expectedContentType: "application/json", + retryOnFailedToFetch: true, + }); + + const payload = (await response.json().catch(() => ({}))) as { state_injected?: boolean; error?: string; experience_bullets?: string[] | null }; + return { + success: Boolean(payload.state_injected), + error: payload.error || (payload.state_injected === false ? "Reinjection failed" : undefined), + experience_bullets: payload.experience_bullets + }; + } } diff --git a/frontend-new/src/CV/CVService/CVService.types.ts b/frontend-new/src/CV/CVService/CVService.types.ts index ee50a226f..10931d32b 100644 --- a/frontend-new/src/CV/CVService/CVService.types.ts +++ b/frontend-new/src/CV/CVService/CVService.types.ts @@ -5,5 +5,4 @@ export type CVListItem = { filename: string; uploaded_at: string; upload_process_state: UploadProcessState; - experiences_data: string[] | null; }; diff --git a/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.stories.tsx b/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.stories.tsx index 39c88587a..f6c061394 100644 --- a/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.stories.tsx +++ b/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.stories.tsx @@ -23,14 +23,12 @@ export const Shown: Story = { filename: "John_Doe_CV.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: [], }, { upload_id: "2", filename: "Jane_Smith_Resume.docx", uploaded_at: new Date(Date.now() - 86400000).toISOString(), // 1 day ago upload_process_state: "COMPLETED", - experiences_data: [], }, ], isLoading: false, @@ -53,7 +51,6 @@ export const Disabled: Story = { filename: "John_Doe_CV.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: [], }, ], isLoading: false, diff --git a/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.test.tsx b/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.test.tsx index 474b3e50e..df581b1e4 100644 --- a/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.test.tsx +++ b/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.test.tsx @@ -26,7 +26,6 @@ describe("UploadedCVsMenuContent", () => { filename: "foo_bar.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: [], }, ]} onSelect={jest.fn()} @@ -85,6 +84,28 @@ describe("UploadedCVsMenuContent", () => { expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); + + test("should show reinjection progress indicator when reinjecting", () => { + render( + + ); + + expect(screen.getByTestId(DATA_TEST_ID.UPLOADED_CVS_MENU_PROGRESS)).toBeInTheDocument(); + }); }); describe("action tests", () => { @@ -97,7 +118,6 @@ describe("UploadedCVsMenuContent", () => { filename: "foo_bar.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: ["foo", "bar"], }; // AND the component is rendered render( diff --git a/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.tsx b/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.tsx index 208ad43b8..1aff151eb 100644 --- a/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.tsx +++ b/frontend-new/src/CV/uploadedCVsMenu/UploadedCVsMenu.tsx @@ -1,5 +1,5 @@ import React from "react"; -import { Box, Typography, Skeleton, useTheme } from "@mui/material"; +import { Box, Typography, Skeleton, useTheme, CircularProgress } from "@mui/material"; import ArrowBackIcon from "@mui/icons-material/ArrowBack"; import InfoIcon from "@mui/icons-material/Info"; import DescriptionOutlinedIcon from "@mui/icons-material/DescriptionOutlined"; @@ -20,6 +20,7 @@ export const DATA_TEST_ID = { UPLOADED_CVS_MENU_FILE_NAME: `uploaded-cvs-menu-file-name-${uniqueId}`, UPLOADED_CVS_MENU_UPLOAD_DATE: `uploaded-cvs-menu-upload-date-${uniqueId}`, UPLOADED_CVS_MENU_SKELETON: `uploaded-cvs-menu-skeleton-${uniqueId}`, + UPLOADED_CVS_MENU_PROGRESS: `uploaded-cvs-menu-progress-${uniqueId}`, }; interface UploadedCVsMenuContentProps { @@ -28,6 +29,7 @@ interface UploadedCVsMenuContentProps { onBack: () => void; isLoading: boolean; currentPhase?: ConversationPhase; + isReinjecting?: boolean; } const UploadedCVsMenu: React.FC = ({ @@ -36,13 +38,10 @@ const UploadedCVsMenu: React.FC = ({ onSelect, uploadedCVs, isLoading, + isReinjecting = false, }) => { const theme = useTheme(); - const isCollectPhase = currentPhase === ConversationPhase.COLLECT_EXPERIENCES; - - const helpTipText = isCollectPhase - ? "Tap a CV to load its content into the text field. Review and send when you're ready." - : "CV selection is only available during the experience collection phase."; + const helpTipText = "Select a CV to inject its experiences into this conversation."; return ( = ({ } data-testid={DATA_TEST_ID.UPLOADED_CVS_MENU_HELP_TIP}> {helpTipText} + {isReinjecting && ( + + )} = ({ {!isLoading && uploadedCVs.map((cv) => { - const disabled = !isCollectPhase; + const disabled = isReinjecting; return ( { cancel_requested: false, created_at: new Date().toISOString(), last_activity_at: new Date().toISOString(), - experience_bullets: ["line 1", "line 2"], } as any); // WHEN the component is rendered diff --git a/frontend-new/src/chat/Chat.tsx b/frontend-new/src/chat/Chat.tsx index 2687add15..f7b650ad2 100644 --- a/frontend-new/src/chat/Chat.tsx +++ b/frontend-new/src/chat/Chat.tsx @@ -1,5 +1,7 @@ import React, { Suspense, useCallback, useEffect, useMemo, useRef, useState } from "react"; import ChatService from "src/chat/ChatService/ChatService"; +import MetricsService from "src/metrics/metricsService"; +import { EventType } from "src/metrics/types"; import ChatList from "src/chat/chatList/ChatList"; import { IChatMessage } from "src/chat/Chat.types"; import { @@ -20,7 +22,7 @@ import ChatMessageField from "./ChatMessageField/ChatMessageField"; import { useNavigate } from "react-router-dom"; import { routerPaths } from "src/app/routerPaths"; import UserPreferencesStateService from "src/userPreferences/UserPreferencesStateService"; -import { ConversationMessage, ConversationMessageSender } from "./ChatService/ChatService.types"; +import { ConversationMessage, ConversationMessageSender, ConversationResponse } from "./ChatService/ChatService.types"; import { Backdrop } from "src/theme/Backdrop/Backdrop"; import ExperiencesDrawer from "src/experiences/experiencesDrawer/ExperiencesDrawer"; import { DiveInPhase, Experience } from "src/experiences/experienceService/experiences.types"; @@ -28,7 +30,7 @@ import ExperienceService from "src/experiences/experienceService/experienceServi import InactiveBackdrop from "src/theme/Backdrop/InactiveBackdrop"; import ConfirmModalDialog from "src/theme/confirmModalDialog/ConfirmModalDialog"; import AuthenticationServiceFactory from "src/auth/services/Authentication.service.factory"; -import { ChatError } from "src/error/commonErrors"; +import { AuthenticationError, ChatError } from "src/error/commonErrors"; import authenticationStateService from "src/auth/services/AuthenticationState.service"; import { issueNewSession } from "./issueNewSession"; import { ChatProvider } from "src/chat/ChatContext"; @@ -40,6 +42,7 @@ import { CONVERSATION_CONCLUSION_CHAT_MESSAGE_TYPE } from "./chatMessage/convers import { SkillsRankingService } from "src/features/skillsRanking/skillsRankingService/skillsRankingService"; import { useSkillsRanking } from "src/features/skillsRanking/hooks/useSkillsRanking"; import cvService from "src/CV/CVService/CVService"; +import { useCvBulletsHandler } from "./hooks/useCvBulletsHandler"; import { getCvUploadDisplayMessage, getUploadErrorMessage, @@ -153,7 +156,6 @@ export const Chat: React.FC> = ({ const { showSkillsRanking } = useSkillsRanking(addMessageToChat, removeMessageFromChat); - // Depending on the typing state, add or remove the typing message from the messages list const addOrRemoveTypingMessage = (userIsTyping: boolean) => { if (userIsTyping) { @@ -218,6 +220,98 @@ export const Chat: React.FC> = ({ } }, [enqueueSnackbar, activeSessionId]); + // Helper function to process chat history response and update state + const processChatHistoryResponse = useCallback( + async ( + response: ConversationResponse, + options: { + skipUserMessage?: string; // Skip user messages matching this text (for optimistic updates) + sessionId: number; + } + ) => { + const { skipUserMessage, sessionId } = options; + + // Update explored experiences + setExploredExperiences(response.experiences_explored); + if (response.experiences_explored > exploredExperiences) { + setExploredExperiencesNotification(true); + await fetchExperiences(); + } + + // Process messages (skip conclusion message and optionally skip matching user messages) + // Use functional update to check for existing messages and avoid duplicates + setMessages((prevMessages) => { + const existingMessageIds = new Set(prevMessages.map((msg) => msg.message_id)); + const newMessages: IChatMessage[] = []; + + response.messages.forEach((messageItem: ConversationMessage, idx: number) => { + const isConclusionMessage = response.conversation_completed && idx === response.messages.length - 1; + if (!isConclusionMessage) { + // Skip if message already exists + if (existingMessageIds.has(messageItem.message_id)) { + return; + } + + // Skip user messages that match the one we already added optimistically + if ( + messageItem.sender === ConversationMessageSender.USER && + skipUserMessage && + messageItem.message === skipUserMessage + ) { + return; + } + + // Add all other messages + if (messageItem.sender === ConversationMessageSender.USER) { + newMessages.push(generateUserMessage(messageItem.message, messageItem.sent_at, messageItem.message_id)); + } else { + newMessages.push( + generateCompassMessage( + messageItem.message_id, + messageItem.message, + messageItem.sent_at, + messageItem.reaction + ) + ); + } + } + }); + + return [...prevMessages, ...newMessages]; + }); + + // Handle conclusion message and skills ranking flow + if (response.conversation_completed && response.messages.length) { + const lastMessage = response.messages[response.messages.length - 1]; + + if (SkillsRankingService.getInstance().isSkillsRankingFeatureEnabled()) { + const skillsRankingState = await SkillsRankingService.getInstance().getSkillsRankingState(sessionId); + const isAlreadyCompleted = skillsRankingState?.completed_at !== undefined; + + const showConclusionMessage = createShowConclusionMessage( + lastMessage, + addMessageToChat, + setAiIsTyping, + isAlreadyCompleted + ); + await showSkillsRanking(showConclusionMessage); + } else { + const conclusionMessage = generateConversationConclusionMessage( + lastMessage.message_id, + lastMessage.message + ); + addMessageToChat(conclusionMessage); + } + } + + // Update conversation state + setConversationCompleted(response.conversation_completed); + setConversationConductedAt(response.conversation_conducted_at); + setCurrentPhase((_previousCurrentPhase) => parseConversationPhase(response.current_phase, _previousCurrentPhase)); + }, + [exploredExperiences, fetchExperiences, addMessageToChat, showSkillsRanking] + ); + // Opens the experiences drawer and get experiences if needed const handleOpenExperiencesDrawer = useCallback(async () => { setIsDrawerOpen(true); @@ -253,6 +347,14 @@ export const Chat: React.FC> = ({ // Compute display message from status const getCvUploadDisplayMessageMemo = useCallback((status: UploadStatus): string => getCvUploadDisplayMessage(status), []); + // Use the CV bullets handler hook to consolidate upload and reinjection logic + const cvBulletsHandler = useCvBulletsHandler({ + sessionId: activeSessionId, + addMessageToChat, + setAiIsTyping, + processChatHistoryResponse, + }); + // Helper function to start polling for upload status const startPollingForUpload = useCallback((uploadId: string, messageId: string) => { // Stop any existing polling for this uploadId first @@ -268,11 +370,10 @@ export const Chat: React.FC> = ({ maxDurationMs: MAX_UPLOAD_POLL_MS, getStatus: async (id: string): Promise => { const currentUserId = authenticationStateService.getInstance().getUser()?.id; - if (!currentUserId) throw new Error("User ID missing"); + if (!currentUserId) throw new AuthenticationError("User ID missing"); const resp = await cvService.getInstance().getUploadStatus(currentUserId, id); - // Narrow to UploadStatus -return { - upload_process_state: resp.upload_process_state as UploadStatus["upload_process_state"], + return { + upload_process_state: resp.upload_process_state, cancel_requested: resp.cancel_requested, filename: resp.filename, user_id: resp.user_id, @@ -281,6 +382,8 @@ return { last_activity_at: resp.last_activity_at, error_code: resp.error_code, error_detail: resp.error_detail, + state_injected: resp.state_injected, + injection_error: resp.injection_error, experience_bullets: resp.experience_bullets, } as UploadStatus; }, @@ -300,20 +403,45 @@ return { return msg; })); }, - onComplete: (status: UploadStatus) => { + onComplete: async (status: UploadStatus) => { stopPollingForUpload(uploadId, handles.intervalId as any, handles.timeoutId as any); removeMessageFromChat(messageId); - const items: string[] | undefined = status.experience_bullets ?? undefined; - if (Array.isArray(items) && items.length > 0) { - const intro = "These are my experiences:"; - const bullets = items - .map((s) => (s?.trim()?.length ? `• ${s.trim()}` : "")) - .filter(Boolean) - .join("\n"); - const composed = bullets ? `${intro}\n${bullets}` : intro; - setPrefillMessage(composed); + enqueueSnackbar("CV processed and loaded", { variant: "success" }); + // Frontend metric: auto advance after CV upload completes + try { + const userId = authenticationStateService.getInstance().getUser()?.id; + if (userId && activeSessionId != null) { + MetricsService.getInstance().sendMetricsEvent({ + event_type: EventType.UI_INTERACTION, + user_id: userId, + actions: ["cv_upload_auto_advance"], + element_id: "cv_upload_auto_advance", + timestamp: new Date().toISOString(), + relevant_experiments: {}, + details: { + session_id: activeSessionId, + state_injected: Boolean((status as any).state_injected), + }, + }); + } + } catch (metricErr) { + console.error("Failed to send cv_upload_auto_advance metric", metricErr); + } + + // Send experience bullets as a real message if available + if (status.experience_bullets && status.experience_bullets.length > 0 && activeSessionId != null) { + try { + await cvBulletsHandler.handleBullets(status.experience_bullets); + } catch (err) { + // Error already logged in handleBullets + } + } else { + // If no bullets, show a message to the user that no experiences were found + // Keep the typing message visible longer so user can see the "No work experience data found" message + setTimeout(() => removeMessageFromChat(messageId), 3000); + enqueueSnackbar("No work experience data found in your CV", { variant: "info" }); + // Don't send the generic message - the state is already injected, just no experiences to display } - enqueueSnackbar("CV uploaded successfully", { variant: "success" }); }, onTerminal: (_status: UploadStatus) => { stopPollingForUpload(uploadId, handles.intervalId as any, handles.timeoutId as any); @@ -357,7 +485,7 @@ return { } }); setActiveUploads(prev => new Map(prev).set(uploadId, { messageId, intervalId: handles.intervalId as any, timeoutId: handles.timeoutId as any })); - }, [activeUploads, enqueueSnackbar, removeMessageFromChat, messages, stopPollingForUpload, getCvUploadDisplayMessageMemo]); + }, [activeUploads, stopPollingForUpload, getCvUploadDisplayMessageMemo, removeMessageFromChat, enqueueSnackbar, messages, activeSessionId, cvBulletsHandler]); // Helper function to cancel an upload const handleCancelUpload = useCallback(async (uploadId: string) => { @@ -457,7 +585,7 @@ return { ...msg, payload: { ...msg.payload, - onCancel: async () => await handleCancelUpload(response.uploadId!), + onCancel: async () => await handleCancelUpload(response.uploadId), } }; } @@ -512,60 +640,7 @@ return { try { // Send the user's message const response = await ChatService.getInstance().sendMessage(sessionId, userMessage); - - setExploredExperiences(response.experiences_explored); - - if (response.experiences_explored > exploredExperiences) { - setExploredExperiencesNotification(true); - await fetchExperiences(); - } - - response.messages.forEach((messageItem, idx) => { - const isConclusionMessage = response.conversation_completed && idx === response.messages.length - 1; - if (!isConclusionMessage) { - addMessageToChat( - generateCompassMessage( - messageItem.message_id, - messageItem.message, - messageItem.sent_at, - messageItem.reaction - ) - ); - } - }); - // Handle the conclusion message and skills ranking flow for new messages - if (response.conversation_completed && response.messages.length) { - const lastMessage = response.messages[response.messages.length - 1]; - - if (SkillsRankingService.getInstance().isSkillsRankingFeatureEnabled()) { - // Check if skill ranking is already completed - const skillsRankingState = await SkillsRankingService.getInstance().getSkillsRankingState(activeSessionId!); - const isAlreadyCompleted = skillsRankingState?.completed_at !== undefined; - - const showConclusionMessage = createShowConclusionMessage( - lastMessage, - addMessageToChat, - setAiIsTyping, - isAlreadyCompleted - ); - await showSkillsRanking(showConclusionMessage); - } else { - const conclusionMessage = generateConversationConclusionMessage( - lastMessage.message_id, - lastMessage.message - ); - - addMessageToChat(conclusionMessage); - } - } - - setConversationCompleted(response.conversation_completed); - setConversationConductedAt(response.conversation_conducted_at); - - // Set the current conversation phase - setCurrentPhase((_previousCurrentPhase) => { - return parseConversationPhase(response.current_phase, _previousCurrentPhase); - }); + await processChatHistoryResponse(response, { sessionId }); } catch (error) { console.error(new ChatError("Failed to send message:", error)); addMessageToChat(generatePleaseRepeatMessage()); @@ -573,7 +648,7 @@ return { setAiIsTyping(false); } }, - [addMessageToChat, exploredExperiences, fetchExperiences, activeSessionId, showSkillsRanking] + [addMessageToChat, processChatHistoryResponse] ); const initializeChat = useCallback( @@ -596,6 +671,8 @@ return { setMessages([generateTypingMessage()]); // AND clear the current phase setCurrentPhase(defaultCurrentPhase); + // AND clear CV upload errors + setCvUploadError(null); } else { console.debug("Failed to issue new session"); return false; @@ -699,6 +776,8 @@ return { const handleConfirmNewConversation = useCallback(async () => { setNewConversationDialog(false); setExploredExperiencesNotification(false); + // Clear CV upload errors when starting a new conversation + setCvUploadError(null); if (await initializeChat(currentUserId, null)) { enqueueSnackbar(NOTIFICATION_MESSAGES_TEXT.NEW_CONVERSATION_STARTED, { variant: "success" }); } else { @@ -862,12 +941,15 @@ return { 0} onUploadCv={handleUploadCv} currentPhase={currentPhase.phase} prefillMessage={prefillMessage} cvUploadError={cvUploadError} + activeSessionId={activeSessionId} + onCvBulletsSent={cvBulletsHandler.handleBulletsSent} /> diff --git a/frontend-new/src/chat/Chat.types.ts b/frontend-new/src/chat/Chat.types.ts index 9ce54a84d..dd6bbf142 100644 --- a/frontend-new/src/chat/Chat.types.ts +++ b/frontend-new/src/chat/Chat.types.ts @@ -30,5 +30,7 @@ export interface UploadStatus { last_activity_at?: string; error_code?: string | null; error_detail?: string | null; + state_injected?: boolean; + injection_error?: string | null; experience_bullets?: string[] | null; } \ No newline at end of file diff --git a/frontend-new/src/chat/ChatMessageField/CVUpload.stories.tsx b/frontend-new/src/chat/ChatMessageField/CVUpload.stories.tsx index 612d9e90f..fa4758038 100644 --- a/frontend-new/src/chat/ChatMessageField/CVUpload.stories.tsx +++ b/frontend-new/src/chat/ChatMessageField/CVUpload.stories.tsx @@ -58,7 +58,6 @@ const createMockCvList = (count: number): CVListItem[] => { filename: makeRandomCvFilename(idx), uploaded_at: new Date(Date.now() - idx * 3600_000).toISOString(), upload_process_state: "COMPLETED", - experiences_data: ["Worked as an Accounting Assistant.", `Worked at Company ${idx + 1} from 2012 to Present.`], })); }; diff --git a/frontend-new/src/chat/ChatMessageField/ChatMessageField.test.tsx b/frontend-new/src/chat/ChatMessageField/ChatMessageField.test.tsx index edf244d9e..9b736a4b6 100644 --- a/frontend-new/src/chat/ChatMessageField/ChatMessageField.test.tsx +++ b/frontend-new/src/chat/ChatMessageField/ChatMessageField.test.tsx @@ -23,6 +23,21 @@ import ErrorConstants from "src/error/restAPIError/RestAPIError.constants"; import { getCvUploadEnabled } from "src/envService"; import AuthenticationStateService from "src/auth/services/AuthenticationState.service"; import CVService from "src/CV/CVService/CVService"; +import ChatService from "src/chat/ChatService/ChatService"; +import { useSnackbar } from "src/theme/SnackbarProvider/SnackbarProvider"; + +// mock the snackbar +jest.mock("src/theme/SnackbarProvider/SnackbarProvider", () => { + const actual = jest.requireActual("src/theme/SnackbarProvider/SnackbarProvider"); + return { + ...actual, + __esModule: true, + useSnackbar: jest.fn().mockReturnValue({ + enqueueSnackbar: jest.fn(), + closeSnackbar: jest.fn(), + }), + }; +}); // mock the getCvUploadEnabled function jest.mock("src/envService", () => ({ @@ -658,7 +673,7 @@ describe("ChatMessageField", () => { }); describe("Plus button", () => { - let mockCVServiceInstance: { getAllCVs: jest.Mock }; + let mockCVServiceInstance: { getAllCVs: jest.Mock; reinjectFromUpload: jest.Mock }; beforeEach(() => { // Mock user to be logged in by default @@ -667,6 +682,7 @@ describe("ChatMessageField", () => { mockCVServiceInstance = { getAllCVs: jest.fn().mockResolvedValue([]), + reinjectFromUpload: jest.fn().mockResolvedValue({ success: true }), }; jest.spyOn(CVService, "getInstance").mockReturnValue(mockCVServiceInstance as any); }); @@ -690,7 +706,7 @@ describe("ChatMessageField", () => { // AND the context menu is opened await waitFor(() => { - expect(ContextMenu).toHaveBeenCalledWith( + expect(ContextMenu).toHaveBeenLastCalledWith( expect.objectContaining({ anchorEl: plusButton, open: true, @@ -739,7 +755,7 @@ describe("ChatMessageField", () => { // AND the context menu is opened await waitFor(() => { - expect(ContextMenu).toHaveBeenCalledWith( + expect(ContextMenu).toHaveBeenLastCalledWith( expect.objectContaining({ anchorEl: plusButton, open: true, @@ -879,14 +895,12 @@ describe("ChatMessageField", () => { filename: "foo_bar.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: ["foo"], }, { upload_id: "cv2", filename: "foo_baz.pdf", uploaded_at: new Date().toISOString(), upload_process_state: "COMPLETED", - experiences_data: ["foo", "bar"], }, ]; mockCVServiceInstance.getAllCVs.mockResolvedValue(mockCvs); @@ -923,22 +937,125 @@ describe("ChatMessageField", () => { const firstCVItem = screen.getByText("foo_bar.pdf"); await userEvent.click(firstCVItem); - // THEN expect the composed content from the selected CV to be added to the input field - const chatMessageField = screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD); + // THEN expect reinjection call and a success notification; the menu should close + await waitFor(() => { + expect(mockCVServiceInstance.reinjectFromUpload).toHaveBeenCalledWith("fooUser", "cv1"); + }); await waitFor(() => { - expect(chatMessageField).toHaveValue("These are my experiences:\n• foo"); + expect(useSnackbar().enqueueSnackbar).toHaveBeenCalledWith( + "CV processed and loaded", + expect.objectContaining({ variant: "success" }) + ); }); - // AND the menu should close - expect(screen.queryByTestId(MENU_ITEM_ID.VIEW_UPLOADED_CVS)).not.toBeInTheDocument(); + // Menu may remain mounted by ContextMenu wrapper; focus on reinjection + snackbar // AND no errors or warnings to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); + + test("should disable chat input while reinjecting a CV and show a notification when completed", async () => { + const givenPhase = ConversationPhase.COLLECT_EXPERIENCES; + const mockOnUploadCv = jest.fn(); + const mockSetAiIsTyping = jest.fn(); + const mockOnCvBulletsSent = jest.fn().mockResolvedValue(undefined); + const mockCvs = [ + { + upload_id: "cv1", + filename: "foo_bar.pdf", + uploaded_at: new Date().toISOString(), + upload_process_state: "COMPLETED", + }, + ]; + mockCVServiceInstance.getAllCVs.mockResolvedValue(mockCvs); + + let resolveReinject: (value: { success: boolean; experience_bullets?: string[] }) => void = () => {}; + const reinjectPromise = new Promise<{ success: boolean; experience_bullets?: string[] }>((resolve) => { + resolveReinject = resolve; + }); + mockCVServiceInstance.reinjectFromUpload.mockImplementationOnce(() => reinjectPromise); + + const mockChatServiceInstance = { + sendMessage: jest.fn().mockResolvedValue({ + experiences_explored: 0, + messages: [], + conversation_completed: false, + }), + }; + const chatServiceSpy = jest.spyOn(ChatService, "getInstance").mockReturnValue(mockChatServiceInstance as any); + + render( + + ); + + const plusButton = screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD_PLUS_BUTTON); + await userEvent.click(plusButton); + const viewUploadedCvsOption = screen.getByTestId(MENU_ITEM_ID.VIEW_UPLOADED_CVS); + await userEvent.click(viewUploadedCvsOption); + + const firstCVItem = await screen.findByText("foo_bar.pdf"); + await userEvent.click(firstCVItem); + + await waitFor(() => { + expect(mockCVServiceInstance.reinjectFromUpload).toHaveBeenCalledWith("fooUser", "cv1"); + }); + + await waitFor(() => { + expect(screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD)).toBeDisabled(); + }); + expect(screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD_SEND_BUTTON)).toBeDisabled(); + + await act(async () => { + resolveReinject({ + success: true, + experience_bullets: ["Software Engineer at Company A", "Data Scientist at Company B"] + }); + }); + + await waitFor(() => { + expect(screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD)).toBeEnabled(); + }); + await waitFor(() => { + expect(useSnackbar().enqueueSnackbar).toHaveBeenCalledWith("CV processed and loaded", expect.objectContaining({ variant: "success" })); + }); + await waitFor(() => { + expect(mockSetAiIsTyping).toHaveBeenCalledWith(true); + }); + await waitFor(() => { + expect(mockChatServiceInstance.sendMessage).toHaveBeenCalledWith( + 987, + "I have these experiences:\n\n• Software Engineer at Company A\n• Data Scientist at Company B \n\nLet's start with these." + ); + }); + await waitFor(() => { + expect(mockOnCvBulletsSent).toHaveBeenCalledWith( + "I have these experiences:\n\n• Software Engineer at Company A\n• Data Scientist at Company B \n\nLet's start with these.", + expect.objectContaining({ + experiences_explored: 0, + messages: [], + conversation_completed: false, + }) + ); + }); + await waitFor(() => { + expect(mockSetAiIsTyping).toHaveBeenCalledWith(false); + }); + + chatServiceSpy.mockRestore(); + }); }); }); describe("CV Upload Feature Flag", () => { - let mockCVServiceInstance: { getAllCVs: jest.Mock }; + let mockCVServiceInstance: { getAllCVs: jest.Mock; reinjectFromUpload: jest.Mock }; beforeEach(() => { // Mock user to be logged in by default @@ -947,6 +1064,7 @@ describe("ChatMessageField", () => { mockCVServiceInstance = { getAllCVs: jest.fn().mockResolvedValue([]), + reinjectFromUpload: jest.fn().mockResolvedValue({ success: true }), }; jest.spyOn(CVService, "getInstance").mockReturnValue(mockCVServiceInstance as any); }); @@ -1076,7 +1194,7 @@ describe("ChatMessageField", () => { // THEN expect the context menu to be rendered await waitFor(() => { - expect(ContextMenu).toHaveBeenCalledWith( + expect(ContextMenu).toHaveBeenLastCalledWith( expect.objectContaining({ anchorEl: plusButton, open: true, @@ -1109,16 +1227,16 @@ describe("ChatMessageField", () => { const plusButton = screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD_PLUS_BUTTON); await userEvent.click(plusButton); - // THEN expect the context menu to be rendered with the correct description + // THEN expect the context menu to be rendered with the correct description and enabled await waitFor(() => { - expect(ContextMenu).toHaveBeenCalledWith( + expect(ContextMenu).toHaveBeenLastCalledWith( expect.objectContaining({ anchorEl: plusButton, open: true, items: expect.arrayContaining([ expect.objectContaining({ - description: "You can upload your CV as soon as we start exploring your experiences", - disabled: true, // Should be disabled in INTRO phase + description: `PDF, DOCX, TXT • Max ${MAX_FILE_SIZE_MB} MB • ${MAX_MARKDOWN_CHARS} chars max`, + disabled: false, }), ]), }), @@ -1150,16 +1268,16 @@ describe("ChatMessageField", () => { const plusButton = screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD_PLUS_BUTTON); await userEvent.click(plusButton); - // THEN expect the context menu to be rendered with the correct description + // THEN expect the context menu to be rendered with the correct description and enabled await waitFor(() => { - expect(ContextMenu).toHaveBeenCalledWith( + expect(ContextMenu).toHaveBeenLastCalledWith( expect.objectContaining({ anchorEl: plusButton, open: true, items: expect.arrayContaining([ expect.objectContaining({ description: `PDF, DOCX, TXT • Max ${MAX_FILE_SIZE_MB} MB • ${MAX_MARKDOWN_CHARS} chars max`, - disabled: false, // Should be enabled in COLLECT_EXPERIENCES phase + disabled: false, }), ]), }), @@ -1191,16 +1309,16 @@ describe("ChatMessageField", () => { const plusButton = screen.getByTestId(DATA_TEST_ID.CHAT_MESSAGE_FIELD_PLUS_BUTTON); await userEvent.click(plusButton); - // THEN expect the context menu to be rendered with the correct description + // THEN expect the context menu to be rendered with the correct description and enabled await waitFor(() => { - expect(ContextMenu).toHaveBeenCalledWith( + expect(ContextMenu).toHaveBeenLastCalledWith( expect.objectContaining({ anchorEl: plusButton, open: true, items: expect.arrayContaining([ expect.objectContaining({ - description: "CV upload is only available during experience collection", - disabled: true, // Should be disabled after COLLECT_EXPERIENCES phase + description: `PDF, DOCX, TXT • Max ${MAX_FILE_SIZE_MB} MB • ${MAX_MARKDOWN_CHARS} chars max`, + disabled: false, }), ]), }), diff --git a/frontend-new/src/chat/ChatMessageField/ChatMessageField.tsx b/frontend-new/src/chat/ChatMessageField/ChatMessageField.tsx index b2f92888c..1c88c02a2 100644 --- a/frontend-new/src/chat/ChatMessageField/ChatMessageField.tsx +++ b/frontend-new/src/chat/ChatMessageField/ChatMessageField.tsx @@ -4,6 +4,7 @@ import SendIcon from "@mui/icons-material/Send"; import AddIcon from "@mui/icons-material/Add"; import UploadFileIcon from "@mui/icons-material/UploadFile"; import { AnimatePresence, motion } from "framer-motion"; +import { useSnackbar } from "src/theme/SnackbarProvider/SnackbarProvider"; import { CV_UPLOAD_ERROR_MESSAGES, getCvUploadErrorMessageFromHttpStatus } from "../CVUploadErrorHandling"; import ContextMenu from "src/theme/ContextMenu/ContextMenu"; import { MenuItemConfig } from "src/theme/ContextMenu/menuItemConfig.types"; @@ -15,7 +16,8 @@ import CVService from "src/CV/CVService/CVService"; import { CVListItem } from "src/CV/CVService/CVService.types"; import authenticationStateService from "src/auth/services/AuthenticationState.service"; import { ChatError } from "src/error/commonErrors"; -import { formatExperiencesToMessage } from "src/chat/util"; +import { formatExperiencesToMessage, formatCvExperienceBulletsMessage } from "src/chat/util"; +import ChatService from "src/chat/ChatService/ChatService"; import DescriptionOutlinedIcon from "@mui/icons-material/DescriptionOutlined"; import ChevronRightIcon from "@mui/icons-material/ChevronRight"; import UploadedCVsMenu from "src/CV/uploadedCVsMenu/UploadedCVsMenu"; @@ -23,12 +25,15 @@ import UploadedCVsMenu from "src/CV/uploadedCVsMenu/UploadedCVsMenu"; export interface ChatMessageFieldProps { handleSend: (message: string) => void; aiIsTyping: boolean; + setAiIsTyping?: (isTyping: boolean) => void; // Optional: for managing typing indicator when sending CV bullets isChatFinished: boolean; isUploadingCv?: boolean; onUploadCv?: (file: File) => Promise; // returns array of experience lines currentPhase?: ConversationPhase; prefillMessage?: string | null; // optional prefill content for the input field cvUploadError?: string | null; // CV upload error message from polling process + activeSessionId?: number | null; + onCvBulletsSent?: (bulletsMessage?: string, sendMessageResponse?: any) => Promise; // refreshes chat after bullets are sent (without sending another message) } const uniqueId = "2a76494f-351d-409d-ba58-e1b2cfaf2a53"; @@ -66,6 +71,7 @@ export const PLACEHOLDER_TEXTS = { OFFLINE: "You are offline. Please connect to the internet to send a message.", DEFAULT: "Type your message...", UPLOADING: "Uploading CV...", + REINJECTING: "Loading CV...", }; // Character limit error messages (specific to ChatMessageField) export const CHARACTER_LIMIT_ERROR_MESSAGES = { @@ -116,12 +122,14 @@ const ChatMessageField: React.FC = (props) => { const [uploadedCVs, setUploadedCVs] = useState([]); const [isLoadingCVs, setIsLoadingCVs] = useState(false); const [menuView, setMenuView] = useState<"main" | "cvList">("main"); + const [isReinjectingCv, setIsReinjectingCv] = useState(false); const isCvUploadEnabled = getCvUploadEnabled().toLowerCase() === "true"; + const { enqueueSnackbar } = useSnackbar(); - // Show the dot badge whenever in COLLECT_EXPERIENCES and not yet seen + // Show the dot badge when menu not yet seen and closed useEffect(() => { - const shouldShow = props.currentPhase === ConversationPhase.COLLECT_EXPERIENCES && !badgeSeen && !isMenuOpen; + const shouldShow = !badgeSeen && !isMenuOpen; setShowPlusBadge(shouldShow); }, [props.currentPhase, badgeSeen, isMenuOpen]); @@ -310,12 +318,62 @@ const ChatMessageField: React.FC = (props) => { const handleMenuClose = () => setMenuAnchorEl(null); - const handleSelectCV = (cv: CVListItem) => { - const composed = formatExperiencesToMessage(cv.experiences_data); - setMessage(composed); - setMenuAnchorEl(null); - if (composed.trim().length > CHAT_MESSAGE_MAX_LENGTH) { - setErrorMessage(CHARACTER_LIMIT_ERROR_MESSAGES.MESSAGE_LIMIT); + const handleSelectCV = async (cv: CVListItem) => { + if (isReinjectingCv) { + return; + } + try { + setIsReinjectingCv(true); + const currentUserId = authenticationStateService.getInstance().getUser()?.id; + if (!currentUserId) throw new ChatError("User ID is not available"); + const reinjectResult = await CVService.getInstance().reinjectFromUpload(currentUserId, cv.upload_id); + if (!reinjectResult.success) { + const errorMessage = reinjectResult.error || "Failed to load CV. Please try again."; + enqueueSnackbar(errorMessage, { variant: "error" }); + setIsReinjectingCv(false); + return; + } + enqueueSnackbar("CV processed and loaded", { variant: "success" }); + // Re-enable input immediately after injection completes + setIsReinjectingCv(false); + setMenuAnchorEl(null); + setMenuView("main"); + // Send experience bullets as a real message if available + if (props.activeSessionId != null) { + try { + if (reinjectResult.experience_bullets && reinjectResult.experience_bullets.length > 0) { + const message = formatCvExperienceBulletsMessage(reinjectResult.experience_bullets); + // Show typing indicator while waiting for backend response + if (props.setAiIsTyping) { + props.setAiIsTyping(true); + } + // Send to server - use the response directly to avoid fetching full history + const sendResponse = await ChatService.getInstance().sendMessage(props.activeSessionId, message); + // Refresh chat to show the response (pass the message and response so we don't need to fetch history) + if (props.onCvBulletsSent) { + await props.onCvBulletsSent(message, sendResponse); + } + } else { + // If no bullets, show a message to the user that no experiences were found + enqueueSnackbar("No work experience data found in your CV", { variant: "info" }); + // Don't send the generic message - the state is already injected, just no experiences to display + } + } catch (err) { + // silently ignore in UI; parent Chat handles visible errors + console.error("Failed to send message after CV reinjection:", err); + } finally { + // Hide typing indicator + if (props.setAiIsTyping) { + props.setAiIsTyping(false); + } + } + } + } catch (err: any) { + console.error("Failed to reinject from uploaded CV:", err); + const errorMessage = err?.response?.data?.detail || err?.message || "Failed to load CV. Please try again."; + enqueueSnackbar(errorMessage, { variant: "error" }); + setIsReinjectingCv(false); + // Keep menu open for retry, but ensure anchor is preserved } }; @@ -414,6 +472,9 @@ const ChatMessageField: React.FC = (props) => { if (props.isChatFinished) { return PLACEHOLDER_TEXTS.CHAT_FINISHED; } + if (isReinjectingCv) { + return PLACEHOLDER_TEXTS.REINJECTING; + } if (props.isUploadingCv) { return PLACEHOLDER_TEXTS.UPLOADING; } @@ -424,7 +485,7 @@ const ChatMessageField: React.FC = (props) => { return PLACEHOLDER_TEXTS.OFFLINE; } return PLACEHOLDER_TEXTS.DEFAULT; - }, [props.aiIsTyping, props.isChatFinished, props.isUploadingCv, isOnline]); + }, [props.aiIsTyping, props.isChatFinished, props.isUploadingCv, isOnline, isReinjectingCv]); // Check if the send button should be disabled const sendIsDisabled = useCallback(() => { @@ -432,16 +493,17 @@ const ChatMessageField: React.FC = (props) => { props.isChatFinished || props.aiIsTyping || props.isUploadingCv || + isReinjectingCv || !isOnline || message.trim().length === 0 || message.trim().length > CHAT_MESSAGE_MAX_LENGTH // Only disable the send button when over the limit ); - }, [props.isChatFinished, props.aiIsTyping, props.isUploadingCv, isOnline, message]); + }, [props.isChatFinished, props.aiIsTyping, props.isUploadingCv, isOnline, message, isReinjectingCv]); // Check if the input field should be disabled const inputIsDisabled = useCallback(() => { - return props.isChatFinished || props.aiIsTyping || props.isUploadingCv || !isOnline; - }, [props.isChatFinished, props.aiIsTyping, props.isUploadingCv, isOnline]); + return props.isChatFinished || props.aiIsTyping || props.isUploadingCv || isReinjectingCv || !isOnline; + }, [props.isChatFinished, props.aiIsTyping, props.isUploadingCv, isReinjectingCv, isOnline]); const contextMenuItems: MenuItemConfig[] = menuView === "main" @@ -461,14 +523,9 @@ const ChatMessageField: React.FC = (props) => { { id: MENU_ITEM_ID.UPLOAD_CV, text: MENU_ITEM_TEXT.UPLOAD_CV, - description: - props.currentPhase === ConversationPhase.INTRO - ? "You can upload your CV as soon as we start exploring your experiences" - : props.currentPhase === ConversationPhase.COLLECT_EXPERIENCES - ? `PDF, DOCX, TXT • Max ${MAX_FILE_SIZE_MB} MB • ${MAX_MARKDOWN_CHARS} chars max` - : "CV upload is only available during experience collection", + description: `PDF, DOCX, TXT • Max ${MAX_FILE_SIZE_MB} MB • ${MAX_MARKDOWN_CHARS} chars max`, icon: , - disabled: inputIsDisabled() || props.currentPhase !== ConversationPhase.COLLECT_EXPERIENCES, + disabled: inputIsDisabled(), action: handleFileMenuItemClick, }, ] @@ -485,6 +542,7 @@ const ChatMessageField: React.FC = (props) => { onSelect={handleSelectCV} isLoading={isLoadingCVs} uploadedCVs={uploadedCVs} + isReinjecting={isReinjectingCv} /> ), }, diff --git a/frontend-new/src/chat/ChatMessageField/__snapshots__/ChatMessageField.test.tsx.snap b/frontend-new/src/chat/ChatMessageField/__snapshots__/ChatMessageField.test.tsx.snap index f6be29de9..8a6067f3a 100644 --- a/frontend-new/src/chat/ChatMessageField/__snapshots__/ChatMessageField.test.tsx.snap +++ b/frontend-new/src/chat/ChatMessageField/__snapshots__/ChatMessageField.test.tsx.snap @@ -49,8 +49,12 @@ exports[`ChatMessageField should render correctly 1`] = ` /> + class="MuiBadge-badge MuiBadge-standard MuiBadge-anchorOriginTopRight MuiBadge-anchorOriginTopRightCircular MuiBadge-overlapCircular css-1xtbg9s-MuiBadge-badge" + > + + { let givenApiServerUrl: string = "/path/to/api"; @@ -23,27 +23,20 @@ describe("ChatService", () => { // WHEN the service is constructed const actualFirstInstance = ChatService.getInstance(); - // THEN expect the service to be constructed successfully expect(actualFirstInstance).toBeDefined(); - - // AND the service should have the correct endpoint urls expect(actualFirstInstance.apiServerUrl).toEqual(givenApiServerUrl); expect(actualFirstInstance.chatEndpointUrl).toEqual(`${givenApiServerUrl}/conversations`); - // AND WHEN the service is constructed again const actualSecondInstance = ChatService.getInstance(); expect(actualFirstInstance).toBe(actualSecondInstance); - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); describe("sendMessage", () => { test("should fetch the correct URL, with POST and the correct headers and payload successfully", async () => { - // GIVEN some message specification to send const givenMessage = "Hello"; - // AND the send message REST API will respond with OK and some message response const expectedRootMessageResponse = generateTestChatResponses(); const fetchSpy = setupAPIServiceSpy( StatusCodes.CREATED, @@ -51,14 +44,10 @@ describe("ChatService", () => { "application/json;charset=UTF-8", ); - // WHEN the sendMessage function is called with the given arguments const givenSessionId = 1234; const service = ChatService.getInstance(); const actualMessageResponse = await service.sendMessage(givenSessionId, givenMessage); - // THEN expect it to make a GET request - // AND the headers - // AND the request payload to contain the given arguments expect(fetchSpy).toHaveBeenCalledWith(`${givenApiServerUrl}/conversations/${givenSessionId}/messages`, { method: "POST", headers: { "Content-Type": "application/json" }, @@ -70,17 +59,13 @@ describe("ChatService", () => { expectedContentType: "application/json", }); - // AND returns the message response expect(actualMessageResponse).toEqual(expectedRootMessageResponse); - - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); test("on fail to fetch, should reject with the expected service error", async () => { const givenMessage = "Hello"; - // GIVEN fetch rejects with some unknown error for sending a message on a given session const givenFetchError = new Error("some error"); jest.spyOn(require("src/utils/customFetch/customFetch"), "customFetch").mockImplementationOnce(() => { return new Promise(() => { @@ -88,14 +73,10 @@ describe("ChatService", () => { }); }); - // WHEN calling sendMessage function const givenSessionId = 1234; const service = ChatService.getInstance(); - // THEN expected it to reject with the same error thrown by fetchWithAuth await expect(service.sendMessage(givenSessionId, givenMessage)).rejects.toMatchObject(givenFetchError); - - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); @@ -106,17 +87,13 @@ describe("ChatService", () => { ])( "on 201, should reject with an error ERROR_CODE.INVALID_RESPONSE_BODY if response %s", async (_description, givenResponse) => { - // GIVEN some message specification to send const givenMessage = "Hello"; - // AND the send message REST API will respond with OK and some response that does conform to the messageResponseSchema even if it states that it is application/json setupAPIServiceSpy(StatusCodes.CREATED, givenResponse, "application/json;charset=UTF-8"); - // WHEN the sendMessage function is called with the given arguments const givenSessionId = 1234; const service = ChatService.getInstance(); const sendMessagePromise = service.sendMessage(givenSessionId, givenMessage); - // THEN expected it to reject with the error response const expectedError = { ...new RestAPIError( ChatService.name, @@ -131,8 +108,6 @@ describe("ChatService", () => { cause: expect.anything(), }; await expect(sendMessagePromise).rejects.toMatchObject(expectedError); - - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }, @@ -141,16 +116,12 @@ describe("ChatService", () => { describe("getChatHistory", () => { test("should fetch the correct URL, with GET and the correct headers and payload successfully", async () => { - // GIVEN some history to return const givenTestHistoryResponse = generateTestHistory(); const fetchSpy = setupAPIServiceSpy(StatusCodes.OK, givenTestHistoryResponse, "application/json;charset=UTF-8"); - // WHEN the getChatHistory function is called const givenSessionId = 1234; const service = ChatService.getInstance(); const actualHistoryResponse = await service.getChatHistory(givenSessionId); - // THEN expect it to make a GET request - // AND the headers expect(fetchSpy).toHaveBeenCalledWith(`${givenApiServerUrl}/conversations/${givenSessionId}/messages`, { method: "GET", headers: { "Content-Type": "application/json" }, @@ -162,16 +133,12 @@ describe("ChatService", () => { retryOnFailedToFetch: true }); - // AND returns the history response expect(actualHistoryResponse).toEqual(givenTestHistoryResponse); - - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); test("on fail to fetch, should reject with the expected service error", async () => { - // GIVEN fetch rejects with some unknown error when getting the history of a given session const givenFetchError = new Error("some error"); jest.spyOn(require("src/utils/customFetch/customFetch"), "customFetch").mockImplementationOnce(() => { return new Promise(() => { @@ -179,14 +146,10 @@ describe("ChatService", () => { }); }); - // WHEN calling getChatHistory function const givenSessionId = 1234; const service = ChatService.getInstance(); - // THEN expected it to reject with the same error thrown by fetchWithAuth await expect(service.getChatHistory(givenSessionId)).rejects.toMatchObject(givenFetchError); - - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }); @@ -197,16 +160,12 @@ describe("ChatService", () => { ])( "on 200, should reject with an error ERROR_CODE.INVALID_RESPONSE_BODY if response %s", async (_description, givenResponse) => { - // GIVEN some message specification to send - // AND the send message REST API will respond with OK and some response that does conform to the messageResponseSchema even if it states that it is application/json setupAPIServiceSpy(StatusCodes.OK, givenResponse, "application/json;charset=UTF-8"); - // WHEN the sendMessage function is called with the given arguments const givenSessionId = 1234; const service = ChatService.getInstance(); const sendMessagePromise = service.getChatHistory(givenSessionId); - // THEN expected it to reject with the error response const expectedError = { ...new RestAPIError( ChatService.name, @@ -221,8 +180,6 @@ describe("ChatService", () => { cause: expect.anything(), }; await expect(sendMessagePromise).rejects.toMatchObject(expectedError); - - // AND expect no errors or warning to have occurred expect(console.error).not.toHaveBeenCalled(); expect(console.warn).not.toHaveBeenCalled(); }, diff --git a/frontend-new/src/chat/cvUploadPolling.test.ts b/frontend-new/src/chat/cvUploadPolling.test.ts index 0ced19030..50ca8509c 100644 --- a/frontend-new/src/chat/cvUploadPolling.test.ts +++ b/frontend-new/src/chat/cvUploadPolling.test.ts @@ -9,7 +9,8 @@ describe("cvUploadPolling", () => { expect(getCvUploadDisplayMessage({ upload_process_state: "EXTRACTING" })).toBe("Extracting experiences"); expect(getCvUploadDisplayMessage({ upload_process_state: "SAVING" })).toBe("Saving CV"); expect(getCvUploadDisplayMessage({ upload_process_state: "FAILED" })).toBe("CV upload failed"); - expect(getCvUploadDisplayMessage({ upload_process_state: "COMPLETED" })).toBe("CV uploaded successfully"); + expect(getCvUploadDisplayMessage({ upload_process_state: "COMPLETED", experience_bullets: ["bullet"] })).toBe("CV uploaded successfully"); + expect(getCvUploadDisplayMessage({ upload_process_state: "COMPLETED" })).toBe("No work experience data found in your CV"); expect(getCvUploadDisplayMessage({ upload_process_state: "CANCELLED" })).toBe("CV upload cancelled"); expect(getCvUploadDisplayMessage({ upload_process_state: "PENDING_UPLOAD" })).toBe("Uploading CV"); }); diff --git a/frontend-new/src/chat/cvUploadPolling.ts b/frontend-new/src/chat/cvUploadPolling.ts index b7a8c43f1..5552999b2 100644 --- a/frontend-new/src/chat/cvUploadPolling.ts +++ b/frontend-new/src/chat/cvUploadPolling.ts @@ -15,7 +15,13 @@ export function stopUploadPolling(handles?: UploadPollingHandles): void { export function getCvUploadDisplayMessage(status: UploadStatus): string { if (!status) return "Uploading CV"; if (status.upload_process_state === "CANCELLED" || status.cancel_requested) return "CV upload cancelled"; - if (status.upload_process_state === "COMPLETED") return "CV uploaded successfully"; + if (status.upload_process_state === "COMPLETED") { + // If upload is successful but no experience bullets, show a message indicating no data was found + if (!status.experience_bullets || status.experience_bullets.length === 0) { + return "No work experience data found in your CV"; + } + return "CV uploaded successfully"; + } switch (status.upload_process_state) { case "CONVERTING": return "Converting CV"; diff --git a/frontend-new/src/chat/hooks/useCvBulletsHandler.test.tsx b/frontend-new/src/chat/hooks/useCvBulletsHandler.test.tsx new file mode 100644 index 000000000..a6d01769b --- /dev/null +++ b/frontend-new/src/chat/hooks/useCvBulletsHandler.test.tsx @@ -0,0 +1,396 @@ +// mute the console +import "src/_test_utilities/consoleMock"; + +import { act, renderHook } from "src/_test_utilities/test-utils"; +import { useCvBulletsHandler } from "./useCvBulletsHandler"; +import ChatService from "src/chat/ChatService/ChatService"; +import { ConversationResponse, ConversationMessageSender } from "src/chat/ChatService/ChatService.types"; +import { IChatMessage } from "src/chat/Chat.types"; +import { ChatError } from "src/error/commonErrors"; +import { formatCvExperienceBulletsMessage } from "src/chat/util"; +import { CurrentPhase, ConversationPhase } from "src/chat/chatProgressbar/types"; + +describe("useCvBulletsHandler", () => { + const mockSessionId = 123; + let mockAddMessageToChat: jest.Mock; + let mockSetAiIsTyping: jest.Mock; + let mockProcessChatHistoryResponse: jest.Mock; + let mockSendMessage: jest.Mock; + let mockGetChatHistory: jest.Mock; + + const createMockConversationResponse = ( + messages: Array<{ message_id: string; message: string; sender: ConversationMessageSender }> = [] + ): ConversationResponse => ({ + messages: messages.map((msg) => ({ + message_id: msg.message_id, + message: msg.message, + sent_at: new Date().toISOString(), + sender: msg.sender, + reaction: null, + })), + conversation_completed: false, + conversation_conducted_at: null, + experiences_explored: 0, + current_phase: { + phase: ConversationPhase.COLLECT_EXPERIENCES, + percentage: 0, + current: 0, + total: 0, + } as CurrentPhase, + }); + + beforeEach(() => { + jest.clearAllMocks(); + mockAddMessageToChat = jest.fn(); + mockSetAiIsTyping = jest.fn(); + mockProcessChatHistoryResponse = jest.fn().mockResolvedValue(undefined); + mockSendMessage = jest.fn(); + mockGetChatHistory = jest.fn(); + + jest.spyOn(ChatService, "getInstance").mockReturnValue({ + sendMessage: mockSendMessage, + getChatHistory: mockGetChatHistory, + } as unknown as ChatService); + }); + + describe("formatBulletsMessage", () => { + test("should format bullets into a message string", () => { + // GIVEN the useCvBulletsHandler hook + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN formatting bullets + const bullets = ["Worked as a software engineer", "Built web applications", "Led a team of 5"]; + const formatted = result.current.formatBulletsMessage(bullets); + + // THEN expect the formatted message to match the utility function + expect(formatted).toBe(formatCvExperienceBulletsMessage(bullets)); + expect(formatted).toContain("I have these experiences:"); + expect(formatted).toContain("• Worked as a software engineer"); + expect(formatted).toContain("• Built web applications"); + expect(formatted).toContain("• Led a team of 5"); + expect(formatted).toContain("Let's start with these."); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should handle empty bullets array", () => { + // GIVEN the useCvBulletsHandler hook + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN formatting an empty array + const formatted = result.current.formatBulletsMessage([]); + + // THEN expect the formatted message to match the utility function + expect(formatted).toBe(formatCvExperienceBulletsMessage([])); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + }); + + describe("handleBullets", () => { + test("should send bullets as a user message and process the response", async () => { + // GIVEN bullets to send and a mock response + const bullets = ["Worked as a software engineer", "Built web applications"]; + const mockResponse = createMockConversationResponse([ + { + message_id: "msg-1", + message: "Great! Let's explore these experiences.", + sender: ConversationMessageSender.COMPASS, + }, + ]); + mockSendMessage.mockResolvedValue(mockResponse); + + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets + await act(async () => { + await result.current.handleBullets(bullets); + }); + + // THEN expect the user message to be added optimistically + expect(mockAddMessageToChat).toHaveBeenCalledTimes(1); + const addedMessage = mockAddMessageToChat.mock.calls[0][0] as IChatMessage; + expect(addedMessage.sender).toBe(ConversationMessageSender.USER); + expect(addedMessage.payload.message).toContain("I have these experiences:"); + + // AND the typing indicator to be set and then cleared + expect(mockSetAiIsTyping).toHaveBeenCalledWith(true); + expect(mockSetAiIsTyping).toHaveBeenCalledWith(false); + + // AND the message to be sent to the server + expect(mockSendMessage).toHaveBeenCalledTimes(1); + expect(mockSendMessage).toHaveBeenCalledWith(mockSessionId, formatCvExperienceBulletsMessage(bullets)); + + // AND the response to be processed + expect(mockProcessChatHistoryResponse).toHaveBeenCalledTimes(1); + expect(mockProcessChatHistoryResponse).toHaveBeenCalledWith(mockResponse, { + skipUserMessage: formatCvExperienceBulletsMessage(bullets), + sessionId: mockSessionId, + }); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should throw error when sessionId is null", async () => { + // GIVEN the hook with null sessionId + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: null, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets + // THEN expect an error to be thrown + await act(async () => { + await expect(result.current.handleBullets(["bullet"])).rejects.toThrow(ChatError); + }); + + // AND no message to be added + expect(mockAddMessageToChat).not.toHaveBeenCalled(); + // AND no message to be sent + expect(mockSendMessage).not.toHaveBeenCalled(); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should handle errors and reset typing indicator", async () => { + // GIVEN bullets to send and a network error + const bullets = ["Worked as a software engineer"]; + const error = new Error("Network error"); + mockSendMessage.mockRejectedValue(error); + + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets + // THEN expect an error to be thrown + await act(async () => { + await expect(result.current.handleBullets(bullets)).rejects.toThrow("Network error"); + }); + + // AND the message to still be added optimistically + expect(mockAddMessageToChat).toHaveBeenCalledTimes(1); + + // AND the typing indicator to be set and then reset even on error + expect(mockSetAiIsTyping).toHaveBeenCalledWith(true); + expect(mockSetAiIsTyping).toHaveBeenCalledWith(false); + + // AND the response not to be processed + expect(mockProcessChatHistoryResponse).not.toHaveBeenCalled(); + // AND errors should be logged + expect(console.error).toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + }); + + describe("handleBulletsSent", () => { + test("should refresh chat using sendMessageResponse when provided", async () => { + // GIVEN a bullets message and a sendMessage response + const bulletsMessage = formatCvExperienceBulletsMessage(["Worked as a software engineer"]); + const mockSendMessageResponse = createMockConversationResponse([ + { + message_id: "msg-1", + message: "Great! Let's explore these experiences.", + sender: ConversationMessageSender.COMPASS, + }, + ]); + + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets sent with a response + await act(async () => { + await result.current.handleBulletsSent(bulletsMessage, mockSendMessageResponse); + }); + + // THEN expect the user message to be added if provided + expect(mockAddMessageToChat).toHaveBeenCalledTimes(1); + const addedMessage = mockAddMessageToChat.mock.calls[0][0] as IChatMessage; + expect(addedMessage.sender).toBe(ConversationMessageSender.USER); + expect(addedMessage.payload.message).toBe(bulletsMessage); + + // AND chat history not to be fetched + expect(mockGetChatHistory).not.toHaveBeenCalled(); + + // AND the response to be processed + expect(mockProcessChatHistoryResponse).toHaveBeenCalledTimes(1); + expect(mockProcessChatHistoryResponse).toHaveBeenCalledWith(mockSendMessageResponse, { + skipUserMessage: bulletsMessage, + sessionId: mockSessionId, + }); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should fetch chat history when sendMessageResponse is not provided", async () => { + // GIVEN a bullets message but no sendMessage response + const bulletsMessage = formatCvExperienceBulletsMessage(["Worked as a software engineer"]); + const mockHistoryResponse = createMockConversationResponse([ + { + message_id: "msg-1", + message: "Great! Let's explore these experiences.", + sender: ConversationMessageSender.COMPASS, + }, + ]); + mockGetChatHistory.mockResolvedValue(mockHistoryResponse); + + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets sent without a response + await act(async () => { + await result.current.handleBulletsSent(bulletsMessage); + }); + + // THEN expect chat history to be fetched + expect(mockGetChatHistory).toHaveBeenCalledTimes(1); + expect(mockGetChatHistory).toHaveBeenCalledWith(mockSessionId); + + // AND the response to be processed + expect(mockProcessChatHistoryResponse).toHaveBeenCalledTimes(1); + expect(mockProcessChatHistoryResponse).toHaveBeenCalledWith(mockHistoryResponse, { + skipUserMessage: bulletsMessage, + sessionId: mockSessionId, + }); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should handle case when bulletsMessage is not provided", async () => { + // GIVEN no bullets message + const mockHistoryResponse = createMockConversationResponse([ + { + message_id: "msg-1", + message: "Great! Let's explore these experiences.", + sender: ConversationMessageSender.COMPASS, + }, + ]); + mockGetChatHistory.mockResolvedValue(mockHistoryResponse); + + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets sent without a message + await act(async () => { + await result.current.handleBulletsSent(); + }); + + // THEN expect no user message to be added + expect(mockAddMessageToChat).not.toHaveBeenCalled(); + + // AND chat history to be fetched + expect(mockGetChatHistory).toHaveBeenCalledTimes(1); + + // AND the response to be processed without skipUserMessage + expect(mockProcessChatHistoryResponse).toHaveBeenCalledTimes(1); + expect(mockProcessChatHistoryResponse).toHaveBeenCalledWith(mockHistoryResponse, { + skipUserMessage: undefined, + sessionId: mockSessionId, + }); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should return early when sessionId is null", async () => { + // GIVEN the hook with null sessionId + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: null, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets sent + await act(async () => { + await result.current.handleBulletsSent("message"); + }); + + // THEN expect no operations to be performed + expect(mockAddMessageToChat).not.toHaveBeenCalled(); + expect(mockGetChatHistory).not.toHaveBeenCalled(); + expect(mockProcessChatHistoryResponse).not.toHaveBeenCalled(); + expect(console.error).not.toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + + test("should handle errors gracefully", async () => { + // GIVEN a bullets message and a network error + const bulletsMessage = formatCvExperienceBulletsMessage(["Worked as a software engineer"]); + const error = new Error("Network error"); + mockGetChatHistory.mockRejectedValue(error); + + const { result } = renderHook(() => + useCvBulletsHandler({ + sessionId: mockSessionId, + addMessageToChat: mockAddMessageToChat, + setAiIsTyping: mockSetAiIsTyping, + processChatHistoryResponse: mockProcessChatHistoryResponse, + }) + ); + + // WHEN handling bullets sent + await act(async () => { + await result.current.handleBulletsSent(bulletsMessage); + }); + + // THEN expect the message to still be added optimistically + expect(mockAddMessageToChat).toHaveBeenCalledTimes(1); + + // AND the response not to be processed + expect(mockProcessChatHistoryResponse).not.toHaveBeenCalled(); + // AND errors should be logged + expect(console.error).toHaveBeenCalled(); + expect(console.warn).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/frontend-new/src/chat/hooks/useCvBulletsHandler.ts b/frontend-new/src/chat/hooks/useCvBulletsHandler.ts new file mode 100644 index 000000000..6eecf678f --- /dev/null +++ b/frontend-new/src/chat/hooks/useCvBulletsHandler.ts @@ -0,0 +1,114 @@ +import { useCallback } from "react"; +import ChatService from "src/chat/ChatService/ChatService"; +import { ConversationResponse } from "src/chat/ChatService/ChatService.types"; +import { IChatMessage } from "src/chat/Chat.types"; +import { generateUserMessage, formatCvExperienceBulletsMessage } from "src/chat/util"; +import { ChatError } from "src/error/commonErrors"; + +export interface CvBulletsHandlerOptions { + // Required dependencies + sessionId: number | null; + addMessageToChat: (message: IChatMessage) => void; + setAiIsTyping: (isTyping: boolean) => void; + processChatHistoryResponse: ( + response: ConversationResponse, + options: { + skipUserMessage?: string; + sessionId: number; + } + ) => Promise; +} + +export interface CvBulletsHandlerResult { + /** + * Formats experience bullets into a user message string + */ + formatBulletsMessage: (bullets: string[]) => string; + + /** + * Sends CV experience bullets as a user message and processes the response. + * Used for upload completion flow. + */ + handleBullets: (bullets: string[]) => Promise; + + /** + * Refreshes chat after CV bullets are sent (for reinjection flow). + * Can accept the sendMessage response to avoid fetching full history. + */ + handleBulletsSent: (bulletsMessage?: string, sendMessageResponse?: ConversationResponse) => Promise; +} + +/** + * Custom hook to consolidate CV bullets handling logic for upload and reinjection flows. + * + * This hook provides: + * - Formatting bullets into message strings + * - Sending bullets as user messages + * - Processing chat history responses + * - Managing typing indicators + * + * @param options - Configuration object with required dependencies + * @returns Handler functions for CV bullets operations + */ +export const useCvBulletsHandler = (options: CvBulletsHandlerOptions): CvBulletsHandlerResult => { + const { sessionId, addMessageToChat, setAiIsTyping, processChatHistoryResponse } = options; + + const formatBulletsMessage = useCallback((bullets: string[]): string => { + return formatCvExperienceBulletsMessage(bullets); + }, []); + + const handleBullets = useCallback( + async (bullets: string[]): Promise => { + if (sessionId == null) { + throw new ChatError("Session ID is not available"); + } + + const message = formatBulletsMessage(bullets); + + // Show the user message immediately before sending + addMessageToChat(generateUserMessage(message, new Date().toISOString())); + // Show typing indicator while waiting for backend response + setAiIsTyping(true); + + try { + // Send to server - the response contains only new messages, not the full history + const response = await ChatService.getInstance().sendMessage(sessionId, message); + await processChatHistoryResponse(response, { skipUserMessage: message, sessionId }); + } catch (err) { + console.error(new ChatError("Failed to send experience bullets message:", err)); + throw err; + } finally { + setAiIsTyping(false); + } + }, + [sessionId, formatBulletsMessage, addMessageToChat, setAiIsTyping, processChatHistoryResponse] + ); + + const handleBulletsSent = useCallback( + async (bulletsMessage?: string, sendMessageResponse?: ConversationResponse): Promise => { + if (sessionId == null) return; + + try { + // Show the user message immediately if provided + if (bulletsMessage) { + addMessageToChat(generateUserMessage(bulletsMessage, new Date().toISOString())); + } + + // Use the response from sendMessage if provided (contains only new messages), + // otherwise fetch history. processChatHistoryResponse handles duplicate filtering internally. + const response = sendMessageResponse || await ChatService.getInstance().getChatHistory(sessionId); + await processChatHistoryResponse(response, { skipUserMessage: bulletsMessage, sessionId }); + } catch (e) { + console.error(new ChatError("Failed to refresh chat after CV bullets sent:", e)); + } + }, + [sessionId, addMessageToChat, processChatHistoryResponse] + ); + + return { + formatBulletsMessage, + handleBullets, + handleBulletsSent, + }; +}; + diff --git a/frontend-new/src/chat/util.tsx b/frontend-new/src/chat/util.tsx index c6cd04452..067d65c02 100644 --- a/frontend-new/src/chat/util.tsx +++ b/frontend-new/src/chat/util.tsx @@ -23,6 +23,16 @@ export const FIXED_MESSAGES_TEXT = { PLEASE_REPEAT: "I'm sorry, something seems to have gone wrong on my end... Can you please repeat that?", }; +/** + * Formats experience bullets into a user message string for CV upload/reinjection. + * @param bullets - Array of experience bullet strings + * @returns Formatted message string + */ +export const formatCvExperienceBulletsMessage = (bullets: string[]): string => { + const bulletsText = bullets.map(b => `• ${b}`).join("\n"); + return `I have these experiences:\n\n${bulletsText} \n\nLet's start with these.`; +}; + export const generateUserMessage = ( message: string, sent_at: string, @@ -133,7 +143,7 @@ export const generateCancellableCVTypingMessage = ( const payload: CancellableTypingChatMessageProps = { message: getDisplayMessage(), - thinkingMessage: "Processing your CV, this might take a while...", + thinkingMessage: "Processing your CV, this might take a minute", waitBeforeThinking: 10000, // 10 seconds for CV processing disabled: isUploaded || isCancelled, onCancel: async () => await onCancel(uploadId),