From 37fcdaad9c0f894bb24e5662852eafffef60e917 Mon Sep 17 00:00:00 2001 From: Robbie Court Date: Fri, 26 Jun 2026 09:53:24 +0000 Subject: [PATCH] Merge synonyms by (scope, label) with combined refs - collapse pub_syn so each synonym shows once with its list of refs - render every attributed pub as a markdown ref from its short_form - show synonym type (e.g. name_in_banc) as an unlinked ref when the only pub is the Unattributed placeholder; omit when no type - map opaque OMO synonym-type ids to readable labels - add tests --- src/test/test_merge_synonyms.py | 70 ++++++++++++++++++++++ src/vfbquery/term_info_queries.py | 98 +++++++++++++++++++++++++++++++ src/vfbquery/vfb_queries.py | 28 ++------- 3 files changed, 172 insertions(+), 24 deletions(-) create mode 100644 src/test/test_merge_synonyms.py diff --git a/src/test/test_merge_synonyms.py b/src/test/test_merge_synonyms.py new file mode 100644 index 0000000..ae823c6 --- /dev/null +++ b/src/test/test_merge_synonyms.py @@ -0,0 +1,70 @@ +import unittest +from vfbquery.term_info_queries import deserialize_term_info, synonym_type_label + + +# Trimmed real medulla (FBbt_00003748) term_info: the synonym block only. +MEDULLA = """ +{"term": {"core": {"iri": "http://purl.obolibrary.org/obo/FBbt_00003748", "symbol": "", "types": ["Entity", "Class", "Anatomy"], "short_form": "FBbt_00003748", "label": "medulla"}, "description": [], "comment": []}, + "query": "Get JSON for Class", "version": "test", "parents": [], "relationships": [], "xrefs": [], "anatomy_channel_image": [], + "pub_syn": [ + {"synonym": {"scope": "has_related_synonym", "label": "ME_L", "type": "http://purl.obolibrary.org/obo/fbbt#name_in_flywire_fafb"}, "pub": {"core": {"short_form": "FBrf0260535", "types": ["pub"], "label": "Schlegel et al., 2024"}, "FlyBase": "FBrf0260535", "PubMed": "", "DOI": ""}}, + {"synonym": {"scope": "has_related_synonym", "label": "ME_L", "type": "http://purl.obolibrary.org/obo/fbbt#name_in_banc"}, "pub": {"core": {"short_form": "doi_10_1101_2025_07_31_667571", "types": ["pub"], "label": "Bates et al., 2025"}, "FlyBase": "", "PubMed": "", "DOI": "10.1101/2025.07.31.667571"}}, + {"synonym": {"scope": "has_related_synonym", "label": "ME_L", "type": "http://purl.obolibrary.org/obo/fbbt#name_in_banc"}, "pub": {"core": {"short_form": "Unattributed", "types": ["pub"], "label": ""}, "FlyBase": "", "PubMed": "", "DOI": ""}}, + {"synonym": {"scope": "has_related_synonym", "label": "m", "type": ""}, "pub": {"core": {"short_form": "Unattributed", "types": ["pub"], "label": ""}, "FlyBase": "", "PubMed": "", "DOI": ""}}, + {"synonym": {"scope": "has_exact_synonym", "label": "ME", "type": "http://purl.obolibrary.org/obo/fbbt#BRAIN_NAME_ABV"}, "pub": {"core": {"short_form": "FBrf0224194", "types": ["pub"], "label": "Ito et al., 2014"}, "FlyBase": "FBrf0224194", "PubMed": "", "DOI": ""}} + ], + "def_pubs": [], "targeting_splits": []} +""" + + +class MergeSynonymsTest(unittest.TestCase): + def setUp(self): + self.syns = deserialize_term_info(MEDULLA).get_merged_synonyms() + self.by_label = {s["label"]: s for s in self.syns} + + def test_each_synonym_appears_once(self): + labels = [s["label"] for s in self.syns] + self.assertEqual(sorted(labels), ["ME", "ME_L", "m"]) + self.assertEqual(len(labels), len(set(labels))) + + def test_multi_ref_synonym_merged(self): + # ME_L asserted by flywire + banc -> single entry, both refs, no Unattributed + pub = self.by_label["ME_L"]["publication"] + self.assertIn("Schlegel et al., 2024", pub) + self.assertIn("Bates et al., 2025", pub) + self.assertNotIn("Unattributed", pub) + + def test_attributed_pubs_are_markdown_links(self): + # every pub with a short_form/id must render as a markdown ref + self.assertIn("[Schlegel et al., 2024](FBrf0260535)", self.by_label["ME_L"]["publication"]) + self.assertIn("[Bates et al., 2025](doi_10_1101_2025_07_31_667571)", self.by_label["ME_L"]["publication"]) + + def test_unattributed_with_type_shows_type_token(self): + # name_in_banc -> Unattributed: surface the type as a plain (unlinked) ref + pub = self.by_label["ME_L"]["publication"] + self.assertIn("name_in_banc", pub) + self.assertNotIn("[name_in_banc]", pub) # not a link + + def test_unattributed_only_no_type_has_no_publication(self): + # 'm' is backed only by Unattributed with no type -> shown with no ref + self.assertNotIn("publication", self.by_label["m"]) + + def test_attributed_single_ref_kept(self): + self.assertIn("[Ito et al., 2014](FBrf0224194)", self.by_label["ME"]["publication"]) + + +class SynonymTypeLabelTest(unittest.TestCase): + def test_opaque_omo_ids_mapped(self): + self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/OMO_0003000"), "abbreviation") + self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/OMO_0003003"), "layperson synonym") + + def test_fragment_fallback(self): + self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/fbbt#name_in_banc"), "name_in_banc") + self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/ncbitaxon#scientific_name"), "scientific_name") + + def test_empty(self): + self.assertEqual(synonym_type_label(""), "") + + +if __name__ == "__main__": + unittest.main() diff --git a/src/vfbquery/term_info_queries.py b/src/vfbquery/term_info_queries.py index 9502f24..4969433 100644 --- a/src/vfbquery/term_info_queries.py +++ b/src/vfbquery/term_info_queries.py @@ -582,6 +582,82 @@ def get_synonyms(self) -> List[str]: return [str(syn) for syn in set(self.pub_syn) if syn] return list() + def get_merged_synonyms(self) -> List[dict]: + """Merge pub_syn into one entry per (scope, label) with the combined + list of refs. + + The same synonym is often asserted by several datasets/papers, so + pub_syn holds one entry per (synonym, pub). This collapses them so each + synonym is shown once with the combined refs: + + - any pub with a real id/short_form is rendered as a markdown link; + - the 'Unattributed' placeholder pub is never linked, but if the entry + carries a synonym type (e.g. name_in_banc) that type is shown as a + plain-text ref, since it is useful provenance for the user; + - a synonym backed only by Unattributed with no type is shown with no + ref at all. + """ + def pub_ref(pub): + """Markdown ref for an attributed pub, or '' for Unattributed/none.""" + core = getattr(pub, 'core', None) if pub else None + if not core: + return "" + sf = getattr(core, 'short_form', '') or "" + if not sf or sf == "Unattributed": + return "" + micro = getattr(pub, 'microref', '') or "" + label = getattr(core, 'label', '') or "" + if micro: + text = micro + elif label: + parts = label.split(",") + text = (parts[0] + "," + parts[1]) if len(parts) > 1 else label + else: + text = sf + return get_link(text, sf) + + def type_token(syn): + """Readable label for the synonym type, e.g. name_in_banc or + 'abbreviation' for opaque OMO ids.""" + return synonym_type_label(getattr(syn.synonym, 'type', '') or "") + + grouped = {} + order = [] + for syn in (self.pub_syn or []): + if not (hasattr(syn, 'synonym') and syn.synonym): + continue + label = getattr(syn.synonym, 'label', "") or "" + scope = getattr(syn.synonym, 'scope', "") or "exact" + stype = getattr(syn.synonym, 'type', "") or "synonym" + key = (scope, label) + if key not in grouped: + grouped[key] = {"label": label, "scope": scope, "type": stype, "refs": []} + order.append(key) + entry = grouped[key] + entry_pubs = list(getattr(syn, 'pubs', None) or []) + if getattr(syn, 'pub', None): + entry_pubs.append(syn.pub) + real_refs = [r for r in (pub_ref(p) for p in entry_pubs) if r] + if real_refs: + for ref in real_refs: + if ref not in entry["refs"]: + entry["refs"].append(ref) + else: + # no attributed pub for this assertion: fall back to the + # synonym type as an unlinked ref (e.g. name_in_banc) + tok = type_token(syn) + if tok and tok not in entry["refs"]: + entry["refs"].append(tok) + + result = [] + for key in order: + entry = grouped[key] + synonym = {"label": entry["label"], "scope": entry["scope"], "type": entry["type"]} + if entry["refs"]: + synonym["publication"] = ", ".join(entry["refs"]) + result.append(synonym) + return result + def get_references(self) -> List[dict]: results = list() if self.def_pubs: @@ -741,6 +817,28 @@ def get_image(data: str, name: str, reference: str): return image +# Display labels for synonym types whose IRI fragment is an opaque id rather +# than human-readable text (OMO synonym-type ids). Other synonym types (e.g. +# fbbt#name_in_banc, ncbitaxon#scientific_name) already read sensibly as their +# fragment, so are left as-is. Keyed by IRI fragment / short id. +SYNONYM_TYPE_LABELS = { + "OMO_0003000": "abbreviation", + "OMO_0003003": "layperson synonym", +} + + +def synonym_type_label(type_iri: str) -> str: + """Human-readable label for a synonym-type IRI. + + Returns the curated label for opaque OMO ids, otherwise the IRI fragment + (e.g. name_in_banc, scientific_name). Empty string for no type. + """ + if not type_iri: + return "" + frag = type_iri.split('#')[-1].split('/')[-1] + return SYNONYM_TYPE_LABELS.get(frag, frag) + + def get_link(text: str, link: str) -> str: """ Creates a markdown formatted link string. diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index 8657b35..464cc41 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -1205,31 +1205,11 @@ def term_info_parse_object(results, short_form): termInfo["Publications"] = publications - # Add Synonyms for Class entities + # Add Synonyms for Class entities. pub_syn holds one entry per + # (synonym, pub); get_merged_synonyms() collapses these to one entry per + # synonym with the combined refs and drops the Unattributed placeholder. if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0: - synonyms = [] - for syn in vfbTerm.pub_syn: - if hasattr(syn, 'synonym') and syn.synonym: - synonym = {} - synonym["label"] = syn.synonym.label if hasattr(syn.synonym, 'label') else "" - synonym["scope"] = syn.synonym.scope if hasattr(syn.synonym, 'scope') else "exact" - synonym["type"] = syn.synonym.type if hasattr(syn.synonym, 'type') else "synonym" - - if hasattr(syn, 'pubs') and syn.pubs: - pub_refs = [] - for pub in syn.pubs: - if hasattr(pub, 'get_microref') and pub.get_microref(): - pub_refs.append(pub.get_microref()) - - if pub_refs: - # Join multiple publication references with commas - synonym["publication"] = ", ".join(pub_refs) - # Fallback to single pub if pubs collection not available - elif hasattr(syn, 'pub') and syn.pub and hasattr(syn.pub, 'get_microref'): - synonym["publication"] = syn.pub.get_microref() - - synonyms.append(synonym) - + synonyms = vfbTerm.get_merged_synonyms() # Only add the synonyms if we found any if synonyms: termInfo["Synonyms"] = synonyms