Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/test/test_merge_synonyms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest
from vfbquery.term_info_queries import deserialize_term_info, synonym_type_label


# Trimmed real medulla (FBbt_00003748) term_info: the synonym block only.
MEDULLA = """
{"term": {"core": {"iri": "http://purl.obolibrary.org/obo/FBbt_00003748", "symbol": "", "types": ["Entity", "Class", "Anatomy"], "short_form": "FBbt_00003748", "label": "medulla"}, "description": [], "comment": []},
"query": "Get JSON for Class", "version": "test", "parents": [], "relationships": [], "xrefs": [], "anatomy_channel_image": [],
"pub_syn": [
{"synonym": {"scope": "has_related_synonym", "label": "ME_L", "type": "http://purl.obolibrary.org/obo/fbbt#name_in_flywire_fafb"}, "pub": {"core": {"short_form": "FBrf0260535", "types": ["pub"], "label": "Schlegel et al., 2024"}, "FlyBase": "FBrf0260535", "PubMed": "", "DOI": ""}},
{"synonym": {"scope": "has_related_synonym", "label": "ME_L", "type": "http://purl.obolibrary.org/obo/fbbt#name_in_banc"}, "pub": {"core": {"short_form": "doi_10_1101_2025_07_31_667571", "types": ["pub"], "label": "Bates et al., 2025"}, "FlyBase": "", "PubMed": "", "DOI": "10.1101/2025.07.31.667571"}},
{"synonym": {"scope": "has_related_synonym", "label": "ME_L", "type": "http://purl.obolibrary.org/obo/fbbt#name_in_banc"}, "pub": {"core": {"short_form": "Unattributed", "types": ["pub"], "label": ""}, "FlyBase": "", "PubMed": "", "DOI": ""}},
{"synonym": {"scope": "has_related_synonym", "label": "m", "type": ""}, "pub": {"core": {"short_form": "Unattributed", "types": ["pub"], "label": ""}, "FlyBase": "", "PubMed": "", "DOI": ""}},
{"synonym": {"scope": "has_exact_synonym", "label": "ME", "type": "http://purl.obolibrary.org/obo/fbbt#BRAIN_NAME_ABV"}, "pub": {"core": {"short_form": "FBrf0224194", "types": ["pub"], "label": "Ito et al., 2014"}, "FlyBase": "FBrf0224194", "PubMed": "", "DOI": ""}}
],
"def_pubs": [], "targeting_splits": []}
"""


class MergeSynonymsTest(unittest.TestCase):
def setUp(self):
self.syns = deserialize_term_info(MEDULLA).get_merged_synonyms()
self.by_label = {s["label"]: s for s in self.syns}

def test_each_synonym_appears_once(self):
labels = [s["label"] for s in self.syns]
self.assertEqual(sorted(labels), ["ME", "ME_L", "m"])
self.assertEqual(len(labels), len(set(labels)))

def test_multi_ref_synonym_merged(self):
# ME_L asserted by flywire + banc -> single entry, both refs, no Unattributed
pub = self.by_label["ME_L"]["publication"]
self.assertIn("Schlegel et al., 2024", pub)
self.assertIn("Bates et al., 2025", pub)
self.assertNotIn("Unattributed", pub)

def test_attributed_pubs_are_markdown_links(self):
# every pub with a short_form/id must render as a markdown ref
self.assertIn("[Schlegel et al., 2024](FBrf0260535)", self.by_label["ME_L"]["publication"])
self.assertIn("[Bates et al., 2025](doi_10_1101_2025_07_31_667571)", self.by_label["ME_L"]["publication"])

def test_unattributed_with_type_shows_type_token(self):
# name_in_banc -> Unattributed: surface the type as a plain (unlinked) ref
pub = self.by_label["ME_L"]["publication"]
self.assertIn("name_in_banc", pub)
self.assertNotIn("[name_in_banc]", pub) # not a link

def test_unattributed_only_no_type_has_no_publication(self):
# 'm' is backed only by Unattributed with no type -> shown with no ref
self.assertNotIn("publication", self.by_label["m"])

def test_attributed_single_ref_kept(self):
self.assertIn("[Ito et al., 2014](FBrf0224194)", self.by_label["ME"]["publication"])


class SynonymTypeLabelTest(unittest.TestCase):
def test_opaque_omo_ids_mapped(self):
self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/OMO_0003000"), "abbreviation")
self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/OMO_0003003"), "layperson synonym")

def test_fragment_fallback(self):
self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/fbbt#name_in_banc"), "name_in_banc")
self.assertEqual(synonym_type_label("http://purl.obolibrary.org/obo/ncbitaxon#scientific_name"), "scientific_name")

def test_empty(self):
self.assertEqual(synonym_type_label(""), "")


if __name__ == "__main__":
unittest.main()
98 changes: 98 additions & 0 deletions src/vfbquery/term_info_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,82 @@ def get_synonyms(self) -> List[str]:
return [str(syn) for syn in set(self.pub_syn) if syn]
return list()

def get_merged_synonyms(self) -> List[dict]:
"""Merge pub_syn into one entry per (scope, label) with the combined
list of refs.

The same synonym is often asserted by several datasets/papers, so
pub_syn holds one entry per (synonym, pub). This collapses them so each
synonym is shown once with the combined refs:

- any pub with a real id/short_form is rendered as a markdown link;
- the 'Unattributed' placeholder pub is never linked, but if the entry
carries a synonym type (e.g. name_in_banc) that type is shown as a
plain-text ref, since it is useful provenance for the user;
- a synonym backed only by Unattributed with no type is shown with no
ref at all.
"""
def pub_ref(pub):
"""Markdown ref for an attributed pub, or '' for Unattributed/none."""
core = getattr(pub, 'core', None) if pub else None
if not core:
return ""
sf = getattr(core, 'short_form', '') or ""
if not sf or sf == "Unattributed":
return ""
micro = getattr(pub, 'microref', '') or ""
label = getattr(core, 'label', '') or ""
if micro:
text = micro
elif label:
parts = label.split(",")
text = (parts[0] + "," + parts[1]) if len(parts) > 1 else label
else:
text = sf
return get_link(text, sf)

def type_token(syn):
"""Readable label for the synonym type, e.g. name_in_banc or
'abbreviation' for opaque OMO ids."""
return synonym_type_label(getattr(syn.synonym, 'type', '') or "")

grouped = {}
order = []
for syn in (self.pub_syn or []):
if not (hasattr(syn, 'synonym') and syn.synonym):
continue
label = getattr(syn.synonym, 'label', "") or ""
scope = getattr(syn.synonym, 'scope', "") or "exact"
stype = getattr(syn.synonym, 'type', "") or "synonym"
key = (scope, label)
if key not in grouped:
grouped[key] = {"label": label, "scope": scope, "type": stype, "refs": []}
order.append(key)
entry = grouped[key]
entry_pubs = list(getattr(syn, 'pubs', None) or [])
if getattr(syn, 'pub', None):
entry_pubs.append(syn.pub)
real_refs = [r for r in (pub_ref(p) for p in entry_pubs) if r]
if real_refs:
for ref in real_refs:
if ref not in entry["refs"]:
entry["refs"].append(ref)
else:
# no attributed pub for this assertion: fall back to the
# synonym type as an unlinked ref (e.g. name_in_banc)
tok = type_token(syn)
if tok and tok not in entry["refs"]:
entry["refs"].append(tok)

result = []
for key in order:
entry = grouped[key]
synonym = {"label": entry["label"], "scope": entry["scope"], "type": entry["type"]}
if entry["refs"]:
synonym["publication"] = ", ".join(entry["refs"])
result.append(synonym)
return result

def get_references(self) -> List[dict]:
results = list()
if self.def_pubs:
Expand Down Expand Up @@ -757,6 +833,28 @@ def get_image(data: str, name: str, reference: str):
return image


# Display labels for synonym types whose IRI fragment is an opaque id rather
# than human-readable text (OMO synonym-type ids). Other synonym types (e.g.
# fbbt#name_in_banc, ncbitaxon#scientific_name) already read sensibly as their
# fragment, so are left as-is. Keyed by IRI fragment / short id.
SYNONYM_TYPE_LABELS = {
"OMO_0003000": "abbreviation",
"OMO_0003003": "layperson synonym",
}


def synonym_type_label(type_iri: str) -> str:
"""Human-readable label for a synonym-type IRI.

Returns the curated label for opaque OMO ids, otherwise the IRI fragment
(e.g. name_in_banc, scientific_name). Empty string for no type.
"""
if not type_iri:
return ""
frag = type_iri.split('#')[-1].split('/')[-1]
return SYNONYM_TYPE_LABELS.get(frag, frag)


def get_link(text: str, link: str) -> str:
"""
Creates a markdown formatted link string.
Expand Down
32 changes: 5 additions & 27 deletions src/vfbquery/vfb_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1246,33 +1246,11 @@ def term_info_parse_object(results, short_form):

termInfo["Publications"] = publications

# Add Synonyms from pub_syn. Not gated on Class: Individual terms also
# carry pub_syn (parity gap B — Individual synonyms were dropped when
# this was Class-only).
if termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
synonyms = []
for syn in vfbTerm.pub_syn:
if hasattr(syn, 'synonym') and syn.synonym:
synonym = {}
synonym["label"] = syn.synonym.label if hasattr(syn.synonym, 'label') else ""
synonym["scope"] = syn.synonym.scope if hasattr(syn.synonym, 'scope') else "exact"
synonym["type"] = syn.synonym.type if hasattr(syn.synonym, 'type') else "synonym"

if hasattr(syn, 'pubs') and syn.pubs:
pub_refs = []
for pub in syn.pubs:
if hasattr(pub, 'get_microref') and pub.get_microref():
pub_refs.append(pub.get_microref())

if pub_refs:
# Join multiple publication references with commas
synonym["publication"] = ", ".join(pub_refs)
# Fallback to single pub if pubs collection not available
elif hasattr(syn, 'pub') and syn.pub and hasattr(syn.pub, 'get_microref'):
synonym["publication"] = syn.pub.get_microref()

synonyms.append(synonym)

# Add Synonyms for Class entities. pub_syn holds one entry per
# (synonym, pub); get_merged_synonyms() collapses these to one entry per
# synonym with the combined refs and drops the Unattributed placeholder.
if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
synonyms = vfbTerm.get_merged_synonyms()
# Only add the synonyms if we found any
if synonyms:
termInfo["Synonyms"] = synonyms
Expand Down
Loading