From 8db3d5b92ad3172ae33f74354e3a6b9083617958 Mon Sep 17 00:00:00 2001 From: Imene-Amirat Date: Mon, 23 Mar 2026 09:32:45 +0100 Subject: [PATCH 1/6] Add federation search endpoint --- controllers/federation_controller.rb | 71 ++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 controllers/federation_controller.rb diff --git a/controllers/federation_controller.rb b/controllers/federation_controller.rb new file mode 100644 index 000000000..b65f7fcbc --- /dev/null +++ b/controllers/federation_controller.rb @@ -0,0 +1,71 @@ +require 'faraday' + +class FederationController < ApplicationController + + GATEWAY_URL = "https://terminology.services.base4nfdi.de/api-gateway" + GATEWAY_CONNECTION = Faraday.new(url: GATEWAY_URL) do |conn| + conn.headers['Accept'] = 'application/json' + end + + + namespace "/api/federation" do + + get'/search' do + query = params[:query] || params[:q] + + if query.nil? || query.strip.empty? + error 400, "You must provide a 'query' parameter to execute a search" + end + + gateway_params = { query: query, database: "ontoportal" } + + response = GATEWAY_CONNECTION.get("search", gateway_params) + + unless response.success? + error response.status, "API Gateway error: #{response.body}" + end + + gateway_response_data = JSON.parse(response.body) + results = gateway_response_data.is_a?(Array) ? gateway_response_data : [] + + docs = [] + + results.each do |item| + acronym = item["ontology"] + ontology_iri = item["ontology_iri"] || item["source"].to_s + "/ontologies/" + acronym.to_s + + doc = { + id: item["@id"] || item["iri"], + prefLabel: item["label"], + synonym: item["synonyms"] || [], + definition: item["descriptions"] || [], + obsolete: item["obsolete"] || false, + matchType: "prefLabel", + ontology_rank: 0.0 + } + + ontology = LinkedData::Models::Ontology.read_only( + id: ontology_iri, + acronym: acronym + ) + + submission = LinkedData::Models::OntologySubmission.read_only( + id: ontology_iri , + ontology: ontology + ) + + doc[:submission] = submission + + instance = LinkedData::Models::Class.read_only(doc) + docs.push(instance) + + end + + total_found = results.size + + reply 200, page_object(docs, total_found) + end + + end + +end From 95aefe2ed1baad1294169f41ae522375620acad5 Mon Sep 17 00:00:00 2001 From: Imene-Amirat Date: Mon, 23 Mar 2026 12:18:06 +0100 Subject: [PATCH 2/6] fix fields --- controllers/federation_controller.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/controllers/federation_controller.rb b/controllers/federation_controller.rb index b65f7fcbc..000ac81bd 100644 --- a/controllers/federation_controller.rb +++ b/controllers/federation_controller.rb @@ -40,7 +40,8 @@ class FederationController < ApplicationController synonym: item["synonyms"] || [], definition: item["descriptions"] || [], obsolete: item["obsolete"] || false, - matchType: "prefLabel", + matchType: "", + ontologyType: "", ontology_rank: 0.0 } From fe3ae4ad80c3bf992201bd47fc000dfb0fc3200b Mon Sep 17 00:00:00 2001 From: Imene-Amirat Date: Mon, 30 Mar 2026 11:21:49 +0200 Subject: [PATCH 3/6] endpoint direct parallel calls to ontoPortal portals --- controllers/federation_controller.rb | 6 +- controllers/federation_portals_controller.rb | 190 +++++++++++++++++++ 2 files changed, 193 insertions(+), 3 deletions(-) create mode 100644 controllers/federation_portals_controller.rb diff --git a/controllers/federation_controller.rb b/controllers/federation_controller.rb index 000ac81bd..2cbca8d32 100644 --- a/controllers/federation_controller.rb +++ b/controllers/federation_controller.rb @@ -17,7 +17,7 @@ class FederationController < ApplicationController error 400, "You must provide a 'query' parameter to execute a search" end - gateway_params = { query: query, database: "ontoportal" } + gateway_params = {query: query, database: "ontoportal"} response = GATEWAY_CONNECTION.get("search", gateway_params) @@ -37,8 +37,8 @@ class FederationController < ApplicationController doc = { id: item["@id"] || item["iri"], prefLabel: item["label"], - synonym: item["synonyms"] || [], - definition: item["descriptions"] || [], + synonym: Array(item["synonyms"]) , + definition: Array(item["descriptions"]), obsolete: item["obsolete"] || false, matchType: "", ontologyType: "", diff --git a/controllers/federation_portals_controller.rb b/controllers/federation_portals_controller.rb new file mode 100644 index 000000000..05839b751 --- /dev/null +++ b/controllers/federation_portals_controller.rb @@ -0,0 +1,190 @@ +require 'faraday' + +class FederationPortalsController < ApplicationController + + FEDERATION_PORTALS = { + earthportal: {url: 'https://data.earthportal.eu', apikey: '9a3f9f33-f512-4a04-bb84-45636068e255' }, + agroportal: {url: 'https://data.agroportal.lirmm.fr', apikey: '1cfae05f-9e67-486f-820b-b393dec5764b' }, + ecoportal: {url: 'https://data.ecoportal.lifewatch.eu', apikey: '43a437ba-a437-4bf0-affd-ab520e584719' }, + biodivportal: {url: 'https://data.biodivportal.gfbio.org', apikey: '47a57aa3-7b54-4f34-b695-dbb5f5b7363e' } + } + + NVS_URL = 'https://vocab.nerc.ac.uk' + + namespace "/api/federation_portals" do + + get '/search' do + query = params[:query] || params[:q] + + if query.nil? || query.strip.empty? + error 400, "You must provide a 'query' parameter to execute a search" + end + + + portals_threads = FEDERATION_PORTALS.map do |name, config| + Thread.new do + begin + conn = Faraday.new(url: config[:url]) do |f| + f.headers['Accept'] = 'application/json' + f.options.timeout = 30 + end + + response = conn.get('/search', { + q: query, + apikey: config[:apikey], + pagesize: params[:pagesize] || 50, + page: params[:page] || 1 + }) + + if response.success? + data = JSON.parse(response.body) + if data["collection"] + data["collection"].each do |item| + item["source_portal"] = name.to_s + end + end + data + else + nil + end + rescue => e + nil + end + end + end + + nvs_thread = Thread.new do + begin + conn = Faraday.new(url: NVS_URL) do |f| + f.headers['Accept'] = 'application/json' + f.options.timeout = 30 + end + + response = conn.get('/search/content', { + q: query, + pagesize: params[:pagesize] || 50, + page: params[:page] || 1 + }) + + if response.success? + data = JSON.parse(response.body) + collections = (data["member"] || []).map do |item| + { + "prefLabel" => item["sdo:name"], + "synonym" => [], + "definition" => [], + "obsolete" => false, + "matchType" => nil, + "ontologyType" => nil, + "hasChildren" => false, + "@id" => item["@id"], + "@type" => "http://www.w3.org/2004/02/skos/core#Concept", + "links" => { + "self" => item["@id"], + "ontology" => item["sdo:inDefinedTermSet"], + "children" => [], + "parents" => [], + "descendants" => [], + "ancestors" => [], + "instances" => [], + "tree" => nil, + "notes" => [], + "mappings" => [], + "ui" => item["@id"] + }, + "source_portal" => "nvs" + } + end + + # Enrichir chaque concept avec les détails NVS (broader, narrower, definition, synonym...) + enrich_threads = collections.map do |concept| + Thread.new do + begin + detail_url = concept["@id"] + next unless detail_url + # Ajouter le trailing slash si absent (NVS redirige 301 sans slash) + detail_url = detail_url + '/' unless detail_url.end_with?('/') + + detail_conn = Faraday.new(url: detail_url) do |f| + f.headers['Accept'] = 'application/ld+json' + f.options.timeout = 10 + end + + detail_response = detail_conn.get('', { + _profile: 'nvs', + _mediatype: 'application/ld+json' + }) + + if detail_response.success? + detail = JSON.parse(detail_response.body) + + broader = Array(detail["skos:broader"]) + narrower = Array(detail["skos:narrower"]) + same_as = Array(detail["owl:sameAs"]) + + concept["definition"] = detail["skos:definition"] ? [detail["skos:definition"]] : [] + concept["synonym"] = detail["skos:altLabel"] ? Array(detail["skos:altLabel"]) : [] + concept["obsolete"] = detail["owl:deprecated"] || false + concept["hasChildren"] = !narrower.empty? + + concept["links"]["parents"] = broader.map { |b| b.is_a?(Hash) ? b["@id"] : b.to_s } + concept["links"]["children"] = narrower.map { |n| n.is_a?(Hash) ? n["@id"] : n.to_s } + concept["links"]["notes"] = detail["skos:note"] ? [detail["skos:note"]] : [] + concept["links"]["mappings"] = same_as.map { |s| s.is_a?(Hash) ? s["@id"] : s.to_s } + end + rescue => e + # en cas d'erreur, on garde les champs par défaut + end + end + end + + enrich_threads.each(&:join) + + total = data["totalItems"] || collections.size + {"collection" => collections, "totalCount" => total} + else + nil + end + rescue => e + nil + end + end + + all_threads = portals_threads + [nvs_thread] + + # waiting results + results = [] + all_threads.each do |thread| + result = thread.value + results.push(result) + end + + results.compact! + + # merger les collections + all_docs = [] + total_count = 0 + + results.each do |portal_result| + all_docs.concat(portal_result["collection"] || []) + total_count += (portal_result["totalCount"] || 0) + end + + # return results avec pagination info + page, page_size = page_params + page_count = total_count > 0 ? (total_count / page_size.to_f).ceil : 0 + content_type :json + { + "page" => page, + "pageCount" => page_count, + "totalCount" => total_count, + "prevPage" => page > 1 ? page - 1 : nil, + "nextPage" => page < page_count ? page + 1 : nil, + "collection" => all_docs + }.to_json + end + + end + + +end From a5d1498b38e74319c9183b3d9ec0ba0d7666d4c7 Mon Sep 17 00:00:00 2001 From: imene-amirat Date: Thu, 2 Apr 2026 16:33:59 +0200 Subject: [PATCH 4/6] refactor direct parallel calls : add redis caching, result deduplication, SPARQL batch for NVS --- config/environments/config.rb.sample | 45 ++- controllers/federation_portals_controller.rb | 204 ++---------- helpers/federation_portals_helper.rb | 331 +++++++++++++++++++ 3 files changed, 387 insertions(+), 193 deletions(-) create mode 100644 helpers/federation_portals_helper.rb diff --git a/config/environments/config.rb.sample b/config/environments/config.rb.sample index 0c6c874ec..7cde7237a 100644 --- a/config/environments/config.rb.sample +++ b/config/environments/config.rb.sample @@ -109,19 +109,38 @@ LinkedData.config do |config| } ] config.federated_portals = { - 'agroportal' => { - api: 'http://data.agroportal.lirmm.fr', - ui: 'http://agroportal.lirmm.fr', - apikey: '1cfae05f-9e67-486f-820b-b393dec5764b', - color: '#1e2251' - }, - 'bioportal' => { - api: 'http://data.bioontology.org', - ui: 'http://bioportal.bioontology.org', - apikey: '4a5011ea-75fa-4be6-8e89-f45c8c84844e', - color: '#234979' - }, - + { + name: 'AgroPortal', + api: 'https://data.agroportal.lirmm.fr', + ui: 'https://agroportal.lirmm.fr/', + color: '#3CB371', + apikey: '1de0a270-29c5-4dda-b043-7c3580628cd5', + 'light-color': '#F1F6FA', + }, + { + name: 'BiodivPortal', + api: 'https://data.biodivportal.gfbio.org/', + ui: 'https://biodivportal.gfbio.org/', + apikey: "47a57aa3-7b54-4f34-b695-dbb5f5b7363e", + color: '#349696', + 'light-color': '#EBF5F5', + }, + { + name: 'EcoPortal', + ui: 'https://ecoportal.lifewatch.eu/', + api: 'https://data.ecoportal.lifewatch.eu/', + apikey: "43a437ba-a437-4bf0-affd-ab520e584719", + color: '#2076C9', + 'light-color': '#E9F2FA', + }, + { + name: 'EarthPortal', + ui: 'https://earthportal.eu/', + api: 'https://data.earthportal.eu/', + apikey: "c9147279-954f-41bd-b068-da9b0c441288", + color: '#404696', + 'light-color': '#F0F5F6' + }, } end diff --git a/controllers/federation_portals_controller.rb b/controllers/federation_portals_controller.rb index 05839b751..87e704c54 100644 --- a/controllers/federation_portals_controller.rb +++ b/controllers/federation_portals_controller.rb @@ -1,190 +1,34 @@ -require 'faraday' - class FederationPortalsController < ApplicationController - FEDERATION_PORTALS = { - earthportal: {url: 'https://data.earthportal.eu', apikey: '9a3f9f33-f512-4a04-bb84-45636068e255' }, - agroportal: {url: 'https://data.agroportal.lirmm.fr', apikey: '1cfae05f-9e67-486f-820b-b393dec5764b' }, - ecoportal: {url: 'https://data.ecoportal.lifewatch.eu', apikey: '43a437ba-a437-4bf0-affd-ab520e584719' }, - biodivportal: {url: 'https://data.biodivportal.gfbio.org', apikey: '47a57aa3-7b54-4f34-b695-dbb5f5b7363e' } - } - - NVS_URL = 'https://vocab.nerc.ac.uk' - - namespace "/api/federation_portals" do + namespace "/federation_portals" do + # search?q=water&portals=agroportal,ecoportal,nvs get '/search' do query = params[:query] || params[:q] - - if query.nil? || query.strip.empty? - error 400, "You must provide a 'query' parameter to execute a search" - end - - - portals_threads = FEDERATION_PORTALS.map do |name, config| - Thread.new do - begin - conn = Faraday.new(url: config[:url]) do |f| - f.headers['Accept'] = 'application/json' - f.options.timeout = 30 - end - - response = conn.get('/search', { - q: query, - apikey: config[:apikey], - pagesize: params[:pagesize] || 50, - page: params[:page] || 1 - }) - - if response.success? - data = JSON.parse(response.body) - if data["collection"] - data["collection"].each do |item| - item["source_portal"] = name.to_s - end - end - data - else - nil - end - rescue => e - nil - end - end - end - - nvs_thread = Thread.new do - begin - conn = Faraday.new(url: NVS_URL) do |f| - f.headers['Accept'] = 'application/json' - f.options.timeout = 30 - end - - response = conn.get('/search/content', { - q: query, - pagesize: params[:pagesize] || 50, - page: params[:page] || 1 - }) - - if response.success? - data = JSON.parse(response.body) - collections = (data["member"] || []).map do |item| - { - "prefLabel" => item["sdo:name"], - "synonym" => [], - "definition" => [], - "obsolete" => false, - "matchType" => nil, - "ontologyType" => nil, - "hasChildren" => false, - "@id" => item["@id"], - "@type" => "http://www.w3.org/2004/02/skos/core#Concept", - "links" => { - "self" => item["@id"], - "ontology" => item["sdo:inDefinedTermSet"], - "children" => [], - "parents" => [], - "descendants" => [], - "ancestors" => [], - "instances" => [], - "tree" => nil, - "notes" => [], - "mappings" => [], - "ui" => item["@id"] - }, - "source_portal" => "nvs" - } - end - - # Enrichir chaque concept avec les détails NVS (broader, narrower, definition, synonym...) - enrich_threads = collections.map do |concept| - Thread.new do - begin - detail_url = concept["@id"] - next unless detail_url - # Ajouter le trailing slash si absent (NVS redirige 301 sans slash) - detail_url = detail_url + '/' unless detail_url.end_with?('/') - - detail_conn = Faraday.new(url: detail_url) do |f| - f.headers['Accept'] = 'application/ld+json' - f.options.timeout = 10 - end - - detail_response = detail_conn.get('', { - _profile: 'nvs', - _mediatype: 'application/ld+json' - }) - - if detail_response.success? - detail = JSON.parse(detail_response.body) - - broader = Array(detail["skos:broader"]) - narrower = Array(detail["skos:narrower"]) - same_as = Array(detail["owl:sameAs"]) - - concept["definition"] = detail["skos:definition"] ? [detail["skos:definition"]] : [] - concept["synonym"] = detail["skos:altLabel"] ? Array(detail["skos:altLabel"]) : [] - concept["obsolete"] = detail["owl:deprecated"] || false - concept["hasChildren"] = !narrower.empty? - - concept["links"]["parents"] = broader.map { |b| b.is_a?(Hash) ? b["@id"] : b.to_s } - concept["links"]["children"] = narrower.map { |n| n.is_a?(Hash) ? n["@id"] : n.to_s } - concept["links"]["notes"] = detail["skos:note"] ? [detail["skos:note"]] : [] - concept["links"]["mappings"] = same_as.map { |s| s.is_a?(Hash) ? s["@id"] : s.to_s } - end - rescue => e - # en cas d'erreur, on garde les champs par défaut - end - end - end - - enrich_threads.each(&:join) - - total = data["totalItems"] || collections.size - {"collection" => collections, "totalCount" => total} - else - nil - end - rescue => e - nil - end - end - - all_threads = portals_threads + [nvs_thread] - - # waiting results - results = [] - all_threads.each do |thread| - result = thread.value - results.push(result) - end - - results.compact! - - # merger les collections - all_docs = [] - total_count = 0 - - results.each do |portal_result| - all_docs.concat(portal_result["collection"] || []) - total_count += (portal_result["totalCount"] || 0) - end - - # return results avec pagination info - page, page_size = page_params - page_count = total_count > 0 ? (total_count / page_size.to_f).ceil : 0 - content_type :json - { - "page" => page, - "pageCount" => page_count, - "totalCount" => total_count, - "prevPage" => page > 1 ? page - 1 : nil, - "nextPage" => page < page_count ? page + 1 : nil, - "collection" => all_docs - }.to_json + error 400, "You must provide a 'query' parameter to execute a search" if query.nil? || query.strip.empty? + + portals = selected_portals(params) + results = federated_portal_search(portals, query, params) + + current_page = (params[:page] || 1).to_i + pagesize = (params[:pagesize] || 50).to_i + total_count = results[:totalCount] + page_count = (total_count / pagesize.to_f).ceil + + page_data = { + page: current_page, + pageCount: page_count, + totalCount: total_count, + prevPage: current_page > 1 ? current_page - 1 : nil, + nextPage: current_page < page_count ? current_page + 1 : nil, + collection: results[:collection] + } + page_data[:errors] = results[:errors] if results[:errors].any? + + content_type 'application/json' + MultiJson.dump(page_data) end end - end diff --git a/helpers/federation_portals_helper.rb b/helpers/federation_portals_helper.rb new file mode 100644 index 000000000..c92b57c91 --- /dev/null +++ b/helpers/federation_portals_helper.rb @@ -0,0 +1,331 @@ +require 'sinatra/base' +require 'faraday' +require 'parallel' + +module Sinatra + module Helpers + module FederationPortalsHelper + + NVS_URL = 'https://vocab.nerc.ac.uk' + + def selected_portals(params) + all_portals = LinkedData.settings.federated_portals || {} + + if params[:portals].present? + selected = params[:portals].split(',').map(&:strip).map(&:downcase) + all_portals.select { |name, _| selected.include?(name.to_s.downcase) } + else + all_portals + end + end + + def include_nvs?(params) + return true unless params[:portals].present? + params[:portals].split(',').map(&:strip).map(&:downcase).include?('nvs') + end + + def federated_portal_search(portals, query, params) + all_sources = portals.map { |name, config| { type: :portal, name: name, config: config } } + all_sources << { type: :nvs } if include_nvs?(params) + + results = Parallel.map(all_sources, in_threads: all_sources.size) do |source| + if source[:type] == :nvs + nvs_search(query, params) + else + portal_search(source[:name], source[:config], query, params) + end + end + + merge_results(results) + end + + def portal_search(name, config, query, params) + name = name.to_s + cache_key = "federation_portal_up_#{name}" + + cached_status = Sinatra::Helpers::HTTPCacheHelper::REDIS.get(cache_key) rescue nil + if cached_status == "false" + return { portal: name, error: "#{name} is down (cached for 10 minutes)" } + end + + api_url = config[:api] || config['api'] + apikey = config[:apikey] || config['apikey'] + + conn = Faraday.new(url: api_url) do |f| + f.headers['Accept'] = 'application/json' + f.headers['Authorization'] = "apikey token=#{apikey}" + f.options.timeout = 15 + f.options.open_timeout = 5 + end + + response = conn.get('/search', { + q: query, + pagesize: params[:pagesize] || 50, + page: params[:page] || 1 + }) + + if [301, 302].include?(response.status) && response.headers['location'] + response = conn.get(response.headers['location']) + end + + if response.success? + data = MultiJson.load(response.body) + collection = data["collection"] || [] + collection.each do |item| + item["source_portal"] = name + end + { portal: name, collection: collection, totalCount: data["totalCount"] || collection.size } + else + { portal: name, error: "#{name} returned HTTP #{response.status}" } + end + + rescue => e + Sinatra::Helpers::HTTPCacheHelper::REDIS.setex(cache_key, 600, "false") rescue nil + { portal: name, error: "Problem retrieving #{name}: #{e.message}" } + end + + def nvs_search(query, params) + cache_key = "federation_portal_up_nvs" + + cached_status = Sinatra::Helpers::HTTPCacheHelper::REDIS.get(cache_key) rescue nil + if cached_status == "false" + return { portal: "nvs", error: "nvs is down (cached for 10 minutes)" } + end + + conn = Faraday.new(url: NVS_URL) do |f| + f.headers['Accept'] = 'application/json' + f.options.timeout = 15 + f.options.open_timeout = 5 + end + + response = conn.get('/search/content', { + q: query, + pagesize: params[:pagesize] || 50, + page: params[:page] || 1 + }) + + if [301, 302].include?(response.status) && response.headers['location'] + response = conn.get(response.headers['location']) + end + + if response.success? + data = MultiJson.load(response.body) + collections = (data["member"] || []).map do |item| + { + "prefLabel" => item["sdo:name"], + "synonym" => [], + "definition" => [], + "obsolete" => false, + "matchType" => nil, + "ontologyType" => nil, + "hasChildren" => false, + "@id" => item["@id"], + "@type" => "http://www.w3.org/2004/02/skos/core#Concept", + "links" => { + "self" => item["@id"], + "ontology" => item["sdo:inDefinedTermSet"], + "children" => [], + "parents" => [], + "descendants" => [], + "ancestors" => [], + "instances" => [], + "tree" => nil, + "notes" => [], + "mappings" => [], + "ui" => item["@id"] + }, + "source_portal" => "nvs" + } + end + + enrich_nvs_concepts_sparql(collections) + + total = data["totalItems"] || collections.size + { portal: "nvs", collection: collections, totalCount: total } + else + { portal: "nvs", error: "nvs returned HTTP #{response.status}" } + end + + rescue => e + Sinatra::Helpers::HTTPCacheHelper::REDIS.setex(cache_key, 600, "false") rescue nil + { portal: "nvs", error: "Problem retrieving nvs: #{e.message}" } + end + + + + + # Solution A — N appels individuels (ancien, fallback) + def enrich_nvs_concepts_individual(collections) + Parallel.each(collections, in_threads: [collections.size, 10].min) do |concept| + detail_url = concept["@id"] + next unless detail_url + + detail_url = detail_url + '/' unless detail_url.end_with?('/') + + conn = Faraday.new(url: detail_url) do |f| + f.headers['Accept'] = 'application/ld+json' + f.options.timeout = 10 + f.options.open_timeout = 5 + end + + response = conn.get('', { + _profile: 'nvs', + _mediatype: 'application/ld+json' + }) + + if response.success? + detail = MultiJson.load(response.body) + + broader = Array(detail["skos:broader"]) + narrower = Array(detail["skos:narrower"]) + same_as = Array(detail["owl:sameAs"]) + + concept["definition"] = detail["skos:definition"] ? [detail["skos:definition"]] : [] + concept["synonym"] = detail["skos:altLabel"] ? Array(detail["skos:altLabel"]) : [] + concept["obsolete"] = detail["owl:deprecated"] || false + concept["hasChildren"] = !narrower.empty? + + concept["links"]["parents"] = broader.map { |b| b.is_a?(Hash) ? b["@id"] : b.to_s } + concept["links"]["children"] = narrower.map { |n| n.is_a?(Hash) ? n["@id"] : n.to_s } + concept["links"]["notes"] = detail["skos:note"] ? [detail["skos:note"]] : [] + concept["links"]["mappings"] = same_as.map { |s| s.is_a?(Hash) ? s["@id"] : s.to_s } + end + rescue => e + # En cas d'erreur, on garde les champs par défaut + end + end + + NVS_SPARQL_URL = 'https://vocab.nerc.ac.uk/sparql/sparql' + + # Solution B — 1 seul appel SPARQL batch (nouveau, rapide) + def enrich_nvs_concepts_sparql(collections) + return if collections.empty? + + iris = collections.map { |c| c["@id"] }.compact + return if iris.empty? + + values = iris.map { |iri| + uri = iri.end_with?('/') ? iri : "#{iri}/" + "<#{uri}>" + }.join(' ') + + sparql_query = <<~SPARQL + PREFIX skos: + PREFIX owl: + SELECT ?s ?definition ?altLabel ?broader ?narrower ?deprecated ?sameAs ?note WHERE { + VALUES ?s { #{values} } + OPTIONAL { ?s skos:definition ?definition } + OPTIONAL { ?s skos:altLabel ?altLabel } + OPTIONAL { ?s skos:broader ?broader } + OPTIONAL { ?s skos:narrower ?narrower } + OPTIONAL { ?s owl:deprecated ?deprecated } + OPTIONAL { ?s owl:sameAs ?sameAs } + OPTIONAL { ?s skos:note ?note } + } + SPARQL + + conn = Faraday.new(url: NVS_SPARQL_URL) do |f| + f.request :url_encoded + f.headers['Accept'] = 'application/sparql-results+json' + f.options.timeout = 15 + f.options.open_timeout = 5 + end + + response = conn.post('', query: sparql_query) + return unless response.success? + + data = MultiJson.load(response.body) + bindings = data.dig("results", "bindings") || [] + + # Regrouper les résultats SPARQL par IRI + grouped = {} + bindings.each do |row| + uri = row.dig("s", "value") + next unless uri + grouped[uri] ||= { definitions: [], altLabels: [], broaders: [], narrowers: [], sameAs: [], notes: [], deprecated: false } + g = grouped[uri] + + val = row.dig("definition", "value") + g[:definitions] << val if val && !val.empty? && !g[:definitions].include?(val) + + val = row.dig("altLabel", "value") + g[:altLabels] << val if val && !val.empty? && !g[:altLabels].include?(val) + + val = row.dig("broader", "value") + g[:broaders] << val if val && !g[:broaders].include?(val) + + val = row.dig("narrower", "value") + g[:narrowers] << val if val && !g[:narrowers].include?(val) + + val = row.dig("sameAs", "value") + g[:sameAs] << val if val && !g[:sameAs].include?(val) + + val = row.dig("note", "value") + g[:notes] << val if val && !val.empty? && !g[:notes].include?(val) + + val = row.dig("deprecated", "value") + g[:deprecated] = true if val == "true" + end + + # Appliquer l'enrichissement à chaque concept + collections.each do |concept| + iri = concept["@id"] + iri_with_slash = iri&.end_with?('/') ? iri : "#{iri}/" + enrichment = grouped[iri] || grouped[iri_with_slash] + next unless enrichment + + concept["definition"] = enrichment[:definitions] + concept["synonym"] = enrichment[:altLabels] + concept["obsolete"] = enrichment[:deprecated] + concept["hasChildren"] = !enrichment[:narrowers].empty? + + concept["links"]["parents"] = enrichment[:broaders] + concept["links"]["children"] = enrichment[:narrowers] + concept["links"]["notes"] = enrichment[:notes] + concept["links"]["mappings"] = enrichment[:sameAs] + end + end + + + + + def merge_results(results) + collection = [] + errors = [] + total_count = 0 + + results.each do |result| + if result[:error] + errors << result[:error] + else + collection.concat(result[:collection]) + total_count += result[:totalCount] + end + end + + seen = {} + merged = [] + + collection.each do |item| + concept_id = item["@id"] + ontology_acronym = item.dig("links", "ontology")&.split('/')&.last + dedup_key = "#{concept_id}||#{ontology_acronym}" + + if seen[dedup_key] + portal_name = item["source_portal"] + seen[dedup_key]["other_portals"] << portal_name + else + item["other_portals"] = [] + seen[dedup_key] = item + merged << item + end + end + + { collection: merged, totalCount: total_count, errors: errors } + end + + end + end +end + +helpers Sinatra::Helpers::FederationPortalsHelper From 8b93d4b1e12264b0ac30595306e4ae63a46f489b Mon Sep 17 00:00:00 2001 From: imene-amirat Date: Thu, 2 Apr 2026 16:34:18 +0200 Subject: [PATCH 5/6] move logic to helper --- helpers/federation_portals_helper.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/helpers/federation_portals_helper.rb b/helpers/federation_portals_helper.rb index c92b57c91..86d6ab56f 100644 --- a/helpers/federation_portals_helper.rb +++ b/helpers/federation_portals_helper.rb @@ -154,7 +154,7 @@ def nvs_search(query, params) - # Solution A — N appels individuels (ancien, fallback) + #solution A def enrich_nvs_concepts_individual(collections) Parallel.each(collections, in_threads: [collections.size, 10].min) do |concept| detail_url = concept["@id"] @@ -191,13 +191,12 @@ def enrich_nvs_concepts_individual(collections) concept["links"]["mappings"] = same_as.map { |s| s.is_a?(Hash) ? s["@id"] : s.to_s } end rescue => e - # En cas d'erreur, on garde les champs par défaut end end NVS_SPARQL_URL = 'https://vocab.nerc.ac.uk/sparql/sparql' - # Solution B — 1 seul appel SPARQL batch (nouveau, rapide) + #solution B def enrich_nvs_concepts_sparql(collections) return if collections.empty? @@ -287,7 +286,7 @@ def enrich_nvs_concepts_sparql(collections) end - + def merge_results(results) collection = [] From f50909dfcfb1a567ab209d62af4e2543a59843d9 Mon Sep 17 00:00:00 2001 From: imene-amirat Date: Thu, 2 Apr 2026 16:35:25 +0200 Subject: [PATCH 6/6] refactor gateway federation search: add NVS --- controllers/federation_controller.rb | 169 ++++++++++++++++++++------- 1 file changed, 129 insertions(+), 40 deletions(-) diff --git a/controllers/federation_controller.rb b/controllers/federation_controller.rb index 2cbca8d32..9d71acbd8 100644 --- a/controllers/federation_controller.rb +++ b/controllers/federation_controller.rb @@ -1,72 +1,161 @@ require 'faraday' +require 'parallel' class FederationController < ApplicationController GATEWAY_URL = "https://terminology.services.base4nfdi.de/api-gateway" GATEWAY_CONNECTION = Faraday.new(url: GATEWAY_URL) do |conn| conn.headers['Accept'] = 'application/json' + conn.options.timeout = 30 + conn.options.open_timeout = 10 end - namespace "/api/federation" do - get'/search' do + get '/search' do query = params[:query] || params[:q] if query.nil? || query.strip.empty? error 400, "You must provide a 'query' parameter to execute a search" end - gateway_params = {query: query, database: "ontoportal"} + databases = (params[:database] || "ontoportal,nerc").split(',').map(&:strip) - response = GATEWAY_CONNECTION.get("search", gateway_params) + # Appels parallèles : un par database pour éviter la limitation du Gateway + gateway_results = Parallel.map(databases, in_threads: databases.size) do |db| + fetch_gateway(query, db) + end - unless response.success? - error response.status, "API Gateway error: #{response.body}" + docs = [] + errors = [] + + gateway_results.each do |result| + if result[:error] + errors << result[:error] + else + result[:items].each do |item| + if item["backend_type"] == "nerc" + docs << map_nvs_item(item) + else + docs << map_ontoportal_item(item) + end + end + end end - gateway_response_data = JSON.parse(response.body) - results = gateway_response_data.is_a?(Array) ? gateway_response_data : [] + total_found = docs.size - docs = [] + page_data = paginate(docs, total_found) + page_data[:errors] = errors if errors.any? + + content_type 'application/json' + MultiJson.dump(page_data) + end - results.each do |item| - acronym = item["ontology"] - ontology_iri = item["ontology_iri"] || item["source"].to_s + "/ontologies/" + acronym.to_s - - doc = { - id: item["@id"] || item["iri"], - prefLabel: item["label"], - synonym: Array(item["synonyms"]) , - definition: Array(item["descriptions"]), - obsolete: item["obsolete"] || false, - matchType: "", - ontologyType: "", - ontology_rank: 0.0 - } - - ontology = LinkedData::Models::Ontology.read_only( - id: ontology_iri, - acronym: acronym - ) - - submission = LinkedData::Models::OntologySubmission.read_only( - id: ontology_iri , - ontology: ontology - ) - - doc[:submission] = submission - - instance = LinkedData::Models::Class.read_only(doc) - docs.push(instance) + end + helpers do + def fetch_gateway(query, database) + response = GATEWAY_CONNECTION.get("search", { query: query, database: database }) + + if response.success? + data = MultiJson.load(response.body) + items = if data.is_a?(Array) + data + elsif data.is_a?(Hash) && data["collection"] + data["collection"] + else + [] + end + { items: items } + else + { error: "Gateway (#{database}) returned HTTP #{response.status}" } end + rescue => e + { error: "Gateway (#{database}): #{e.message}" } + end - total_found = results.size + def map_ontoportal_item(item) + acronym = item["ontology"] + source_api = item["source"] + source_name = item["source_name"] + ontology_iri = item["ontology_iri"] || "#{source_api}/ontologies/#{acronym}" + concept_id = item["@id"] || item["iri"] + encoded_id = CGI.escape(concept_id) + + { + "prefLabel" => item["label"], + "synonym" => Array(item["synonyms"]), + "definition" => Array(item["descriptions"]), + "obsolete" => item["obsolete"] || false, + "matchType" => nil, + "ontologyType" => nil, + "hasChildren" => item["hasChildren"] || false, + "@id" => concept_id, + "@type" => item["type"] || "http://www.w3.org/2002/07/owl#Class", + "links" => { + "self" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}", + "ontology" => ontology_iri, + "children" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/children", + "parents" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/parents", + "descendants" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/descendants", + "ancestors" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/ancestors", + "instances" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/instances", + "tree" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/tree", + "notes" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/notes", + "mappings" => "#{source_api}/ontologies/#{acronym}/classes/#{encoded_id}/mappings", + "ui" => item["source_url"] || "#{source_api}/ontologies/#{acronym}?p=classes&conceptid=#{encoded_id}" + }, + "source_portal" => source_name + } + end - reply 200, page_object(docs, total_found) + def map_nvs_item(item) + concept_id = item["@id"] || item["iri"] + + { + "prefLabel" => item["label"], + "synonym" => Array(item["synonyms"]), + "definition" => Array(item["descriptions"]), + "obsolete" => item["obsolete"] || false, + "matchType" => nil, + "ontologyType" => nil, + "hasChildren" => item["hasChildren"] || false, + "@id" => concept_id, + "@type" => item["type"] || "http://www.w3.org/2004/02/skos/core#Concept", + "links" => { + "self" => concept_id, + "ontology" => item["ontology_iri"] || item["ontology"], + "children" => Array(item["children"]), + "parents" => [], + "descendants" => [], + "ancestors" => [], + "instances" => [], + "tree" => nil, + "notes" => [], + "mappings" => [], + "ui" => concept_id + }, + "source_portal" => "nvs" + } end + def paginate(docs, total_found) + current_page = (params[:page] || 1).to_i + pagesize = (params[:pagesize] || 50).to_i + page_count = (total_found / pagesize.to_f).ceil + start_index = (current_page - 1) * pagesize + paged_docs = docs[start_index, pagesize] || [] + + { + page: current_page, + pageCount: page_count, + totalCount: total_found, + prevPage: current_page > 1 ? current_page - 1 : nil, + nextPage: current_page < page_count ? current_page + 1 : nil, + collection: paged_docs + } + end end end