diff --git a/config/environments/config.rb.sample b/config/environments/config.rb.sample
index 0c6c874ec..7cde7237a 100644
--- a/config/environments/config.rb.sample
+++ b/config/environments/config.rb.sample
@@ -109,19 +109,38 @@ LinkedData.config do |config|
     }
   ]
   config.federated_portals = {
-    'agroportal' => {
-      api: 'http://data.agroportal.lirmm.fr',
-      ui: 'http://agroportal.lirmm.fr',
-      apikey: '1cfae05f-9e67-486f-820b-b393dec5764b',
-      color: '#1e2251'
-    },
-    'bioportal' => {
-      api: 'http://data.bioontology.org',
-      ui: 'http://bioportal.bioontology.org',
-      apikey: '4a5011ea-75fa-4be6-8e89-f45c8c84844e',
-      color: '#234979'
-    },
-
+    'agroportal' => {
+      name: 'AgroPortal',
+      api: 'https://data.agroportal.lirmm.fr',
+      ui: 'https://agroportal.lirmm.fr/',
+      color: '#3CB371',
+      apikey: '1de0a270-29c5-4dda-b043-7c3580628cd5',
+      'light-color': '#F1F6FA',
+    },
+    'biodivportal' => {
+      name: 'BiodivPortal',
+      api: 'https://data.biodivportal.gfbio.org/',
+      ui: 'https://biodivportal.gfbio.org/',
+      apikey: "47a57aa3-7b54-4f34-b695-dbb5f5b7363e",
+      color: '#349696',
+      'light-color': '#EBF5F5',
+    },
+    'ecoportal' => {
+      name: 'EcoPortal',
+      ui: 'https://ecoportal.lifewatch.eu/',
+      api: 'https://data.ecoportal.lifewatch.eu/',
+      apikey: "43a437ba-a437-4bf0-affd-ab520e584719",
+      color: '#2076C9',
+      'light-color': '#E9F2FA',
+    },
+    'earthportal' => {
+      name: 'EarthPortal',
+      ui: 'https://earthportal.eu/',
+      api: 'https://data.earthportal.eu/',
+      apikey: "c9147279-954f-41bd-b068-da9b0c441288",
+      color: '#404696',
+      'light-color': '#F0F5F6'
+    },
   }
 end
 
diff --git a/controllers/federation_controller.rb b/controllers/federation_controller.rb
new file mode 100644
index 000000000..9d71acbd8
--- /dev/null
+++ b/controllers/federation_controller.rb
@@ -0,0 +1,183 @@
+require 'cgi'
+require 'faraday'
+require 'parallel'
+
+# Federated search served through the terminology gateway at GATEWAY_URL.
+#
+# One gateway request is sent per requested database; every hit is converted
+# to a common search-document hash and the merged list is paginated locally.
+class FederationController < ApplicationController
+
+  GATEWAY_URL = "https://terminology.services.base4nfdi.de/api-gateway"
+  GATEWAY_CONNECTION = Faraday.new(url: GATEWAY_URL) do |conn|
+    conn.headers['Accept'] = 'application/json'
+    conn.options.timeout = 30
+    conn.options.open_timeout = 10
+  end
+
+  namespace "/api/federation" do
+
+    # GET /api/federation/search
+    # Params: query (or q, required), database (comma-separated list,
+    # defaults to "ontoportal,nerc"), page, pagesize.
+    get '/search' do
+      query = params[:query] || params[:q]
+
+      if query.nil? || query.strip.empty?
+        error 400, "You must provide a 'query' parameter to execute a search"
+      end
+
+      # Blank entries ("a,,b") are dropped so no request is sent for an empty database name
+      databases = (params[:database] || "ontoportal,nerc").split(',').map(&:strip).reject(&:empty?)
+
+      # Parallel calls: one per database, to avoid the gateway limitation
+      gateway_results = Parallel.map(databases, in_threads: databases.size) do |db|
+        fetch_gateway(query, db)
+      end
+
+      docs = []
+      errors = []
+
+      gateway_results.each do |result|
+        if result[:error]
+          errors << result[:error]
+          next
+        end
+
+        result[:items].each do |item|
+          docs << (item["backend_type"] == "nerc" ? map_nvs_item(item) : map_ontoportal_item(item))
+        end
+      end
+
+      page_data = paginate(docs, docs.size)
+      page_data[:errors] = errors if errors.any?
+
+      content_type 'application/json'
+      MultiJson.dump(page_data)
+    end
+
+  end
+
+  helpers do
+    # Runs the search against the gateway for a single database.
+    #
+    # Returns { items: Array } on success, or { error: String } on a non-2xx
+    # response or any raised error, so one failing backend cannot abort the
+    # whole federated search.
+    def fetch_gateway(query, database)
+      response = GATEWAY_CONNECTION.get("search", { query: query, database: database })
+
+      unless response.success?
+        return { error: "Gateway (#{database}) returned HTTP #{response.status}" }
+      end
+
+      data = MultiJson.load(response.body)
+      items = if data.is_a?(Array)
+                data
+              elsif data.is_a?(Hash) && data["collection"]
+                data["collection"]
+              else
+                []
+              end
+      { items: items }
+    rescue => e
+      { error: "Gateway (#{database}): #{e.message}" }
+    end
+
+    # Maps a gateway hit coming from an OntoPortal backend to a search
+    # document; the links are built from the hit's "source" API URL.
+    def map_ontoportal_item(item)
+      acronym = item["ontology"]
+      source_api = item["source"]
+      concept_id = item["@id"] || item["iri"]
+      # to_s: CGI.escape raises on nil, and one hit without an id must not fail the request
+      encoded_id = CGI.escape(concept_id.to_s)
+      ontology_url = "#{source_api}/ontologies/#{acronym}"
+      class_url = "#{ontology_url}/classes/#{encoded_id}"
+
+      {
+        "prefLabel" => item["label"],
+        "synonym" => Array(item["synonyms"]),
+        "definition" => Array(item["descriptions"]),
+        "obsolete" => item["obsolete"] || false,
+        "matchType" => nil,
+        "ontologyType" => nil,
+        "hasChildren" => item["hasChildren"] || false,
+        "@id" => concept_id,
+        "@type" => item["type"] || "http://www.w3.org/2002/07/owl#Class",
+        "links" => {
+          "self" => class_url,
+          "ontology" => item["ontology_iri"] || ontology_url,
+          "children" => "#{class_url}/children",
+          "parents" => "#{class_url}/parents",
+          "descendants" => "#{class_url}/descendants",
+          "ancestors" => "#{class_url}/ancestors",
+          "instances" => "#{class_url}/instances",
+          "tree" => "#{class_url}/tree",
+          "notes" => "#{class_url}/notes",
+          "mappings" => "#{class_url}/mappings",
+          "ui" => item["source_url"] || "#{ontology_url}?p=classes&conceptid=#{encoded_id}"
+        },
+        "source_portal" => item["source_name"]
+      }
+    end
+
+    # Maps a gateway hit with backend_type "nerc" to a search document. The
+    # concept IRI doubles as the self/ui link and hierarchy links stay empty,
+    # apart from the children the gateway may already provide.
+    def map_nvs_item(item)
+      concept_id = item["@id"] || item["iri"]
+
+      {
+        "prefLabel" => item["label"],
+        "synonym" => Array(item["synonyms"]),
+        "definition" => Array(item["descriptions"]),
+        "obsolete" => item["obsolete"] || false,
+        "matchType" => nil,
+        "ontologyType" => nil,
+        "hasChildren" => item["hasChildren"] || false,
+        "@id" => concept_id,
+        "@type" => item["type"] || "http://www.w3.org/2004/02/skos/core#Concept",
+        "links" => {
+          "self" => concept_id,
+          "ontology" => item["ontology_iri"] || item["ontology"],
+          "children" => Array(item["children"]),
+          "parents" => [],
+          "descendants" => [],
+          "ancestors" => [],
+          "instances" => [],
+          "tree" => nil,
+          "notes" => [],
+          "mappings" => [],
+          "ui" => concept_id
+        },
+        "source_portal" => "nvs"
+      }
+    end
+
+    # Returns the page of +docs+ selected by params[:page] / params[:pagesize]
+    # together with the paging metadata.
+    #
+    # Values below 1 (including non-numeric input, which to_i turns into 0)
+    # fall back to page 1 / pagesize 50: a zero pagesize would otherwise make
+    # the page-count division produce NaN/Infinity and #ceil raise.
+    def paginate(docs, total_found)
+      current_page = [(params[:page] || 1).to_i, 1].max
+      pagesize = (params[:pagesize] || 50).to_i
+      pagesize = 50 unless pagesize.positive?
+      page_count = (total_found / pagesize.to_f).ceil
+      start_index = (current_page - 1) * pagesize
+      paged_docs = docs[start_index, pagesize] || []
+
+      {
+        page: current_page,
+        pageCount: page_count,
+        totalCount: total_found,
+        prevPage: current_page > 1 ? current_page - 1 : nil,
+        nextPage: current_page < page_count ? current_page + 1 : nil,
+        collection: paged_docs
+      }
+    end
+  end
+
+end
diff --git a/controllers/federation_portals_controller.rb b/controllers/federation_portals_controller.rb
new file mode 100644
index 000000000..87e704c54
--- /dev/null
+++ b/controllers/federation_portals_controller.rb
@@ -0,0 +1,38 @@
+# Search across the configured federated portals (and NVS) in one request.
+class FederationPortalsController < ApplicationController
+
+  namespace "/federation_portals" do
+
+    # search?q=water&portals=agroportal,ecoportal,nvs
+    get '/search' do
+      query = params[:query] || params[:q]
+      error 400, "You must provide a 'query' parameter to execute a search" if query.nil? || query.strip.empty?
+
+      portals = selected_portals(params)
+      results = federated_portal_search(portals, query, params)
+
+      # Values below 1 (to_i maps non-numeric input to 0) fall back to the
+      # defaults; a zero pagesize would make the page-count #ceil raise.
+      current_page = [(params[:page] || 1).to_i, 1].max
+      pagesize = (params[:pagesize] || 50).to_i
+      pagesize = 50 unless pagesize.positive?
+      total_count = results[:totalCount]
+      page_count = (total_count / pagesize.to_f).ceil
+
+      page_data = {
+        page: current_page,
+        pageCount: page_count,
+        totalCount: total_count,
+        prevPage: current_page > 1 ? current_page - 1 : nil,
+        nextPage: current_page < page_count ? current_page + 1 : nil,
+        collection: results[:collection]
+      }
+      page_data[:errors] = results[:errors] if results[:errors].any?
+
+      content_type 'application/json'
+      MultiJson.dump(page_data)
+    end
+
+  end
+
+end
diff --git a/helpers/federation_portals_helper.rb b/helpers/federation_portals_helper.rb
new file mode 100644
index 000000000..86d6ab56f
--- /dev/null
+++ b/helpers/federation_portals_helper.rb
@@ -0,0 +1,362 @@
+require 'sinatra/base'
+require 'faraday'
+require 'parallel'
+
+module Sinatra
+  module Helpers
+    # Search fan-out over the portals in LinkedData.settings.federated_portals
+    # and the NVS vocabulary server (NVS_URL). Every source is queried in its
+    # own thread and the per-source results are merged and de-duplicated.
+    module FederationPortalsHelper
+
+      NVS_URL = 'https://vocab.nerc.ac.uk'
+      NVS_SPARQL_URL = 'https://vocab.nerc.ac.uk/sparql/sparql'
+
+      # SPARQL result variable => key of the per-concept enrichment hash
+      NVS_SPARQL_FIELDS = {
+        "definition" => :definitions,
+        "altLabel" => :altLabels,
+        "broader" => :broaders,
+        "narrower" => :narrowers,
+        "sameAs" => :sameAs,
+        "note" => :notes
+      }.freeze
+
+      # Characters that may not appear inside a SPARQL IRI reference (<...>)
+      UNSAFE_IRI_CHARS = /[\s<>"{}|\\^`]/
+
+      # Portals to query: the whole configuration, or only the entries named in
+      # the comma-separated params[:portals] (matched case-insensitively).
+      def selected_portals(params)
+        all_portals = LinkedData.settings.federated_portals || {}
+        return all_portals unless params[:portals].present?
+
+        selected = requested_portal_names(params)
+        all_portals.select { |name, _| selected.include?(name.to_s.downcase) }
+      end
+
+      # NVS is searched by default, or when params[:portals] lists "nvs".
+      def include_nvs?(params)
+        return true unless params[:portals].present?
+
+        requested_portal_names(params).include?('nvs')
+      end
+
+      # Lower-cased, stripped names from the comma-separated params[:portals].
+      def requested_portal_names(params)
+        params[:portals].split(',').map { |name| name.strip.downcase }
+      end
+
+      # Searches every selected portal (plus NVS when requested) in parallel and
+      # returns { collection:, totalCount:, errors: } as built by merge_results.
+      def federated_portal_search(portals, query, params)
+        all_sources = portals.map { |name, config| { type: :portal, name: name, config: config } }
+        all_sources << { type: :nvs } if include_nvs?(params)
+
+        results = Parallel.map(all_sources, in_threads: all_sources.size) do |source|
+          if source[:type] == :nvs
+            nvs_search(query, params)
+          else
+            portal_search(source[:name], source[:config], query, params)
+          end
+        end
+
+        merge_results(results)
+      end
+
+      # Searches one OntoPortal instance through its /search endpoint.
+      #
+      # Returns { portal:, collection:, totalCount: } on success and
+      # { portal:, error: } otherwise. A raised error flags the portal as down
+      # in Redis for 10 minutes; while flagged, it is skipped without a request.
+      def portal_search(name, config, query, params)
+        name = name.to_s
+        cache_key = "federation_portal_up_#{name}"
+        return { portal: name, error: "#{name} is down (cached for 10 minutes)" } if portal_marked_down?(cache_key)
+
+        api_url = config[:api] || config['api']
+        apikey = config[:apikey] || config['apikey']
+
+        conn = Faraday.new(url: api_url) do |f|
+          f.headers['Accept'] = 'application/json'
+          f.headers['Authorization'] = "apikey token=#{apikey}"
+          f.options.timeout = 15
+          f.options.open_timeout = 5
+        end
+
+        response = paged_search_request(conn, '/search', query, params)
+        return { portal: name, error: "#{name} returned HTTP #{response.status}" } unless response.success?
+
+        data = MultiJson.load(response.body)
+        collection = data["collection"] || []
+        collection.each { |item| item["source_portal"] = name }
+        { portal: name, collection: collection, totalCount: data["totalCount"] || collection.size }
+      rescue => e
+        mark_portal_down(cache_key)
+        { portal: name, error: "Problem retrieving #{name}: #{e.message}" }
+      end
+
+      # Searches NVS through /search/content and converts each "member" entry
+      # to a search document, then enriches the documents through SPARQL.
+      #
+      # Same return shape and down-flag handling as portal_search.
+      def nvs_search(query, params)
+        cache_key = "federation_portal_up_nvs"
+        return { portal: "nvs", error: "nvs is down (cached for 10 minutes)" } if portal_marked_down?(cache_key)
+
+        conn = Faraday.new(url: NVS_URL) do |f|
+          f.headers['Accept'] = 'application/json'
+          f.options.timeout = 15
+          f.options.open_timeout = 5
+        end
+
+        response = paged_search_request(conn, '/search/content', query, params)
+        return { portal: "nvs", error: "nvs returned HTTP #{response.status}" } unless response.success?
+
+        data = MultiJson.load(response.body)
+        collections = (data["member"] || []).map { |item| nvs_search_document(item) }
+
+        begin
+          enrich_nvs_concepts_sparql(collections)
+        rescue StandardError
+          # Enrichment is optional: a SPARQL failure must not discard the search
+          # hits already retrieved, nor flag NVS as down.
+        end
+
+        { portal: "nvs", collection: collections, totalCount: data["totalItems"] || collections.size }
+      rescue => e
+        mark_portal_down(cache_key)
+        { portal: "nvs", error: "Problem retrieving nvs: #{e.message}" }
+      end
+
+      # Bare search document for one NVS hit; the enrichment step fills in
+      # definitions, synonyms and hierarchy links afterwards.
+      def nvs_search_document(item)
+        {
+          "prefLabel" => item["sdo:name"],
+          "synonym" => [],
+          "definition" => [],
+          "obsolete" => false,
+          "matchType" => nil,
+          "ontologyType" => nil,
+          "hasChildren" => false,
+          "@id" => item["@id"],
+          "@type" => "http://www.w3.org/2004/02/skos/core#Concept",
+          "links" => {
+            "self" => item["@id"],
+            "ontology" => item["sdo:inDefinedTermSet"],
+            "children" => [],
+            "parents" => [],
+            "descendants" => [],
+            "ancestors" => [],
+            "instances" => [],
+            "tree" => nil,
+            "notes" => [],
+            "mappings" => [],
+            "ui" => item["@id"]
+          },
+          "source_portal" => "nvs"
+        }
+      end
+
+      # Issues the paged search GET and follows a single 301/302 redirect.
+      def paged_search_request(conn, path, query, params)
+        response = conn.get(path, {
+          q: query,
+          pagesize: params[:pagesize] || 50,
+          page: params[:page] || 1
+        })
+
+        if [301, 302].include?(response.status) && response.headers['location']
+          response = conn.get(response.headers['location'])
+        end
+
+        response
+      end
+
+      # True when the source was flagged as down. Redis problems are ignored
+      # so that a cache outage never blocks the search itself.
+      def portal_marked_down?(cache_key)
+        Sinatra::Helpers::HTTPCacheHelper::REDIS.get(cache_key) == "false"
+      rescue StandardError
+        false
+      end
+
+      # Flags the source as down for 10 minutes (best effort: Redis errors are ignored).
+      def mark_portal_down(cache_key)
+        Sinatra::Helpers::HTTPCacheHelper::REDIS.setex(cache_key, 600, "false")
+      rescue StandardError
+        nil
+      end
+
+      # Wraps a JSON-LD value in an Array. Kernel#Array is avoided because it
+      # turns a single {"@id" => ...} object into an array of key/value pairs.
+      def json_ld_values(value)
+        value.is_a?(Array) ? value : [value].compact
+      end
+
+      # Appends a trailing slash to +iri+ unless it already ends with one.
+      def with_trailing_slash(iri)
+        iri.end_with?('/') ? iri : "#{iri}/"
+      end
+
+      # Solution A (alternative to enrich_nvs_concepts_sparql): enriches each
+      # concept in place from its own JSON-LD document, one request per concept
+      # on up to 10 threads. Best effort: a failing concept is left untouched.
+      def enrich_nvs_concepts_individual(collections)
+        Parallel.each(collections, in_threads: [collections.size, 10].min) do |concept|
+          detail_url = concept["@id"]
+          next unless detail_url
+
+          conn = Faraday.new(url: with_trailing_slash(detail_url)) do |f|
+            f.headers['Accept'] = 'application/ld+json'
+            f.options.timeout = 10
+            f.options.open_timeout = 5
+          end
+
+          response = conn.get('', { _profile: 'nvs', _mediatype: 'application/ld+json' })
+          next unless response.success?
+
+          detail = MultiJson.load(response.body)
+          narrower = json_ld_values(detail["skos:narrower"])
+          link_target = ->(ref) { ref.is_a?(Hash) ? ref["@id"] : ref.to_s }
+
+          concept["definition"] = json_ld_values(detail["skos:definition"])
+          concept["synonym"] = json_ld_values(detail["skos:altLabel"])
+          concept["obsolete"] = detail["owl:deprecated"] || false
+          concept["hasChildren"] = !narrower.empty?
+
+          concept["links"]["parents"] = json_ld_values(detail["skos:broader"]).map(&link_target)
+          concept["links"]["children"] = narrower.map(&link_target)
+          concept["links"]["notes"] = json_ld_values(detail["skos:note"])
+          concept["links"]["mappings"] = json_ld_values(detail["owl:sameAs"]).map(&link_target)
+        rescue StandardError
+          # Best effort: a concept whose lookup fails keeps its bare document
+        end
+      end
+
+      # Solution B: enriches the concepts in place with a single SPARQL query
+      # against NVS_SPARQL_URL. Returns without changes when there is nothing
+      # to look up or the endpoint answers with a non-2xx status.
+      def enrich_nvs_concepts_sparql(collections)
+        # The IRIs come from a remote response and are interpolated into the
+        # query text, so only values that are safe inside <...> are sent.
+        iris = collections.map { |c| c["@id"] }
+                          .select { |iri| iri.is_a?(String) && !iri.match?(UNSAFE_IRI_CHARS) }
+        return if iris.empty?
+
+        values = iris.map { |iri| "<#{with_trailing_slash(iri)}>" }.join(' ')
+
+        sparql_query = <<~SPARQL
+          PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
+          PREFIX owl: <http://www.w3.org/2002/07/owl#>
+          SELECT ?s ?definition ?altLabel ?broader ?narrower ?deprecated ?sameAs ?note WHERE {
+            VALUES ?s { #{values} }
+            OPTIONAL { ?s skos:definition ?definition }
+            OPTIONAL { ?s skos:altLabel ?altLabel }
+            OPTIONAL { ?s skos:broader ?broader }
+            OPTIONAL { ?s skos:narrower ?narrower }
+            OPTIONAL { ?s owl:deprecated ?deprecated }
+            OPTIONAL { ?s owl:sameAs ?sameAs }
+            OPTIONAL { ?s skos:note ?note }
+          }
+        SPARQL
+
+        conn = Faraday.new(url: NVS_SPARQL_URL) do |f|
+          f.request :url_encoded
+          f.headers['Accept'] = 'application/sparql-results+json'
+          f.options.timeout = 15
+          f.options.open_timeout = 5
+        end
+
+        response = conn.post('', query: sparql_query)
+        return unless response.success?
+
+        data = MultiJson.load(response.body)
+        grouped = group_nvs_sparql_bindings(data.dig("results", "bindings") || [])
+
+        # Apply the enrichment to each concept
+        collections.each do |concept|
+          iri = concept["@id"]
+          next unless iri.is_a?(String)
+
+          enrichment = grouped[iri] || grouped[with_trailing_slash(iri)]
+          next unless enrichment
+
+          concept["definition"] = enrichment[:definitions]
+          concept["synonym"] = enrichment[:altLabels]
+          concept["obsolete"] = enrichment[:deprecated]
+          concept["hasChildren"] = !enrichment[:narrowers].empty?
+
+          concept["links"]["parents"] = enrichment[:broaders]
+          concept["links"]["children"] = enrichment[:narrowers]
+          concept["links"]["notes"] = enrichment[:notes]
+          concept["links"]["mappings"] = enrichment[:sameAs]
+        end
+      end
+
+      # Groups SPARQL result rows by subject IRI, keeping every distinct
+      # non-empty value once per field.
+      def group_nvs_sparql_bindings(bindings)
+        bindings.each_with_object({}) do |row, grouped|
+          uri = row.dig("s", "value")
+          next unless uri
+
+          entry = grouped[uri] ||= {
+            definitions: [], altLabels: [], broaders: [], narrowers: [],
+            sameAs: [], notes: [], deprecated: false
+          }
+
+          NVS_SPARQL_FIELDS.each do |variable, field|
+            value = row.dig(variable, "value").to_s
+            entry[field] << value unless value.empty? || entry[field].include?(value)
+          end
+
+          entry[:deprecated] = true if row.dig("deprecated", "value") == "true"
+        end
+      end
+
+      # Merges per-source results. Errors are collected, totals are summed and
+      # items sharing the same "@id" and ontology (last path segment of
+      # links.ontology) are collapsed into the first occurrence, whose
+      # "other_portals" lists the source portals of the duplicates.
+      def merge_results(results)
+        collection = []
+        errors = []
+        total_count = 0
+
+        results.each do |result|
+          if result[:error]
+            errors << result[:error]
+          else
+            collection.concat(result[:collection])
+            total_count += result[:totalCount].to_i
+          end
+        end
+
+        seen = {}
+        merged = []
+
+        collection.each do |item|
+          ontology_ref = item.dig("links", "ontology")
+          # A non-string reference (nil or a JSON-LD object) has no acronym to split out
+          ontology_acronym = ontology_ref.is_a?(String) ? ontology_ref.split('/').last : nil
+          dedup_key = "#{item["@id"]}||#{ontology_acronym}"
+
+          if seen[dedup_key]
+            seen[dedup_key]["other_portals"] << item["source_portal"]
+          else
+            item["other_portals"] = []
+            seen[dedup_key] = item
+            merged << item
+          end
+        end
+
+        { collection: merged, totalCount: total_count, errors: errors }
+      end
+
+    end
+  end
+end
+
+helpers Sinatra::Helpers::FederationPortalsHelper