From faee36d36687f4f58c7742759c5f12a73ec96c48 Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Wed, 20 Aug 2025 16:12:41 +0200 Subject: [PATCH 1/5] Fix: disable copying hasDomain from ontology to submission (#221) --- lib/ontologies_linked_data/models/ontology_submission.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index ff9d4469..7166861e 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -99,7 +99,7 @@ class OntologySubmission < LinkedData::Models::Base # Usage metadata attribute :knownUsage, namespace: :omv, type: :list attribute :designedForOntologyTask, namespace: :omv, type: %i[list uri] - attribute :hasDomain, namespace: :omv, type: :list, default: ->(s) { ontology_has_domain(s) } + attribute :hasDomain, namespace: :omv, type: :list attribute :coverage, namespace: :dct attribute :example, namespace: :vann, type: :list From 5de44b8526625c1c3908fd9975d5943211a912f3 Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Wed, 27 Aug 2025 15:19:28 +0200 Subject: [PATCH 2/5] Fix: add format check to the identifier notation (#223) * add format check to the identifier notation * Fix test using the good format of identifier notations --- .../models/agents/identifier.rb | 29 ++++++++++++++++++- test/models/test_agent.rb | 4 +-- test/models/test_search.rb | 2 +- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/lib/ontologies_linked_data/models/agents/identifier.rb b/lib/ontologies_linked_data/models/agents/identifier.rb index 5e7d77cc..fb648b15 100644 --- a/lib/ontologies_linked_data/models/agents/identifier.rb +++ b/lib/ontologies_linked_data/models/agents/identifier.rb @@ -6,7 +6,7 @@ class AgentIdentifier < LinkedData::Models::Base model :Identifier, namespace: :adms, name_with: lambda { |i| generate_identifier(i.notation, 
i.schemaAgency)} - attribute :notation, namespace: :skos, enforce: %i[existence no_url] + attribute :notation, namespace: :skos, enforce: %i[existence no_url notation_format] attribute :schemaAgency, namespace: :adms, enforcedValues: IDENTIFIER_SCHEMES.keys, enforce: [:existence] attribute :schemeURI, handler: :scheme_uri_infer attribute :creator, type: :user, enforce: [:existence] @@ -31,6 +31,33 @@ def no_url(inst,attr) return notation&.start_with?('http') ? [:no_url, "`notation` must not be a URL"] : [] end + def notation_format(inst, attr) + inst.bring([attr, :schemaAgency]) if inst.bring?(attr) + notation = inst.send(attr) + schema_agency = inst.send(:schemaAgency) + + # Validate notation format depending on schema to not have weird ids + case schema_agency + when "ROR" + unless notation.match?(/^[0-9a-z]{9}$/i) # ROR IDs are 9-char base32 + return [:notation_format, "`notation` must be compliant with ROR format"] + end + when "ORCID" + unless notation.match?(/^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$/) + return [:notation_format, "`notation` must be compliant with ORCID format"] + end + when "ISNI" + unless notation.match?(/^\d{4}\s?\d{4}\s?\d{4}\s?\d{3}[\dX]$/) + return [:notation_format, "`notation` must be compliant with ISNI format"] + end + when "GRID" + unless notation.match?(/^grid\.[0-9]+\.[a-f0-9]{1,2}$/i) + return [:notation_format, "`notation` must be compliant with GRID format"] + end + end + + end + def scheme_uri_infer self.bring(:schemaAgency) if self.bring?(:schemaAgency) IDENTIFIER_SCHEMES[self.schemaAgency.to_sym] if self.schemaAgency diff --git a/test/models/test_agent.rb b/test/models/test_agent.rb index 9f8dbe7e..a4898b10 100644 --- a/test/models/test_agent.rb +++ b/test/models/test_agent.rb @@ -25,7 +25,7 @@ def test_agent_no_valid ] @identifiers = [ LinkedData::Models::AgentIdentifier.new(notation: '000h6jb29', schemaAgency: 'ROR', creator: @@user1), - LinkedData::Models::AgentIdentifier.new(notation: '000h6jb29', schemaAgency: 'ORCID', creator: 
@@user1), + LinkedData::Models::AgentIdentifier.new(notation: '0000-0012-1501-8134', schemaAgency: 'ORCID', creator: @@user1), ] @identifiers.each { |i| i.save } @@ -75,7 +75,7 @@ def test_identifier_no_valid refute LinkedData::Models::AgentIdentifier.new(notation: '000h6jb29', schemaAgency: 'ROR', creator: @@user1).valid? - assert LinkedData::Models::AgentIdentifier.new(notation: '000h6jb29', schemaAgency: 'ORCID', creator: @@user1).valid? + assert LinkedData::Models::AgentIdentifier.new(notation: '0000-0012-1501-8134', schemaAgency: 'ORCID', creator: @@user1).valid? id.delete end diff --git a/test/models/test_search.rb b/test/models/test_search.rb index fd77acca..119b8363 100644 --- a/test/models/test_search.rb +++ b/test/models/test_search.rb @@ -97,7 +97,7 @@ def test_search_agents ] @identifiers = [ LinkedData::Models::AgentIdentifier.new(notation: '000h6jb29', schemaAgency: 'ROR', creator: @@user1), - LinkedData::Models::AgentIdentifier.new(notation: '000h6jb29', schemaAgency: 'ORCID', creator: @@user1), + LinkedData::Models::AgentIdentifier.new(notation: '0000-0012-1501-8134', schemaAgency: 'ORCID', creator: @@user1), ] @identifiers.each { |i| i.save } From 24fb3833c2621f7e6d5c8eaa6f8e25349db15bab Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Thu, 28 Aug 2025 14:44:19 +0200 Subject: [PATCH 3/5] Fix: mod api documentation url --- .../models/mod/semantic_artefact_catalog.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ontologies_linked_data/models/mod/semantic_artefact_catalog.rb b/lib/ontologies_linked_data/models/mod/semantic_artefact_catalog.rb index dbc63c71..62451fba 100644 --- a/lib/ontologies_linked_data/models/mod/semantic_artefact_catalog.rb +++ b/lib/ontologies_linked_data/models/mod/semantic_artefact_catalog.rb @@ -164,7 +164,7 @@ class SemanticArtefactCatalog < LinkedData::Models::ModBase LinkedData::Hypermedia::Link.new("notes", lambda {|s| "notes"}, LinkedData::Models::Note.type_uri), 
LinkedData::Hypermedia::Link.new("replies", lambda {|s| "replies"}, LinkedData::Models::Notes::Reply.type_uri), LinkedData::Hypermedia::Link.new("reviews", lambda {|s| "reviews"}, LinkedData::Models::Review.type_uri), - LinkedData::Hypermedia::Link.new("mod-api_documentation", lambda {|s| "mod-api/doc"}, nil), + LinkedData::Hypermedia::Link.new("mod-api_documentation", lambda {|s| "mod-api/doc/api"}, nil), LinkedData::Hypermedia::Link.new("artefacts", lambda {|s| "mod-api/artefacts"}, LinkedData::Models::SemanticArtefact.type_uri), LinkedData::Hypermedia::Link.new("records", lambda {|s| "mod-api/records"}, LinkedData::Models::SemanticArtefactCatalogRecord.type_uri), LinkedData::Hypermedia::Link.new("search_content", lambda {|s| "mod-api/search/content"}, nil), From 4f5f8f035aeec61a0a231fdc872aeec50084d09e Mon Sep 17 00:00:00 2001 From: imadbourouche Date: Fri, 12 Sep 2025 17:10:55 +0200 Subject: [PATCH 4/5] Fix ISNI agent identifier format check format --- lib/ontologies_linked_data/models/agents/identifier.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/ontologies_linked_data/models/agents/identifier.rb b/lib/ontologies_linked_data/models/agents/identifier.rb index fb648b15..b9441323 100644 --- a/lib/ontologies_linked_data/models/agents/identifier.rb +++ b/lib/ontologies_linked_data/models/agents/identifier.rb @@ -47,8 +47,10 @@ def notation_format(inst, attr) return [:notation_format, "`notation` must be compliant with ORCID format"] end when "ISNI" - unless notation.match?(/^\d{4}\s?\d{4}\s?\d{4}\s?\d{3}[\dX]$/) - return [:notation_format, "`notation` must be compliant with ISNI format"] + # 16 digits, last one can be X + # spaces between the numbers are optional, but they are rejected here because they will not be accepted in the SPARQL query: goo changes them to \u00 + unless notation.match?(/^\d{15}[\dX]$/) + return [:notation_format, "`notation` must be 16 digits (last digit may be X) without spaces"] end when "GRID" unless
notation.match?(/^grid\.[0-9]+\.[a-f0-9]{1,2}$/i) From 471a9f10b2404ff643ce61369817bc985faffa19 Mon Sep 17 00:00:00 2001 From: Imad Bourouche Date: Thu, 18 Sep 2025 22:07:53 +0200 Subject: [PATCH 5/5] Feat: add extracted once in the metadata extraction (#224) * Add extracted once in the metadata extraction * add test for extracted once fields --- config/schemes/ontology_submission.yml | 16 +++---- .../operations/submission_extract_metadata.rb | 27 +++++++++++ rakelib/docker_based_test.rake | 4 +- test/models/test_ontology_submission.rb | 45 +++++++++++++++++++ 4 files changed, 82 insertions(+), 10 deletions(-) diff --git a/config/schemes/ontology_submission.yml b/config/schemes/ontology_submission.yml index 8f7a8f66..c2842412 100644 --- a/config/schemes/ontology_submission.yml +++ b/config/schemes/ontology_submission.yml @@ -290,7 +290,7 @@ copyrightHolder: description: [ "SCHEMA: The party holding the legal copyright to the CreativeWork.", "DCTERMS: A person or organization owning or managing rights over the resource." ] - extractedMetadata: true + extractedMetadata: once ### Description @@ -504,7 +504,7 @@ hasCreator: "DOAP: Maintainer of a project, a project leader.", "SCHEMA:author: The author of this content or rating.", "SCHEMA:creator: The creator/author of this CreativeWork." 
] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "omv:hasCreator", "dc:creator", "dcterms:creator", "foaf:maker", "prov:wasAttributedTo", "doap:maintainer", "pav:authoredBy", "pav:createdBy", "schema:author", "schema:creator" ] #Contributor @@ -518,7 +518,7 @@ hasContributor: "OMV: Contributors to the creation of the ontology.", "PAV: The resource was contributed to by the given agent.", "DOAP: Project contributor" ] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "omv:hasContributor", "dc:contributor", "dcterms:contributor", "doap:helper", "schema:contributor", "pav:contributedBy" ] #Curator @@ -529,7 +529,7 @@ curatedBy: description: [ "PAV: Specifies an agent specialist responsible for shaping the expression in an appropriate format. Often the primary agent responsible for ensuring the quality of the representation.", "MOD: An ontology that is evaluated by an agent." ] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "mod:evaluatedBy", "pav:curatedBy" ] #Translator @@ -539,7 +539,7 @@ translator: helpText: "Organization or person who adapts a creative work to different languages." description: [ "SCHEMA: Organization or person who adapts a creative work to different languages, regional differences and technical requirements of a target market, or that translates during some event." ] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "schema:translator" ] #Publisher @@ -551,7 +551,7 @@ publisher: "DCTERMS: An entity responsible for making the resource available.", "SCHEMA: The publisher of creative work.", "ADMS: The name of the agency that issued the identifier." 
] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "dc:publisher", "dcterms:publisher", "schema:publisher", "adms:schemaAgency" ] #Funded or sponsored by @@ -563,7 +563,7 @@ fundedBy: "MOD: An ontology that is sponsored by and developed under a project.", "FOAF: An organization funding a project or person.", "SCHEMA: The organization on whose behalf the creator was working." ] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "foaf:fundedBy", "mod:sponsoredBy", "schema:sourceOrganization" ] #Endorsed by @@ -574,7 +574,7 @@ endorsedBy: description: [ "MOD: An ontology endorsed by an agent.", "OMV: The parties that have expressed support or approval to this ontology." ] - extractedMetadata: true + extractedMetadata: once metadataMappings: [ "omv:endorsedBy", "mod:endorsedBy" ] ### Community diff --git a/lib/ontologies_linked_data/services/submission_process/operations/submission_extract_metadata.rb b/lib/ontologies_linked_data/services/submission_process/operations/submission_extract_metadata.rb index 2b9d016f..31dc05cc 100644 --- a/lib/ontologies_linked_data/services/submission_process/operations/submission_extract_metadata.rb +++ b/lib/ontologies_linked_data/services/submission_process/operations/submission_extract_metadata.rb @@ -72,6 +72,18 @@ def extract_ontology_metadata(logger, user_params) next unless attr_settings[:extractedMetadata] && attr_not_excluded + extracted_once = attr_settings[:extractedMetadata] == "once" + if extracted_once + # if the attribute should be extracted only once, check if it has already been set in the before last submission + values_before = check_value_from_before_latest_submission(attr, logger) + + if values_before.present? + @submission.send("#{attr}=", values_before) + next + end + end + + # a boolean to check if a value that should be single have already been extracted single_extracted = false type = enforce?(attr, :list) ? 
:list : :string @@ -283,6 +295,21 @@ def find_or_create_agent(attr, old_val, logger) end agent end + + def check_value_from_before_latest_submission(attr, logger) + @ontology ||= @submission.ontology + @ontology.bring(submissions: [:submissionId]) + + @submissions ||= @ontology.submissions.to_a.sort_by { |s| -s.submissionId } + return nil if @submissions.size < 2 + + before_last_sub = @submissions[1] + before_last_sub.bring(attr) + + before_last_sub.public_send(attr.to_s) + end + + end end end diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake index 52af504c..6a769832 100644 --- a/rakelib/docker_based_test.rake +++ b/rakelib/docker_based_test.rake @@ -68,7 +68,7 @@ namespace :test do ENV["COMPOSE_PROFILES"]="vo" Rake::Task["test:docker:up"].invoke # - unless system("curl -sf http://localhost:8890/sparql || exit 1") + unless system("curl -sf -o /dev/null http://localhost:8890/sparql || exit 1") printf("waiting for Virtuoso container to initialize") sec = 0 until system("curl -sf http://localhost:8890/sparql || exit 1") do @@ -99,7 +99,7 @@ namespace :test do #system("docker compose cp ./test/data/graphdb-repo-config.ttl graphdb:/opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl") #system("docker compose cp ./test/data/graphdb-test-load.nt graphdb:/opt/graphdb/dist/configs/templates/graphdb-test-load.nt") #system('docker compose exec graphdb sh -c "importrdf load -f -c /opt/graphdb/dist/configs/templates/graphdb-repo-config.ttl -m parallel /opt/graphdb/dist/configs/templates/graphdb-test-load.nt ;"') - unless system("curl -sf http://localhost:7200/repositories || exit 1") + unless system("curl -o /dev/null -sf http://localhost:7200/repositories || exit 1") printf("waiting for Graphdb container to initialize") sec = 0 until system("curl -sf http://localhost:7200/repositories || exit 1") do diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index 155aaff0..0fa5d56f 100644 --- 
a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -1161,6 +1161,51 @@ def test_submission_extract_metadata end end + def test_submission_extract_metadata_once + # This test only covers one attribute field, :hasCreator + # TODO: make it generic to all the fields that are extracted once + submission_parse("AGROOE", "AGROOE Test extract metadata ontology", + "./test/data/ontology_files/agrooeMappings-05-05-2016.owl", 1, + process_rdf: true, extract_metadata: true, generate_missing_labels: false, delete: true) + ont = LinkedData::Models::Ontology.find("AGROOE").first + sub1 = ont.latest_submission + refute_nil sub1 + + sub1.bring_remaining + hasCreator_old_values = ["Alfred DC", "Clement Jonquet", "Gaston Dcterms", "Huguette Doap", "Mirabelle Prov", "Paul Foaf", "Vincent Emonet"] + assert_equal hasCreator_old_values.sort, sub1.hasCreator.map { |x| x.bring_remaining.name }.sort + sub1.save + + agents_number = LinkedData::Models::Agent.where.count + + # modifying the names of the agents (the curation phase) + hasCreator_old_values.each do |agent_name| + agent = LinkedData::Models::Agent.where(name: agent_name).first + refute_nil agent + agent.bring_remaining + agent.name = "#{agent_name}#{rand(1000..9999)}" + agent.save + end + + assert agents_number, LinkedData::Models::Agent.where.count + + # Create new submission + submission_parse("AGROOE", "AGROOE Test extract metadata ontology", "./test/data/ontology_files/agrooeMappings-05-05-2016.owl", 2, + process_rdf: true, extract_metadata: true, generate_missing_labels: false, delete: false) + ont = LinkedData::Models::Ontology.find("AGROOE").first + sub2 = ont.latest_submission + refute_nil sub2 + + sub2.bring_remaining + sub2.save + + # Check that there are no duplicates + assert agents_number, LinkedData::Models::Agent.where.count + all_sub = LinkedData::Models::Ontology.find("AGROOE").first.bring(:submissions => [:hasCreator => [:name]]).submissions + sub0 = all_sub[0] + sub1
= all_sub[1] + sub0.hasCreator.map { |x| x.name }.sort == sub1.hasCreator.map { |x| x.name }.sort + end def test_submission_delete_remove_files #This one has resources wih accents.