diff --git a/geospatial-sample-provenance-guard/README.md b/geospatial-sample-provenance-guard/README.md new file mode 100644 index 00000000..de436433 --- /dev/null +++ b/geospatial-sample-provenance-guard/README.md @@ -0,0 +1,38 @@ +# Geospatial Sample Provenance Guard + +Self-contained Scientific Knowledge Graph Integration slice for +`SCIBASE-AI/SCIBASE.AI#17`. + +The guard evaluates field-sample and specimen location graph edges before they +appear on entity pages or public discovery recommendations. It checks coordinate +ranges, CRS normalization, country/coordinate consistency, coordinate precision, +sensitive-site redaction, voucher identifiers, dataset DOI resolution, +collection-date plausibility, and sample-to-dataset edge alignment. + +This is intentionally separate from broad graph extraction/navigation, link +audit, ontology drift/alias/synonym controls, relationship conflict arbitration, +author-affiliation disambiguation, artifact lineage, evidence freshness, +instrument-method compatibility, reproducibility routes, recommendation +visibility/diversity, negative-result replication, measurement harmonization, +claim qualifier, ethics provenance, funder award lineage, clinical trial +registry, and software/runtime compatibility slices. + +## Run + +```bash +npm run check +npm test +npm run demo +npm run demo:video +``` + +## Outputs + +- `reports/summary.json` +- `reports/reviewer-packet.md` +- `reports/summary.svg` +- `reports/demo.webm` + +All data is synthetic. The module does not call geocoders, repositories, GIS +systems, ontology services, specimen registries, journal systems, or external +APIs. 
/**
 * Converts an absolute filesystem path into a `file:` URL string that can be
 * handed to the headless browser on the command line.
 *
 * - Backslashes become forward slashes, so `C:\dir\demo.html` turns into
 *   `file:///C:/dir/demo.html`.
 * - POSIX paths already start with "/", so no extra slash is inserted; a fixed
 *   `file:///` prefix would yield `file:////tmp/...` for them.
 * - Characters that are not allowed in a URL path (spaces, "%", non-ASCII
 *   text) are percent-encoded. "?" and "#" are escaped explicitly because
 *   `encodeURI` leaves them untouched and they would otherwise be read as the
 *   start of a query string or fragment.
 *
 * @param {string} filePath - Absolute path of the file to open.
 * @returns {string} A `file:` URL that points at `filePath`.
 */
function fileUrl(filePath) {
  const forwardSlashed = filePath.replace(/\\/g, "/");
  // Drive-letter paths ("C:/...") need a leading slash; rooted POSIX paths do not.
  const rooted = forwardSlashed.startsWith("/") ? forwardSlashed : `/${forwardSlashed}`;
  const encoded = encodeURI(rooted).replace(/[?#]/g, (ch) => (ch === "?" ? "%3F" : "%23"));
  return `file://${encoded}`;
}
+ +recording+ + +`; + +fs.mkdirSync(reportDir, { recursive: true }); + +const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "geospatial-provenance-demo-")); +const htmlPath = path.join(tempDir, "demo.html"); +const profileDir = path.join(tempDir, "profile"); +fs.writeFileSync(htmlPath, html, "utf8"); + +const stdout = execFileSync( + findBrowser(), + [ + "--headless=new", + "--disable-gpu", + "--disable-dev-shm-usage", + "--autoplay-policy=no-user-gesture-required", + "--run-all-compositor-stages-before-draw", + "--virtual-time-budget=7500", + `--user-data-dir=${profileDir}`, + "--dump-dom", + fileUrl(htmlPath) + ], + { encoding: "utf8", maxBuffer: 30 * 1024 * 1024 } +); + +const match = stdout.match(/data:video\/webm;base64,([A-Za-z0-9+/=]+)/); +if (!match) { + throw new Error(`Demo video generation failed. Browser output ended with: ${stdout.slice(-600)}`); +} + +fs.writeFileSync(outputPath, Buffer.from(match[1], "base64")); +console.log(`Generated ${path.relative(process.cwd(), outputPath)}`); diff --git a/geospatial-sample-provenance-guard/demo.js b/geospatial-sample-provenance-guard/demo.js new file mode 100644 index 00000000..97c3730c --- /dev/null +++ b/geospatial-sample-provenance-guard/demo.js @@ -0,0 +1,18 @@ +const fs = require("fs"); +const path = require("path"); +const { project } = require("./sample-data"); +const { buildReviewPacket, renderMarkdownReport, renderSvgSummary } = require("./index"); + +const reportDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportDir, { recursive: true }); + +const packet = buildReviewPacket(project); + +fs.writeFileSync(path.join(reportDir, "summary.json"), `${JSON.stringify(packet, null, 2)}\n`, "utf8"); +fs.writeFileSync(path.join(reportDir, "reviewer-packet.md"), renderMarkdownReport(packet), "utf8"); +fs.writeFileSync(path.join(reportDir, "summary.svg"), renderSvgSummary(packet), "utf8"); + +console.log(`Generated reports for ${packet.guard}`); +console.log(`Decision: ${packet.decision}`); 
/**
 * Appends one finding record to the shared findings list.
 *
 * @param {object[]} findings - List that is mutated in place.
 * @param {string} severity - Severity key of the finding.
 * @param {string} rule - Identifier of the rule that fired.
 * @param {string} message - Description of the problem.
 * @param {string} action - Suggested curator action.
 * @param {string[]} [refs=[]] - Identifiers the finding refers to.
 */
function addFinding(findings, severity, rule, message, action, refs = []) {
  findings.push({ severity, rule, message, action, refs });
}

/**
 * Number of whole days from `a` to `b`, rounded down; positive when `b` is later.
 *
 * @param {string|number|Date} a - Start date, parsed with `new Date`.
 * @param {string|number|Date} b - End date, parsed with `new Date`.
 * @returns {number} Day difference, or NaN when either value cannot be parsed.
 */
function daysBetween(a, b) {
  const left = new Date(a).getTime();
  const right = new Date(b).getTime();
  return Math.floor((right - left) / (24 * 60 * 60 * 1000));
}

/**
 * Checks that a sample carries finite latitude/longitude numbers within
 * [-90, 90] and [-180, 180] respectively.
 *
 * @param {{latitude: number, longitude: number}} sample - Sample to check.
 * @returns {boolean} True when both coordinates are finite and in range.
 */
function coordinatesInRange(sample) {
  return (
    Number.isFinite(sample.latitude) &&
    Number.isFinite(sample.longitude) &&
    sample.latitude >= -90 &&
    sample.latitude <= 90 &&
    sample.longitude >= -180 &&
    sample.longitude <= 180
  );
}

/**
 * Checks whether a sample's coordinates fall inside the bounding box that the
 * project policy lists for the sample's country.
 *
 * Returns true (no mismatch) when the policy has no bounds for the country or
 * when the coordinates are out of range; out-of-range coordinates are reported
 * by the separate coordinate-range rule in `evaluateSample`.
 *
 * @param {object} project - Project whose `policy.acceptedCountryBounds` is read.
 * @param {object} sample - Sample with `country`, `latitude`, and `longitude`.
 * @returns {boolean} False only when bounds exist and the point lies outside them.
 */
function countryBoundsMatch(project, sample) {
  const bounds = project.policy.acceptedCountryBounds[sample.country];
  if (!bounds || !coordinatesInRange(sample)) {
    return true;
  }
  return (
    sample.latitude >= bounds.minLat &&
    sample.latitude <= bounds.maxLat &&
    sample.longitude >= bounds.minLon &&
    sample.longitude <= bounds.maxLon
  );
}

/**
 * Runs every per-sample provenance rule and records the violations.
 *
 * Rules are evaluated in a fixed order (coordinate range, CRS, country bounds,
 * sensitive-site precision, voucher, dataset DOI, collection date, public
 * recommendation) so findings always appear in the same order.
 *
 * @param {object} sample - Sample record under review.
 * @param {object} project - Project providing `asOfDate` and `policy`.
 * @param {Map<string, object>} datasetIndex - Datasets keyed by DOI.
 * @param {object[]} findings - List that receives the findings (mutated).
 */
function evaluateSample(sample, project, datasetIndex, findings) {
  const { policy } = project;

  if (!coordinatesInRange(sample)) {
    addFinding(
      findings,
      "critical",
      "coordinate-range-invalid",
      `${sample.id} has coordinates outside valid latitude/longitude ranges.`,
      "Suppress the sample node and repair coordinates before graph publication.",
      [sample.id]
    );
  }

  if (sample.crs !== policy.requiredCrs) {
    addFinding(
      findings,
      "high",
      "coordinate-crs-not-normalized",
      `${sample.id} uses ${sample.crs}, not ${policy.requiredCrs}.`,
      "Normalize coordinates to the graph CRS before entity pages or recommendations are shown.",
      [sample.id, sample.crs]
    );
  }

  if (!countryBoundsMatch(project, sample)) {
    addFinding(
      findings,
      "high",
      "country-coordinate-mismatch",
      `${sample.id} coordinates do not fall inside the expected ${sample.country} bounds.`,
      "Hold location-derived graph edges until country and coordinate provenance are reconciled.",
      [sample.id, sample.country]
    );
  }

  // A sample without a labels array is treated as unlabeled instead of crashing the run.
  const labels = Array.isArray(sample.labels) ? sample.labels : [];
  const sensitiveLabels = labels.filter((label) => policy.sensitiveLabels.includes(label));
  if (sensitiveLabels.length > 0 && sample.precisionDecimals > policy.maxPublicPrecisionDecimals) {
    addFinding(
      findings,
      "critical",
      "sensitive-site-overprecise-public-coordinate",
      `${sample.id} exposes ${sample.precisionDecimals} decimal coordinates for ${sensitiveLabels.join(", ")}.`,
      "Round or redact the location before public discovery recommendations are enabled.",
      [sample.id, ...sensitiveLabels]
    );
  }

  if (!sample.voucherId) {
    addFinding(
      findings,
      "high",
      "sample-voucher-missing",
      `${sample.id} has no specimen or field voucher identifier.`,
      "Attach a voucher/specimen accession before the sample appears in entity pages.",
      [sample.id]
    );
  }

  if (!datasetIndex.has(sample.datasetDoi)) {
    addFinding(
      findings,
      "high",
      "sample-dataset-doi-unresolved",
      `${sample.id} references unresolved dataset DOI ${sample.datasetDoi}.`,
      "Repair DOI alignment before graph edges are emitted.",
      [sample.id, sample.datasetDoi]
    );
  }

  // A missing or unparseable date makes daysBetween() return NaN, and any
  // comparison with NaN is false, so it has to be reported explicitly;
  // otherwise a bad date would pass the plausibility check unnoticed.
  const hasValidCollectionDate =
    Boolean(sample.collectionDate) && !Number.isNaN(new Date(sample.collectionDate).getTime());
  if (!hasValidCollectionDate) {
    addFinding(
      findings,
      "medium",
      "collection-date-invalid",
      `${sample.id} has a missing or unparseable collection date (${String(sample.collectionDate)}).`,
      "Hold temporal graph edges until a valid collection date is recorded.",
      [sample.id]
    );
  } else if (daysBetween(project.asOfDate, sample.collectionDate) > policy.maxCollectionFutureDays) {
    addFinding(
      findings,
      "medium",
      "collection-date-in-future",
      `${sample.id} collection date ${sample.collectionDate} is after the graph packet date.`,
      "Hold temporal graph edges until the collection date is corrected or the packet date advances.",
      [sample.id, sample.collectionDate]
    );
  }

  if (policy.publicRecommendationRequiresDatasetDoi && sample.publicRecommendation && !sample.datasetDoi) {
    addFinding(
      findings,
      "high",
      "public-recommendation-without-dataset-doi",
      `${sample.id} is eligible for public recommendations without a dataset DOI.`,
      "Disable public discovery for the sample until dataset DOI provenance is present.",
      [sample.id]
    );
  }
}
/**
 * Maps an evaluation result to a publication decision.
 *
 * Decision ladder, checked top to bottom:
 *   1. any critical finding             -> "block-geospatial-graph-publication"
 *   2. any high finding, a score below
 *      75, or a non-finite score        -> "hold-geospatial-edges-for-curator-review"
 *   3. score below 90                   -> "manual-review-before-recommendation"
 *   4. otherwise                        -> "geospatial-provenance-ready"
 *
 * @param {{severitySummary: {critical: number, high: number}, score: number}} evaluation
 *   Evaluation object carrying the per-severity counts and the numeric score.
 * @returns {string} One of the four decision identifiers listed above.
 */
function decisionFromEvaluation(evaluation) {
  const { severitySummary, score } = evaluation;

  if (severitySummary.critical > 0) {
    return "block-geospatial-graph-publication";
  }
  // Fail closed: comparing NaN or undefined with "<" is always false, so a
  // missing or corrupt score would otherwise fall through to "ready".
  if (!Number.isFinite(score) || severitySummary.high > 0 || score < 75) {
    return "hold-geospatial-edges-for-curator-review";
  }
  if (score < 90) {
    return "manual-review-before-recommendation";
  }
  return "geospatial-provenance-ready";
}
${packet.score}`, + "", + "## Severity Summary", + "", + "| Severity | Count |", + "| --- | ---: |" + ]; + + for (const severity of ["critical", "high", "medium", "low"]) { + lines.push(`| ${severity} | ${packet.severitySummary[severity]} |`); + } + + lines.push("", "## Findings", ""); + for (const finding of packet.findings) { + lines.push(`- **${finding.severity} / ${finding.rule}**: ${finding.message}`); + lines.push(` - Action: ${finding.action}`); + lines.push(` - Refs: ${finding.refs.join(", ") || "none"}`); + } + + lines.push("", "## Safety", ""); + for (const item of packet.safety) { + lines.push(`- ${item}`); + } + + return `${lines.join("\n")}\n`; +} + +function renderSvgSummary(packet) { + const scoreWidth = Math.max(44, Math.min(760, packet.score * 7.6)); + return ` +`; +} + +module.exports = { + buildReviewPacket, + countryBoundsMatch, + decisionFromEvaluation, + evaluateGeospatialProvenance, + renderMarkdownReport, + renderSvgSummary +}; diff --git a/geospatial-sample-provenance-guard/package.json b/geospatial-sample-provenance-guard/package.json new file mode 100644 index 00000000..43670862 --- /dev/null +++ b/geospatial-sample-provenance-guard/package.json @@ -0,0 +1,14 @@ +{ + "name": "geospatial-sample-provenance-guard", + "version": "1.0.0", + "description": "Deterministic knowledge-graph guard for geospatial field-sample provenance.", + "main": "index.js", + "private": true, + "type": "commonjs", + "scripts": { + "check": "node --check index.js && node --check sample-data.js && node --check demo.js && node --check demo-video.js && node --check test.js", + "test": "node test.js", + "demo": "node demo.js", + "demo:video": "node demo-video.js" + } +} diff --git a/geospatial-sample-provenance-guard/reports/demo.webm b/geospatial-sample-provenance-guard/reports/demo.webm new file mode 100644 index 00000000..b8f79e02 Binary files /dev/null and b/geospatial-sample-provenance-guard/reports/demo.webm differ diff --git 
a/geospatial-sample-provenance-guard/reports/reviewer-packet.md b/geospatial-sample-provenance-guard/reports/reviewer-packet.md new file mode 100644 index 00000000..86a81795 --- /dev/null +++ b/geospatial-sample-provenance-guard/reports/reviewer-packet.md @@ -0,0 +1,46 @@ +# Geospatial Sample Provenance Guard + +Issue: SCIBASE-AI/SCIBASE.AI#17 +Graph packet: kg-field-samples-2026-05 +Entity page: field-sample-climate-adaptation +Decision: block-geospatial-graph-publication +Score: 0 + +## Severity Summary + +| Severity | Count | +| --- | ---: | +| critical | 1 | +| high | 5 | +| medium | 1 | +| low | 0 | + +## Findings + +- **critical / sensitive-site-overprecise-public-coordinate**: sample-001 exposes 5 decimal coordinates for endangered-species. + - Action: Round or redact the location before public discovery recommendations are enabled. + - Refs: sample-001, endangered-species +- **high / coordinate-crs-not-normalized**: sample-002 uses EPSG:3857, not EPSG:4326. + - Action: Normalize coordinates to the graph CRS before entity pages or recommendations are shown. + - Refs: sample-002, EPSG:3857 +- **high / country-coordinate-mismatch**: sample-002 coordinates do not fall inside the expected Peru bounds. + - Action: Hold location-derived graph edges until country and coordinate provenance are reconciled. + - Refs: sample-002, Peru +- **high / sample-voucher-missing**: sample-002 has no specimen or field voucher identifier. + - Action: Attach a voucher/specimen accession before the sample appears in entity pages. + - Refs: sample-002 +- **high / sample-dataset-doi-unresolved**: sample-002 references unresolved dataset DOI 10.5281/zenodo.unknown. + - Action: Repair DOI alignment before graph edges are emitted. + - Refs: sample-002, 10.5281/zenodo.unknown +- **medium / collection-date-in-future**: sample-002 collection date 2026-06-30 is after the graph packet date. 
+ - Action: Hold temporal graph edges until the collection date is corrected or the packet date advances. + - Refs: sample-002, 2026-06-30 +- **high / sample-dataset-edge-doi-mismatch**: edge-2 links sample-002 to dataset-peru-water, but the sample declares 10.5281/zenodo.unknown. + - Action: Rebuild the graph edge from DOI-resolved dataset metadata. + - Refs: edge-2, sample-002, dataset-peru-water + +## Safety + +- Synthetic sample, dataset, coordinate, and graph-edge metadata only +- No geocoder, repository, GIS, ontology, specimen, journal, or external API calls +- No private field locations, real endangered species data, credentials, or live graph mutations diff --git a/geospatial-sample-provenance-guard/reports/summary.json b/geospatial-sample-provenance-guard/reports/summary.json new file mode 100644 index 00000000..a0073531 --- /dev/null +++ b/geospatial-sample-provenance-guard/reports/summary.json @@ -0,0 +1,158 @@ +{ + "guard": "geospatial-sample-provenance-guard", + "issue": "SCIBASE-AI/SCIBASE.AI#17", + "graphPacketId": "kg-field-samples-2026-05", + "entityPage": "field-sample-climate-adaptation", + "recommendationMode": "public-discovery", + "asOfDate": "2026-05-22", + "decision": "block-geospatial-graph-publication", + "score": 0, + "severitySummary": { + "critical": 1, + "high": 5, + "medium": 1, + "low": 0 + }, + "findings": [ + { + "severity": "critical", + "rule": "sensitive-site-overprecise-public-coordinate", + "message": "sample-001 exposes 5 decimal coordinates for endangered-species.", + "action": "Round or redact the location before public discovery recommendations are enabled.", + "refs": [ + "sample-001", + "endangered-species" + ] + }, + { + "severity": "high", + "rule": "coordinate-crs-not-normalized", + "message": "sample-002 uses EPSG:3857, not EPSG:4326.", + "action": "Normalize coordinates to the graph CRS before entity pages or recommendations are shown.", + "refs": [ + "sample-002", + "EPSG:3857" + ] + }, + { + "severity": "high", + 
"rule": "country-coordinate-mismatch", + "message": "sample-002 coordinates do not fall inside the expected Peru bounds.", + "action": "Hold location-derived graph edges until country and coordinate provenance are reconciled.", + "refs": [ + "sample-002", + "Peru" + ] + }, + { + "severity": "high", + "rule": "sample-voucher-missing", + "message": "sample-002 has no specimen or field voucher identifier.", + "action": "Attach a voucher/specimen accession before the sample appears in entity pages.", + "refs": [ + "sample-002" + ] + }, + { + "severity": "high", + "rule": "sample-dataset-doi-unresolved", + "message": "sample-002 references unresolved dataset DOI 10.5281/zenodo.unknown.", + "action": "Repair DOI alignment before graph edges are emitted.", + "refs": [ + "sample-002", + "10.5281/zenodo.unknown" + ] + }, + { + "severity": "medium", + "rule": "collection-date-in-future", + "message": "sample-002 collection date 2026-06-30 is after the graph packet date.", + "action": "Hold temporal graph edges until the collection date is corrected or the packet date advances.", + "refs": [ + "sample-002", + "2026-06-30" + ] + }, + { + "severity": "high", + "rule": "sample-dataset-edge-doi-mismatch", + "message": "edge-2 links sample-002 to dataset-peru-water, but the sample declares 10.5281/zenodo.unknown.", + "action": "Rebuild the graph edge from DOI-resolved dataset metadata.", + "refs": [ + "edge-2", + "sample-002", + "dataset-peru-water" + ] + } + ], + "curatorActions": [ + { + "priority": "blocking", + "rule": "sensitive-site-overprecise-public-coordinate", + "action": "Round or redact the location before public discovery recommendations are enabled.", + "refs": [ + "sample-001", + "endangered-species" + ] + }, + { + "priority": "blocking", + "rule": "coordinate-crs-not-normalized", + "action": "Normalize coordinates to the graph CRS before entity pages or recommendations are shown.", + "refs": [ + "sample-002", + "EPSG:3857" + ] + }, + { + "priority": "blocking", + 
"rule": "country-coordinate-mismatch", + "action": "Hold location-derived graph edges until country and coordinate provenance are reconciled.", + "refs": [ + "sample-002", + "Peru" + ] + }, + { + "priority": "blocking", + "rule": "sample-voucher-missing", + "action": "Attach a voucher/specimen accession before the sample appears in entity pages.", + "refs": [ + "sample-002" + ] + }, + { + "priority": "blocking", + "rule": "sample-dataset-doi-unresolved", + "action": "Repair DOI alignment before graph edges are emitted.", + "refs": [ + "sample-002", + "10.5281/zenodo.unknown" + ] + }, + { + "priority": "review", + "rule": "collection-date-in-future", + "action": "Hold temporal graph edges until the collection date is corrected or the packet date advances.", + "refs": [ + "sample-002", + "2026-06-30" + ] + }, + { + "priority": "blocking", + "rule": "sample-dataset-edge-doi-mismatch", + "action": "Rebuild the graph edge from DOI-resolved dataset metadata.", + "refs": [ + "edge-2", + "sample-002", + "dataset-peru-water" + ] + } + ], + "safety": [ + "Synthetic sample, dataset, coordinate, and graph-edge metadata only", + "No geocoder, repository, GIS, ontology, specimen, journal, or external API calls", + "No private field locations, real endangered species data, credentials, or live graph mutations" + ] +} diff --git a/geospatial-sample-provenance-guard/reports/summary.svg b/geospatial-sample-provenance-guard/reports/summary.svg new file mode 100644 index 00000000..e2fa7791 --- /dev/null +++ b/geospatial-sample-provenance-guard/reports/summary.svg @@ -0,0 +1,16 @@ + diff --git a/geospatial-sample-provenance-guard/requirements-map.md b/geospatial-sample-provenance-guard/requirements-map.md new file mode 100644 index 00000000..496e901a --- /dev/null +++ b/geospatial-sample-provenance-guard/requirements-map.md @@ -0,0 +1,18 @@ +# Requirements Map + +Issue: `SCIBASE-AI/SCIBASE.AI#17` + +| Issue requirement | Implementation | +| --- | --- | +| Scientific knowledge graph 
integration | Models sample, dataset, and graph-edge metadata before entity pages and recommendations are published. | +| Entity extraction | Validates field-sample entities, coordinates, vouchers, dataset DOI links, and graph edge endpoints. | +| Knowledge navigation | Blocks unsafe or unresolved sample-location edges before they appear in graph navigation. | +| AI research recommendations | Suppresses public recommendations when geospatial provenance, sensitive-site redaction, or DOI evidence is incomplete. | +| Linked data and metadata | Checks CRS normalization, country/coordinate consistency, coordinate precision, collection dates, and sample-to-dataset DOI alignment. | +| Safe local validation | Includes dependency-free tests and demo generation from synthetic sample and graph metadata only. | + +## Non-goals + +- No live geocoder, repository, GIS, ontology, specimen, journal, or external API calls. +- No private field locations, real endangered species data, credentials, or live graph mutations. +- No replacement for ontology, clinical-trial, funder, software, ethics, evidence freshness, or recommendation-diversity workflows. 
diff --git a/geospatial-sample-provenance-guard/sample-data.js b/geospatial-sample-provenance-guard/sample-data.js new file mode 100644 index 00000000..a45c921c --- /dev/null +++ b/geospatial-sample-provenance-guard/sample-data.js @@ -0,0 +1,85 @@ +const project = { + asOfDate: "2026-05-22", + policy: { + requiredCrs: "EPSG:4326", + maxPublicPrecisionDecimals: 3, + sensitiveLabels: ["endangered-species", "sacred-site", "private-land"], + acceptedCountryBounds: { + Kenya: { minLat: -4.9, maxLat: 5.1, minLon: 33.5, maxLon: 42.0 }, + Peru: { minLat: -18.5, maxLat: 0.2, minLon: -82.0, maxLon: -68.0 }, + Iceland: { minLat: 63.0, maxLat: 67.5, minLon: -25.0, maxLon: -12.0 } + }, + maxCollectionFutureDays: 0, + publicRecommendationRequiresDatasetDoi: true + }, + graphPacket: { + id: "kg-field-samples-2026-05", + entityPage: "field-sample-climate-adaptation", + recommendationMode: "public-discovery" + }, + datasets: [ + { + id: "dataset-kenya-plant", + doi: "10.5281/zenodo.2026052201", + title: "Synthetic East Africa Plant Trait Survey", + license: "CC-BY-4.0" + }, + { + id: "dataset-peru-water", + doi: "10.5281/zenodo.2026052202", + title: "Synthetic Mountain Watershed Samples", + license: "CC0-1.0" + } + ], + samples: [ + { + id: "sample-001", + label: "Highland plant voucher", + country: "Kenya", + latitude: -0.27342, + longitude: 36.07119, + crs: "EPSG:4326", + precisionDecimals: 5, + collectionDate: "2026-04-12", + datasetDoi: "10.5281/zenodo.2026052201", + voucherId: "EAH-2026-0412", + labels: ["endangered-species"], + publicRecommendation: true + }, + { + id: "sample-002", + label: "Watershed sediment core", + country: "Peru", + latitude: 12.543, + longitude: -76.812, + crs: "EPSG:3857", + precisionDecimals: 3, + collectionDate: "2026-06-30", + datasetDoi: "10.5281/zenodo.unknown", + voucherId: "", + labels: [], + publicRecommendation: true + }, + { + id: "sample-003", + label: "Basalt reference swab", + country: "Iceland", + latitude: 64.145, + longitude: -21.942, 
+ crs: "EPSG:4326", + precisionDecimals: 3, + collectionDate: "2026-03-04", + datasetDoi: "10.5281/zenodo.2026052202", + voucherId: "IS-NHM-7781", + labels: [], + publicRecommendation: true + } + ], + edges: [ + { id: "edge-1", from: "sample-001", to: "dataset-kenya-plant", predicate: "includedInDataset" }, + { id: "edge-2", from: "sample-002", to: "dataset-peru-water", predicate: "includedInDataset" }, + { id: "edge-3", from: "sample-003", to: "dataset-peru-water", predicate: "includedInDataset" } + ] +}; + +module.exports = { project }; diff --git a/geospatial-sample-provenance-guard/test.js b/geospatial-sample-provenance-guard/test.js new file mode 100644 index 00000000..1a292f7a --- /dev/null +++ b/geospatial-sample-provenance-guard/test.js @@ -0,0 +1,90 @@ +const assert = require("assert"); +const { project } = require("./sample-data"); +const { + buildReviewPacket, + countryBoundsMatch, + evaluateGeospatialProvenance, + renderMarkdownReport, + renderSvgSummary +} = require("./index"); + +const evaluation = evaluateGeospatialProvenance(project); +const packet = buildReviewPacket(project); + +assert.strictEqual(packet.guard, "geospatial-sample-provenance-guard"); +assert.strictEqual(packet.issue, "SCIBASE-AI/SCIBASE.AI#17"); +assert.strictEqual(packet.decision, "block-geospatial-graph-publication"); + +assert.ok( + evaluation.findings.some((finding) => finding.rule === "sensitive-site-overprecise-public-coordinate"), + "expected sensitive precision finding" +); +assert.ok( + evaluation.findings.some((finding) => finding.rule === "coordinate-crs-not-normalized"), + "expected CRS finding" +); +assert.ok( + evaluation.findings.some((finding) => finding.rule === "country-coordinate-mismatch"), + "expected country-coordinate mismatch finding" +); +assert.ok( + evaluation.findings.some((finding) => finding.rule === "sample-voucher-missing"), + "expected missing voucher finding" +); +assert.ok( + evaluation.findings.some((finding) => finding.rule === 
"sample-dataset-edge-doi-mismatch"), + "expected DOI edge mismatch finding" +); + +assert.strictEqual(countryBoundsMatch(project, project.samples[0]), true); +assert.strictEqual(countryBoundsMatch(project, project.samples[1]), false); + +const cleanProject = JSON.parse(JSON.stringify(project)); +cleanProject.samples = [ + { + id: "sample-clean-1", + label: "General plant trait sample", + country: "Kenya", + latitude: -0.273, + longitude: 36.071, + crs: "EPSG:4326", + precisionDecimals: 3, + collectionDate: "2026-04-12", + datasetDoi: "10.5281/zenodo.2026052201", + voucherId: "EAH-2026-0412", + labels: [], + publicRecommendation: true + }, + { + id: "sample-clean-2", + label: "Basalt reference swab", + country: "Iceland", + latitude: 64.145, + longitude: -21.942, + crs: "EPSG:4326", + precisionDecimals: 3, + collectionDate: "2026-03-04", + datasetDoi: "10.5281/zenodo.2026052202", + voucherId: "IS-NHM-7781", + labels: [], + publicRecommendation: true + } +]; +cleanProject.edges = [ + { id: "edge-clean-1", from: "sample-clean-1", to: "dataset-kenya-plant", predicate: "includedInDataset" }, + { id: "edge-clean-2", from: "sample-clean-2", to: "dataset-peru-water", predicate: "includedInDataset" } +]; + +const cleanPacket = buildReviewPacket(cleanProject); +assert.strictEqual(cleanPacket.decision, "geospatial-provenance-ready"); +assert.strictEqual(cleanPacket.findings.length, 0); + +const markdown = renderMarkdownReport(packet); +assert.ok(markdown.includes("## Findings")); +assert.ok(markdown.includes("sensitive-site-overprecise-public-coordinate")); + +const svg = renderSvgSummary(packet); +assert.ok(svg.includes("