diff --git a/packages/package-crawler.js b/packages/package-crawler.js
index 37068aea..1c280fa1 100644
--- a/packages/package-crawler.js
+++ b/packages/package-crawler.js
@@ -9,8 +9,27 @@ const {XMLParser} = require('fast-xml-parser');
 const crypto = require('crypto');
 const fs = require('fs');
 const path = require('path');
+const dns = require('dns');
+const http = require('http');
+const https = require('https');
+const ipaddr = require('ipaddr.js');
 const {debugLog} = require("../tx/operation-context");
 
+// True if an IP literal is anything other than a normal public (unicast) address:
+// loopback, private, link-local (incl. 169.254.169.254 cloud metadata), unique-local,
+// CGNAT, multicast, reserved, etc. IPv4-mapped IPv6 addresses are unwrapped first.
+//
+// @param {string} ip - textual IP address, handed to ipaddr.parse()
+// @returns {boolean} true when the address is not classified as 'unicast', and also
+//   true when the input cannot be parsed at all (fail closed)
+function isNonPublicAddress(ip) {
+  try {
+    let addr = ipaddr.parse(ip);
+    // Classify ::ffff:a.b.c.d by its embedded IPv4 address rather than as IPv6.
+    if (addr.kind() === 'ipv6' && addr.isIPv4MappedAddress()) {
+      addr = addr.toIPv4Address();
+    }
+    return addr.range() !== 'unicast';
+  } catch (e) {
+    return true; // unparseable - treat as unsafe
+  }
+}
+
 class PackageCrawler {
   log;
   packages = new Set();
@@ -139,10 +158,96 @@ class PackageCrawler {
     return url.replace(/^http:/, 'https:');
   }
 
+  // Roots under which local-file feed reads are permitted. Reading local files supports
+  // locally-configured feeds (e.g. for testing); allowed roots come from
+  // config.localFeedDirs, plus the directory of a local master feed url. Anything else
+  // is rejected so a third-party feed can't point a read at an arbitrary server file.
+  allowedLocalRoots() {
+    const roots = [];
+    // config.localFeedDirs may be an array of directories or a single non-empty string.
+    const cfg = this.config && this.config.localFeedDirs;
+    if (Array.isArray(cfg)) {
+      roots.push(...cfg);
+    } else if (typeof cfg === 'string' && cfg.length > 0) {
+      roots.push(cfg);
+    }
+    // A master feed given as an absolute local path implicitly allows its own directory.
+    if (this.config && typeof this.config.masterUrl === 'string' && this.config.masterUrl.startsWith('/')) {
+      roots.push(path.dirname(this.config.masterUrl));
+    }
+    // Normalise to absolute paths so callers can compare against path.resolve() output.
+    return roots.map((r) => path.resolve(r));
+  }
+
+  // A DNS lookup wrapper that rejects any host resolving to a non-public address.
+  // Enforced at connection time (so it also covers redirect targets and defeats
+  // DNS-rebinding), this is the SSRF guard for all outbound http(s) fetches.
+  // Set config.allowPrivateAddresses = true to disable (e.g. for local test registries).
+  //
+  // Returns a function with the dns.lookup signature (hostname, [options], callback).
+  // NOTE(review): a lookup hook only runs when there is a name to resolve; Node's net
+  // module connects straight to hosts that are already IP literals, so literal-IP URLs
+  // need to be screened separately from this function.
+  ssrfLookup() {
+    const allowPrivate = !!(this.config && this.config.allowPrivateAddresses);
+    return (hostname, options, callback) => {
+      // Support the two-argument form lookup(hostname, callback).
+      if (typeof options === 'function') {
+        callback = options;
+        options = {};
+      }
+      // Remember the shape the caller asked for; we always resolve every address so
+      // that each candidate can be vetted.
+      const wantAll = !!(options && options.all);
+      dns.lookup(hostname, Object.assign({}, options, { all: true }), (err, addresses) => {
+        if (err) {
+          callback(err);
+          return;
+        }
+        if (!allowPrivate) {
+          // One non-public address is enough to refuse the whole host.
+          for (const a of addresses) {
+            if (isNonPublicAddress(a.address)) {
+              callback(new Error('Blocked request to non-public address ' + a.address + ' (host ' + hostname + ')'));
+              return;
+            }
+          }
+        }
+        // Answer in the format requested: the full list, or just the first entry.
+        if (wantAll) {
+          callback(null, addresses);
+        } else {
+          callback(null, addresses[0].address, addresses[0].family);
+        }
+      });
+    };
+  }
+
+  // http/https agents that route every connection through the SSRF lookup. Cached so
+  // connections can be pooled across requests.
+  guardedAgents() {
+    if (!this._guardedAgents) {
+      // Function-scope require: `net` is only needed here, to recognise IP-literal hosts.
+      const net = require('net');
+      const lookup = this.ssrfLookup();
+      const allowPrivate = !!(this.config && this.config.allowPrivateAddresses);
+
+      // Node never calls a custom `lookup` when the target host is already an IP literal
+      // (there is nothing to resolve), so a URL such as http://127.0.0.1/ or
+      // http://169.254.169.254/ would bypass the DNS-based guard entirely. Screen
+      // literal hosts here, before any socket is opened. Returns an Error or null.
+      const literalBlock = (options) => {
+        if (allowPrivate || !options) {
+          return null;
+        }
+        const raw = options.host || options.hostname || '';
+        const host = String(raw).replace(/^\[|\]$/g, ''); // tolerate a bracketed IPv6 literal
+        if (net.isIP(host) !== 0 && isNonPublicAddress(host)) {
+          return new Error('Blocked request to non-public address ' + host);
+        }
+        return null;
+      };
+
+      // Shared createConnection logic for both agent flavours: refuse blocked literals,
+      // otherwise delegate to the stock implementation with the guarded lookup injected.
+      const guardedConnect = (superCreate, options, cb) => {
+        const blocked = literalBlock(options);
+        if (blocked) {
+          if (typeof cb === 'function') {
+            // Reporting through the callback makes the request emit 'error' (and the
+            // axios promise reject) instead of throwing out of an event handler.
+            cb(blocked);
+            return undefined;
+          }
+          throw blocked;
+        }
+        return superCreate(Object.assign({}, options, { lookup }), cb);
+      };
+
+      class GuardedHttpAgent extends http.Agent {
+        createConnection(options, cb) {
+          return guardedConnect((o, c) => super.createConnection(o, c), options, cb);
+        }
+      }
+      class GuardedHttpsAgent extends https.Agent {
+        createConnection(options, cb) {
+          return guardedConnect((o, c) => super.createConnection(o, c), options, cb);
+        }
+      }
+      // keepAlive lets the cached agents pool sockets across requests.
+      this._guardedAgents = {
+        httpAgent: new GuardedHttpAgent({ keepAlive: true }),
+        httpsAgent: new GuardedHttpsAgent({ keepAlive: true })
+      };
+    }
+    return this._guardedAgents;
+  }
+
+  // Resolve a local feed path and confine it to an allowed root (path-injection guard).
+  // Returns the absolute path, or throws when it lies outside every allowed root.
+  // NOTE(review): path.resolve() is purely lexical, so a symlink inside an allowed root
+  // is not followed by this check.
+  resolveLocalReadPath(url) {
+    const resolved = path.resolve(url);
+    const roots = this.allowedLocalRoots();
+    // Compare against root + separator so /feeds does not also admit /feeds-other.
+    const allowed = roots.some((root) => resolved === root || resolved.startsWith(root + path.sep));
+    if (!allowed) {
+      throw new Error('Refusing to read local file outside the allowed feed directories: ' + url);
+    }
+    return resolved;
+  }
+
   async fetchJson(url) {
     try {
       if (url.startsWith("/")) {
-        const content = await fs.promises.readFile(url, "utf8");
+        const content = await fs.promises.readFile(this.resolveLocalReadPath(url), "utf8");
         return JSON.parse(content);
       } else {
         const response = await axios.get(url, {
@@ -150,7 +255,9 @@ class PackageCrawler {
           signal: this.abortController?.signal,
           headers: {
             'User-Agent': 'FHIR Package Crawler/1.0'
-          }
+          },
+          httpAgent: this.guardedAgents().httpAgent,
+          httpsAgent: this.guardedAgents().httpsAgent
         });
         return response.data;
       }
@@ -166,7 +273,7 @@ class PackageCrawler {
   async fetchXml(url) {
     try {
       if (url.startsWith("/")) {
-        const content = await fs.promises.readFile(url, 'utf8');
+        const content = await fs.promises.readFile(this.resolveLocalReadPath(url), 'utf8');
         const parser = new XMLParser({
           ignoreAttributes: false,
           attributeNamePrefix: '@_',
@@ -180,7 +287,9 @@ class PackageCrawler {
           signal: this.abortController?.signal,
           headers: {
'User-Agent': 'FHIR Package Crawler/1.0'
-          }
+          },
+          httpAgent: this.guardedAgents().httpAgent,
+          httpsAgent: this.guardedAgents().httpsAgent
         });
 
         const parser = new XMLParser({
@@ -203,7 +312,7 @@ class PackageCrawler {
   async fetchUrl(url) {
     try {
       if (url.startsWith("/")) {
-        const buffer = await fs.promises.readFile(url);
+        const buffer = await fs.promises.readFile(this.resolveLocalReadPath(url));
         this.totalBytes += buffer.byteLength;
         return buffer;
       } else {
@@ -213,7 +322,9 @@ class PackageCrawler {
           signal: this.abortController?.signal,
           headers: {
             'User-Agent': 'FHIR Package Crawler/1.0'
-          }
+          },
+          httpAgent: this.guardedAgents().httpAgent,
+          httpsAgent: this.guardedAgents().httpsAgent
         });
 
         this.totalBytes += response.data.byteLength;
@@ -558,6 +669,12 @@ class PackageCrawler {
       if (npmPackage.hasJavaScript && !isTemplate && id !== 'hl7.fhir.pubpack') {
         throw new Error(`Package ${idver} rejected: contains JavaScript files but is not a template package`);
       }
+      // The feed gate (item.notForPublication) only sees the RSS entry. A package whose
+      // feed entry is clean can still carry notForPublication inside the tarball - that is
+      // a draft build that must never enter the registry. Reject it here too.
+      if (npmPackage.notForPublication) {
+        throw new Error(`Package ${idver} rejected: tarball is flagged notForPublication (draft build, not suitable for publication)`);
+      }
 
       // Extract URLs from package
       const urls = this.processPackageUrls(npmPackage);
diff --git a/packages/packages.js b/packages/packages.js
index 287be9b9..842a4822 100644
--- a/packages/packages.js
+++ b/packages/packages.js
@@ -592,6 +592,64 @@ class PackagesModule {
     }
   }
 
+  // Re-fetch a single package tarball and replace whatever is stored for it, bypassing
+  // the crawler's GUID dedup and notForPublication feed gate. Used to push out a
+  // corrected package that was already (mis)published.
+  //
+  // @param {string} link - http(s) URL of the package tarball; also used as the GUID
+  // @returns {Promise<object>} { status: 'updated', id, version, replaced } where
+  //   `replaced` is the number of stored versions that were deleted
+  // @throws {Error} when the link is not an http(s) URL, or the fetched tarball is
+  //   still flagged notForPublication; errors from the crawler/db calls propagate
+  async forceUpdatePackage(link) {
+    // Only allow fetching over http(s). This endpoint must never be usable to read
+    // local server files (path injection) - tarballs are always published web URLs.
+    if (!link || !/^https?:\/\//i.test(link)) {
+      throw new Error('Invalid package link (must be an http(s) URL): ' + link);
+    }
+    // Create the crawler on first use if the module does not already have one.
+    if (!this.crawler) {
+      this.crawler = new PackageCrawler(this.config, this.db, this.stats);
+    }
+
+    // The feed uses the versioned package.tgz url as the (permalink) GUID, so reusing
+    // the link as the GUID keeps a later crawl from inserting a duplicate.
+    const guid = link;
+
+    const buffer = await this.crawler.fetchUrl(link);
+    const npm = await this.crawler.extractNpmPackage(buffer, link);
+
+    // Refuse to re-store a still-broken package - that would just re-publish the bug.
+    if (npm.notForPublication) {
+      throw new Error('Refusing to store ' + npm.id + '#' + npm.version + ': fetched tarball is still flagged notForPublication');
+    }
+
+    const idver = npm.id + '#' + npm.version;
+    // NOTE(review): the existing rows are deleted before the replacement is stored, so
+    // a failure inside store() would leave this version absent - confirm acceptable.
+    const replaced = await this.deleteVersionsByGuid(guid);
+
+    const itemLog = { status: '??' };
+    await this.crawler.store(link, link, guid, new Date(), buffer, idver, itemLog);
+
+    pckLog.info('Force-updated ' + idver + ' from ' + link + ' (replaced ' + replaced + ' existing row(s))');
+    return { status: 'updated', id: npm.id, version: npm.version, replaced };
+  }
+
+  // Delete a stored package version and all of its child rows, by GUID.
+  // @param {string} guid - GUID value matched against PackageVersions.GUID
+  // @returns {Promise<number>} number of PackageVersions keys that were removed
+  //   (0 when nothing matched); rejects with the first database error
+  // NOTE(review): the deletes run one after another without an explicit transaction,
+  // so an error part-way through leaves the earlier deletes applied.
+  deleteVersionsByGuid(guid) {
+    return new Promise((resolve, reject) => {
+      this.db.all('SELECT PackageVersionKey FROM PackageVersions WHERE GUID = ?', [guid], (err, rows) => {
+        if (err) return reject(err);
+        const keys = (rows || []).map(r => r.PackageVersionKey);
+        if (keys.length === 0) return resolve(0);
+        // One '?' placeholder per key, reused by every statement below.
+        const ph = keys.map(() => '?').join(',');
+        // Child tables first, the PackageVersions rows themselves last.
+        const stmts = [
+          'DELETE FROM PackageFHIRVersions WHERE PackageVersionKey IN (' + ph + ')',
+          'DELETE FROM PackageDependencies WHERE PackageVersionKey IN (' + ph + ')',
+          'DELETE FROM PackageURLs WHERE PackageVersionKey IN (' + ph + ')',
+          'DELETE FROM PackageVersions WHERE PackageVersionKey IN (' + ph + ')'
+        ];
+        // Run the statements strictly in order; stop at the first failure.
+        const runNext = (i) => {
+          if (i >= stmts.length) return resolve(keys.length);
+          this.db.run(stmts[i], keys, (e) => e ? reject(e) : runNext(i + 1));
+        };
+        runNext(0);
+      });
+    });
+  }
+
   async initializeDatabase() {
     return new Promise((resolve, reject) => {
       // Use config path if absolute, otherwise resolve relative to data dir
@@ -1223,6 +1281,52 @@ class PackagesModule {
       }
     });
 
+    // Force-refresh specific packages, bypassing the feed. The crawler only fetches a
+    // package once (it dedupes on the feed GUID) and skips anything flagged
+    // notForPublication, so there is normally no way to make it re-pick-up a package
+    // that was published incorrectly and later corrected. This endpoint re-fetches the
+    // tarball(s) directly and replaces whatever is stored.
+    //
+    // POST /update-package { "links": ["http://hl7.org/fhir/uv/ips/2.0.1/package.tgz", ...] }
+    //
+    // The link is the versioned package.tgz url, which is exactly the GUID the feed uses,
+    // so the replacement keeps the same GUID and a later crawl won't create a duplicate.
+    // If config.updateToken is set, the request must carry it in the x-update-token header.
+    this.router.post('/update-package', async (req, res) => {
+      const start = Date.now();
+      try {
+        // Optional shared-secret check. The token is a credential, so compare it in
+        // constant time: hashing both sides gives equal-length buffers for
+        // timingSafeEqual and avoids leaking the secret's length.
+        const expectedToken = this.config.updateToken;
+        if (expectedToken) {
+          const { createHash, timingSafeEqual } = require('crypto');
+          const digest = (value) => createHash('sha256').update(String(value)).digest();
+          const supplied = req.headers['x-update-token'];
+          const authorised = typeof supplied === 'string' && timingSafeEqual(digest(supplied), digest(expectedToken));
+          if (!authorised) {
+            res.status(403).json({ error: 'forbidden: missing or invalid x-update-token' });
+            return;
+          }
+        }
+        // Accept several body shapes: links / packages (array or single string), or a
+        // single url / link.
+        let links = req.body && (req.body.links || req.body.packages || (req.body.url ? [req.body.url] : (req.body.link ? [req.body.link] : null)));
+        if (typeof links === 'string') links = [links];
+        if (!Array.isArray(links) || links.length === 0) {
+          res.status(400).json({ error: 'Provide a JSON body like {"links": ["", ...]}' });
+          return;
+        }
+        // Process sequentially; one bad link is reported in the results, not fatal.
+        const results = [];
+        for (const link of links) {
+          try {
+            results.push(Object.assign({ link }, await this.forceUpdatePackage(link)));
+          } catch (e) {
+            pckLog.error('Force update failed for ' + link + ': ' + e.message);
+            results.push({ link, status: 'error', error: e.message });
+          }
+        }
+        const failed = results.filter(r => r.status === 'error').length;
+        // 500 only when every link failed; partial success still answers 200.
+        res.status(failed === results.length ? 500 : 200).json({
+          message: 'Processed ' + results.length + ' package(s), ' + failed + ' failed',
+          results
+        });
+      } catch (error) {
+        pckLog.error('update-package endpoint failed:', error);
+        res.status(500).json({ error: 'update-package failed', message: error.message });
+      } finally {
+        this.stats.countRequest('update-package', Date.now() - start);
+      }
+    });
+
     // Crawler statistics endpoint (existing)
     this.router.get('/stats', async (req, res) => {
       const start = Date.now();
diff --git a/publisher/publisher.js b/publisher/publisher.js
index 3045fecc..236a90cc 100644
--- a/publisher/publisher.js
+++ b/publisher/publisher.js
@@ -200,6 +200,17 @@ class PublisherModule {
           });
         });
       }
+      if (!columnNames.includes('publisher_version')) {
+        await new Promise((resolve, reject) => {
+          this.db.run('ALTER TABLE tasks ADD COLUMN publisher_version TEXT', (err) => {
+            if (err) reject(err);
+            else {
+              this.logger.info('Migration: added publisher_version column to tasks table');
+              resolve();
+            }
+          });
+        });
+      }
       const websiteColumns = await new Promise((resolve, reject) => {
         this.db.all("PRAGMA table_info(websites)", (err, rows) => {
           if (err) reject(err);
@@ -542,6 +553,11 @@ class PublisherModule {
       // Step 3: Clone GitHub repository
       await this.cloneRepository(task, draftDir);
 
+      // Step 3b: Update SUSHI to the latest release. The IG Publisher runs SUSHI with
+      // --require-latest, so a SUSHI that has fallen behind npm's latest makes the build
+      // fail. Refreshing it here keeps draft builds working.
+      await this.ensureLatestSushi(task.id);
+
       // Step 4: Run IG publisher
       await this.runIGPublisher(publisherJar, draftDir, logFile, task.id);
 
@@ -578,6 +594,16 @@ class PublisherModule {
         throw new Error('Could not find publisher.jar in latest release');
       }
 
+      // Record which IG Publisher version this task is using. The same jar is reused
+      // for the publication run, so this is the version that produced the output.
+      const publisherVersion = releaseResponse.data.tag_name || releaseResponse.data.name || 'unknown';
+      try {
+        await this.updateTaskFields(taskId, { publisher_version: publisherVersion });
+      } catch (e) {
+        this.logger.warn('Failed to record publisher_version for task ' + taskId + ': ' + e.message);
+      }
+      await this.logTaskMessage(taskId, 'info', 'Using IG Publisher version ' + publisherVersion);
+
       await this.logTaskMessage(taskId, 'info', 'Downloading from: ' + downloadUrl);
 
       // Download the file
@@ -603,6 +629,62 @@ class PublisherModule {
     }
   }
 
+  // Directory holding FHIRsmith's own copy of SUSHI. Installing here (rather than -g)
+  // needs no root: on the server the global npm prefix (/usr) is root-owned.
+  // Returns whatever folders.filePath('publisher', 'sushi') yields.
+  sushiDir() {
+    return folders.filePath('publisher', 'sushi');
+  }
+
+  // Environment for spawning the IG Publisher so it finds our managed SUSHI on PATH.
+  // The publisher resolves the `sushi` command from PATH, so prepending our bin dir
+  // makes it use the version we just installed.
+  // Returns a fresh copy of process.env with PATH replaced; process.env is not mutated.
+  publisherEnv() {
+    const binDir = path.join(this.sushiDir(), 'bin');
+    return Object.assign({}, process.env, {
+      // path.delimiter is the platform's PATH separator; tolerate an unset PATH.
+      PATH: binDir + path.delimiter + (process.env.PATH || '')
+    });
+  }
+
+  // Update FHIRsmith's managed SUSHI to the latest npm release before a run. The IG
+  // Publisher invokes SUSHI with --require-latest, so a SUSHI that has fallen behind
+  // npm's latest makes the build abort. We install into a FHIRsmith-owned prefix
+  // (sushiDir) to avoid needing root for a global install, and the publisher picks it
+  // up via publisherEnv(). Best-effort: a failure is logged but not fatal - the
+  // corrected runBuild check in the IG Publisher now turns a stale SUSHI into a loud
+  // publication failure rather than a silently-published draft.
+  // @param {*} taskId - task whose log receives the progress / warning messages
+  // @returns {Promise<void>} settles once npm has finished (or could not be started)
+  async ensureLatestSushi(taskId) {
+    const { spawn, execSync } = require('child_process');
+    const dir = this.sushiDir();
+    try {
+      fs.mkdirSync(dir, { recursive: true });
+    } catch (e) {
+      // Best-effort contract: an unusable directory is reported, not fatal.
+      await this.logTaskMessage(taskId, 'warn', 'Could not create SUSHI directory ' + dir + ': ' + e.message);
+      return;
+    }
+    await this.logTaskMessage(taskId, 'info', 'Ensuring SUSHI is up to date in ' + dir + ' ...');
+
+    // The listeners only record the outcome; all (async) logging happens after the
+    // promise settles. That way a failing log call can never leave this promise
+    // pending, and 'error' followed by 'close' cannot report the same failure twice
+    // (only the first resolve() counts).
+    const outcome = await new Promise((resolve) => {
+      // -g with --prefix makes npm treat `dir` as the global prefix, so the binary
+      // lands at dir/bin/sushi (a FHIRsmith-owned, writable location).
+      const npm = spawn('npm', ['install', '-g', 'fsh-sushi@latest', '--prefix', dir], { stdio: ['ignore', 'pipe', 'pipe'] });
+      let err = '';
+      npm.stdout.on('data', () => { /* ignore */ });
+      npm.stderr.on('data', (d) => { err += d.toString(); });
+      npm.on('error', (e) => resolve({ spawnError: e }));
+      npm.on('close', (code) => resolve({ code, stderr: err }));
+    });
+
+    if (outcome.spawnError) {
+      await this.logTaskMessage(taskId, 'warn', 'Could not run npm to update SUSHI: ' + outcome.spawnError.message);
+      return;
+    }
+    if (outcome.code !== 0) {
+      // Keep only the tail of stderr so the task log stays readable.
+      await this.logTaskMessage(taskId, 'warn', 'SUSHI update exited with code ' + outcome.code + (outcome.stderr ? ': ' + outcome.stderr.trim().slice(-400) : ''));
+      return;
+    }
+
+    // Report the version that the publisher will actually find on its PATH.
+    let version = '';
+    try {
+      version = execSync('sushi --version', { env: this.publisherEnv(), encoding: 'utf8' }).trim();
+    } catch (e) {
+      version = '(version check failed)';
+    }
+    await this.logTaskMessage(taskId, 'info', 'SUSHI is up to date: ' + version);
+  }
+
   async cloneRepository(task, draftDir) {
     const { spawn } = require('child_process');
     const gitUrl = 'https://github.com/' + task.github_org + '/' + task.github_repo + '.git';
@@ -658,7 +740,8 @@ class PublisherModule {
         '.'
], {
         cwd: draftDir,
-        stdio: ['pipe', 'pipe', 'pipe']
+        stdio: ['pipe', 'pipe', 'pipe'],
+        env: this.publisherEnv()
       });
 
       // Create log file stream
@@ -684,7 +767,7 @@ class PublisherModule {
         const elapsedMs = Date.now() - buildStart;
         const sinceDataMs = Date.now() - lastDataAt;
         let logKb = 0;
-        try { logKb = Math.round(fs.statSync(logFile).size / 1024); } catch (_) {}
+        try { logKb = Math.round(fs.statSync(logFile).size / 1024); } catch (_) { /* log file not created yet */ }
         const elapsedMin = Math.floor(elapsedMs / 60000);
         const elapsedSec = Math.floor(elapsedMs / 1000) % 60;
         const idleSec = Math.floor(sinceDataMs / 1000);
@@ -760,6 +843,91 @@ class PublisherModule {
     await this.logTaskMessage(task.id, 'info', 'Build output verified: package-id=' + qaData['package-id'] + ', version=' + qaData['ig-ver']);
   }
 
+  // Read package/package.json out of a .tgz without unpacking the whole archive.
+  // Resolves with the parsed JSON; rejects when tar cannot be spawned, exits non-zero,
+  // or the extracted text is not valid JSON.
+  inspectPackageTgz(tgzPath) {
+    const { spawn } = require('child_process');
+    return new Promise((resolve, reject) => {
+      // -O streams the single requested member to stdout instead of writing it to disk.
+      const tar = spawn('tar', ['-xzOf', tgzPath, 'package/package.json']);
+      let out = '';
+      let err = '';
+      tar.stdout.on('data', (d) => { out += d.toString(); });
+      tar.stderr.on('data', (d) => { err += d.toString(); });
+      tar.on('error', reject);
+      tar.on('close', (code) => {
+        if (code !== 0) {
+          reject(new Error('Could not read ' + tgzPath + ': ' + (err.trim() || ('tar exit ' + code))));
+        } else {
+          try {
+            resolve(JSON.parse(out));
+          } catch (e) {
+            reject(new Error('Invalid package.json in ' + tgzPath + ': ' + e.message));
+          }
+        }
+      });
+    });
+  }
+
+  // Confirm the package.tgz files that landed in the web tree are publication builds,
+  // not the draft. Throws (failing the task) if a draft package was published, or if
+  // the publication path would resolve outside the website folder. When the inputs
+  // needed to locate the package are missing, a warning is logged and the check is
+  // skipped.
+  async verifyPublishedPackage(task, website, draftDir) {
+    await this.logTaskMessage(task.id, 'info', 'Verifying published package(s) are publication builds...');
+
+    // The intended publication path comes from the IG's publication-request.json.
+    const prPath = path.join(draftDir, 'publication-request.json');
+    if (!fs.existsSync(prPath)) {
+      await this.logTaskMessage(task.id, 'warn', 'No publication-request.json found at ' + prPath + ' - skipping published-package check');
+      return;
+    }
+    const pr = JSON.parse(fs.readFileSync(prPath, 'utf8'));
+    const pubPath = pr.path;
+
+    // The website base url lets us map a canonical url to a folder in the web tree.
+    const setupPath = path.join(website.local_folder, 'publish-setup.json');
+    if (!pubPath || !fs.existsSync(setupPath)) {
+      await this.logTaskMessage(task.id, 'warn', 'Cannot resolve web path (path or publish-setup.json missing) - skipping published-package check');
+      return;
+    }
+    const setup = JSON.parse(fs.readFileSync(setupPath, 'utf8'));
+    const baseUrl = setup.website && setup.website.url;
+    if (!baseUrl || !pubPath.startsWith(baseUrl)) {
+      await this.logTaskMessage(task.id, 'warn', 'Publication path ' + pubPath + ' is not under website url ' + baseUrl + ' - skipping published-package check');
+      return;
+    }
+
+    const relVer = pubPath.substring(baseUrl.length).replace(/^\/+/, '');
+    const relCur = relVer.replace(/\/[^/]+\/?$/, ''); // strip the version segment for the "current" copy
+    // relCur equals relVer when there is no version segment to strip; the Set keeps
+    // the same tarball from being inspected (and logged) twice.
+    const candidates = [...new Set([relVer, relCur].map((rel) => path.join(website.local_folder, rel, 'package.tgz')))];
+    const webRoot = path.resolve(website.local_folder);
+
+    let checked = 0;
+    for (const pkgPath of candidates) {
+      // pr.path comes from the IG repository; never let '..' segments steer the check
+      // outside the website folder.
+      const relToRoot = path.relative(webRoot, path.resolve(pkgPath));
+      if (relToRoot === '..' || relToRoot.startsWith('..' + path.sep) || path.isAbsolute(relToRoot)) {
+        throw new Error('Publication path ' + pubPath + ' resolves outside the website folder ' + website.local_folder);
+      }
+      if (!fs.existsSync(pkgPath)) {
+        continue;
+      }
+      checked++;
+      const json = await this.inspectPackageTgz(pkgPath);
+      // Either marker identifies a draft build.
+      const problems = [];
+      if (json.notForPublication) {
+        problems.push('notForPublication is set');
+      }
+      if (typeof json.url === 'string' && json.url.startsWith('file:')) {
+        problems.push('url is a local file path (' + json.url + ')');
+      }
+      if (problems.length > 0) {
+        throw new Error('Published package ' + pkgPath + ' is a draft build, not a publication build (' +
+          problems.join('; ') + '). The IG Publisher publication run likely skipped package ' +
+          'regeneration (e.g. a Jekyll/template failure). Not committing.');
+      }
+      await this.logTaskMessage(task.id, 'info', 'Verified publication package: ' + pkgPath);
+    }
+
+    if (checked === 0) {
+      await this.logTaskMessage(task.id, 'warn', 'No published package.tgz found to verify under ' + relVer + ' or ' + relCur);
+    }
+  }
+
   async runPublication(task) {
     const website = await this.getWebsite(task.website_id);
     if (!website) {
@@ -824,6 +992,12 @@ class PublisherModule {
       // Step 3: Pull latest web folder before publishing into it
       await this.runCommand('git', ['pull'], { cwd: website.git_root }, task.id, 'Pulling latest web folder');
 
+      // Step 3b: Update SUSHI to the latest release before the publication run. This is the
+      // step that previously failed silently: the publication build runs SUSHI with
+      // --require-latest, and a draft approved days earlier may now face a newer SUSHI on
+      // npm. Refresh it so the publication build doesn't abort on a version mismatch.
+      await this.ensureLatestSushi(task.id);
+
       // Step 4: Run the IG publisher in go-publish mode
       await this.runPublisherGoPublish(task.id, publisherJar, draftDir, website.local_folder, registryDir, historyDir, templatesDir, zipsDir, publishLogFile);
 
@@ -836,6 +1010,12 @@ class PublisherModule {
       }
       await this.logTaskMessage(task.id, 'info', 'Publication run verified: ' + pubLogName + ' found');
 
+      // Step 5b: Verify the published package is actually a publication build.
+      // If the IG Publisher's publication run skipped package regeneration (e.g. a Jekyll
+      // or template failure), the draft package - flagged notForPublication with a file://
+      // url - can survive into the web tree. Catch that here, before anything is committed.
+ await this.verifyPublishedPackage(task, website, draftDir); + // Step 6: Commit and push the web folder await this.logTaskMessage(task.id, 'info', 'Committing changes to web folder...'); const gitUrl = 'https://github.com/' + task.github_org + '/' + task.github_repo + '.git'; @@ -892,7 +1072,8 @@ class PublisherModule { return new Promise((resolve, reject) => { const java = spawn('java', args, { - stdio: ['pipe', 'pipe', 'pipe'] + stdio: ['pipe', 'pipe', 'pipe'], + env: this.publisherEnv() }); const logStream = fs.createWriteStream(logFile); @@ -916,7 +1097,7 @@ class PublisherModule { const elapsedMs = Date.now() - buildStart; const sinceDataMs = Date.now() - lastDataAt; let logKb = 0; - try { logKb = Math.round(fs.statSync(logFile).size / 1024); } catch (_) {} + try { logKb = Math.round(fs.statSync(logFile).size / 1024); } catch (_) { /* log file not created yet */ } const elapsedMin = Math.floor(elapsedMs / 60000); const elapsedSec = Math.floor(elapsedMs / 1000) % 60; const idleSec = Math.floor(sinceDataMs / 1000); @@ -1217,7 +1398,7 @@ class PublisherModule { } else { content += '
'; content += ''; - content += ''; + content += ''; content += ''; for (const task of tasks) { @@ -1238,6 +1419,7 @@ class PublisherModule { content += ''; content += ''; content += ''; + content += ''; content += ''; content += ''; content += '
IDPackageVersionWebsiteStatusQueuedUserActions
IDPackageVersionWebsiteStatusIG PublisherQueuedUserActions
' + task.version + '' + task.website_name + '' + task.status + '' + (task.publisher_version ? '' + escape(task.publisher_version) + '' : '') + '' + new Date(task.queued_at).toLocaleString() + '' + task.user_name + ''; @@ -1542,6 +1724,10 @@ class PublisherModule { content += '

Status: ' + task.status + '

'; content += '

GitHub: ' + task.github_org + '/' + task.github_repo + ' (' + task.git_branch + ')

'; + if (task.publisher_version) { + content += '

IG Publisher: ' + escape(task.publisher_version) + '

'; + } + if (task.local_folder) { content += '

Local Folder: ' + task.local_folder + '

';
       }
diff --git a/registry/crawler.js b/registry/crawler.js
index ece1b28d..9817a38f 100644
--- a/registry/crawler.js
+++ b/registry/crawler.js
@@ -2,15 +2,34 @@
 // Crawler for gathering server information from terminology servers
 
 const axios = require('axios');
-const {
-  ServerRegistries,
-  ServerRegistry,
-  ServerInformation,
+const dns = require('dns');
+const http = require('http');
+const https = require('https');
+const ipaddr = require('ipaddr.js');
+const {
+  ServerRegistries,
+  ServerRegistry,
+  ServerInformation,
   ServerVersionInformation,
 } = require('./model');
 const {Extensions} = require("../tx/library/extensions");
 const {debugLog} = require("../tx/operation-context");
 
+// True if an IP literal is anything other than a normal public (unicast) address:
+// loopback, private, link-local (incl. 169.254.169.254 cloud metadata), unique-local,
+// CGNAT, multicast, reserved, etc. IPv4-mapped IPv6 addresses are unwrapped first.
+//
+// @param {string} ip - textual IP address, handed to ipaddr.parse()
+// @returns {boolean} true when the address is not classified as 'unicast', and also
+//   true when the input cannot be parsed at all (fail closed)
+function isNonPublicAddress(ip) {
+  try {
+    let addr = ipaddr.parse(ip);
+    // Classify ::ffff:a.b.c.d by its embedded IPv4 address rather than as IPv6.
+    if (addr.kind() === 'ipv6' && addr.isIPv4MappedAddress()) {
+      addr = addr.toIPv4Address();
+    }
+    return addr.range() !== 'unicast';
+  } catch (e) {
+    return true; // unparseable - treat as unsafe
+  }
+}
+
 const MASTER_URL = 'https://fhir.github.io/ig-registry/tx-servers.json';
 
 class RegistryCrawler {
@@ -22,7 +41,8 @@ class RegistryCrawler {
       masterUrl: config.masterUrl || MASTER_URL,
       userAgent: config.userAgent || 'HealthIntersections/FhirServer',
       crawlInterval: config.crawlInterval || 5 * 60 * 1000, // 5 minutes default
-      apiKeys: config.apiKeys || {} // Map of server URL or code to API key
+      apiKeys: config.apiKeys || {}, // Map of server URL or code to API key
+      allowPrivateAddresses: config.allowPrivateAddresses || false // SSRF opt-out for local test servers
     };
     this.stats = stats;
 
@@ -39,6 +59,63 @@ class RegistryCrawler {
     this.log = logv;
   }
 
+  // A DNS lookup wrapper that rejects any host resolving to a non-public address.
+  // Enforced at connection time (so it also covers redirect targets and defeats
+  // DNS-rebinding), this is the SSRF guard for outbound fetches. The registry is
+  // crawled from server-supplied "next" links, so the target host is not trusted.
+  // Set config.allowPrivateAddresses = true to disable (e.g. for local test servers).
+  //
+  // Returns a function with the dns.lookup signature (hostname, [options], callback).
+  // NOTE(review): a lookup hook only runs when there is a name to resolve; Node's net
+  // module connects straight to hosts that are already IP literals, so literal-IP URLs
+  // need to be screened separately from this function.
+  ssrfLookup() {
+    const allowPrivate = !!(this.config && this.config.allowPrivateAddresses);
+    return (hostname, options, callback) => {
+      // Support the two-argument form lookup(hostname, callback).
+      if (typeof options === 'function') {
+        callback = options;
+        options = {};
+      }
+      // Remember the shape the caller asked for; we always resolve every address so
+      // that each candidate can be vetted.
+      const wantAll = !!(options && options.all);
+      dns.lookup(hostname, Object.assign({}, options, { all: true }), (err, addresses) => {
+        if (err) {
+          callback(err);
+          return;
+        }
+        if (!allowPrivate) {
+          // One non-public address is enough to refuse the whole host.
+          for (const a of addresses) {
+            if (isNonPublicAddress(a.address)) {
+              callback(new Error('Blocked request to non-public address ' + a.address + ' (host ' + hostname + ')'));
+              return;
+            }
+          }
+        }
+        // Answer in the format requested: the full list, or just the first entry.
+        if (wantAll) {
+          callback(null, addresses);
+        } else {
+          callback(null, addresses[0].address, addresses[0].family);
+        }
+      });
+    };
+  }
+
+  // http/https agents that route every connection through the SSRF lookup.
+  guardedAgents() {
+    if (!this._guardedAgents) {
+      // Function-scope require: `net` is only needed here, to recognise IP-literal hosts.
+      const net = require('net');
+      const lookup = this.ssrfLookup();
+      const allowPrivate = !!(this.config && this.config.allowPrivateAddresses);
+
+      // Node never calls a custom `lookup` when the target host is already an IP literal
+      // (there is nothing to resolve), so a server-supplied link such as
+      // http://127.0.0.1/ or http://169.254.169.254/ would bypass the DNS-based guard
+      // entirely. Screen literal hosts here, before any socket is opened.
+      const literalBlock = (options) => {
+        if (allowPrivate || !options) {
+          return null;
+        }
+        const raw = options.host || options.hostname || '';
+        const host = String(raw).replace(/^\[|\]$/g, ''); // tolerate a bracketed IPv6 literal
+        if (net.isIP(host) !== 0 && isNonPublicAddress(host)) {
+          return new Error('Blocked request to non-public address ' + host);
+        }
+        return null;
+      };
+
+      // Shared createConnection logic for both agent flavours: refuse blocked literals,
+      // otherwise delegate to the stock implementation with the guarded lookup injected.
+      const guardedConnect = (superCreate, options, cb) => {
+        const blocked = literalBlock(options);
+        if (blocked) {
+          if (typeof cb === 'function') {
+            // Reporting through the callback makes the request emit 'error' (and the
+            // axios promise reject) instead of throwing out of an event handler.
+            cb(blocked);
+            return undefined;
+          }
+          throw blocked;
+        }
+        return superCreate(Object.assign({}, options, { lookup }), cb);
+      };
+
+      class GuardedHttpAgent extends http.Agent {
+        createConnection(options, cb) {
+          return guardedConnect((o, c) => super.createConnection(o, c), options, cb);
+        }
+      }
+      class GuardedHttpsAgent extends https.Agent {
+        createConnection(options, cb) {
+          return guardedConnect((o, c) => super.createConnection(o, c), options, cb);
+        }
+      }
+      // Built once and cached on the instance; keepAlive lets sockets be reused.
+      this._guardedAgents = {
+        httpAgent: new GuardedHttpAgent({ keepAlive: true }),
+        httpsAgent: new GuardedHttpsAgent({ keepAlive: true })
+      };
+    }
+    return this._guardedAgents;
+  }
+
 
   /**
    * Main entry point - crawl the registry starting from the master URL
@@ -455,7 +532,9 @@ class RegistryCrawler {
         timeout: this.config.timeout,
         headers: headers,
         signal: this.abortController?.signal,
-        validateStatus: (status) => status < 500 // Don't throw on 4xx
+        validateStatus: (status) => status < 500, // Don't throw on 4xx
+        httpAgent: this.guardedAgents().httpAgent,
+        httpsAgent: this.guardedAgents().httpsAgent
       });
 
       if (response.status >= 400) {
diff --git a/registry/registry.js b/registry/registry.js
index c0950eba..0006b13f 100644
--- a/registry/registry.js
+++ b/registry/registry.js
@@ -976,7 +976,15 @@ class RegistryModule {
       try {
         const params = this._normalizeQueryParams(req.query);
 
-        const {fhirVersion, url, valueSet, usage} = params;
+        const {fhirVersion, valueSet, usage, version} = params;
+        let {url} = params;
+
+        // If a version was supplied separately, fold it into the code system
+        // URL using the canonical url|version syntax that the resolver expects.
+        // Don't double up if the caller already encoded a version in the url.
+ if (version && url && !url.includes('|')) { + url = `${url}|${version}`; + } // Convert authoritativeOnly to boolean const authoritativeOnly = params.authoritativeOnly === 'true'; @@ -1219,6 +1227,7 @@ class RegistryModule { buildResolveFormContent(queryParams = {}) { const fhirVersion = queryParams.fhirVersion || ''; const url = queryParams.url || ''; + const version = queryParams.version || ''; const valueSet = queryParams.valueSet || ''; const authoritativeOnly = queryParams.authoritativeOnly === 'true'; @@ -1246,6 +1255,14 @@ class RegistryModule { html += '

'; html += '

Example: http://loinc.org

'; + // Code System Version field (optional) + html += '

'; + html += ''; + html += ``; + html += '

'; + html += '

Optional. Example: 2.74 (combined with the Code System URL as url|version)

'; + // ValueSet URL field - now vertical html += '

'; html += ''; diff --git a/tests/tx/test-cases.test.js b/tests/tx/test-cases.test.js index f9b6fc1d..41387b12 100644 --- a/tests/tx/test-cases.test.js +++ b/tests/tx/test-cases.test.js @@ -176,6 +176,10 @@ describe('simple-cases', () => { await runTest({"suite":"simple-cases","test":"simple-expand-contained"}, "5.0"); }); + // it("simple-expand-contained" + 'R4', async () => { + // await runTest({"suite":"simple-cases","test":"simple-expand-contained"}, "4.0"); + // }); + }); describe('parameters', () => { @@ -5719,231 +5723,231 @@ describe('UCUM', () => { }); -describe('related', () => { +describe('compare', () => { // Tests for candidate new 'related' operation it("related-all" + 'R5', async () => { - await runTest({"suite":"related","test":"related-all"}, "5.0"); + await runTest({"suite":"compare","test":"related-all"}, "5.0"); }); it("related-all" + 'R4', async () => { - await runTest({"suite":"related","test":"related-all"}, "4.0"); + await runTest({"suite":"compare","test":"related-all"}, "4.0"); }); it("related-active" + 'R5', async () => { - await runTest({"suite":"related","test":"related-active"}, "5.0"); + await runTest({"suite":"compare","test":"related-active"}, "5.0"); }); it("related-active" + 'R4', async () => { - await runTest({"suite":"related","test":"related-active"}, "4.0"); + await runTest({"suite":"compare","test":"related-active"}, "4.0"); }); it("related-inactive" + 'R5', async () => { - await runTest({"suite":"related","test":"related-inactive"}, "5.0"); + await runTest({"suite":"compare","test":"related-inactive"}, "5.0"); }); it("related-inactive" + 'R4', async () => { - await runTest({"suite":"related","test":"related-inactive"}, "4.0"); + await runTest({"suite":"compare","test":"related-inactive"}, "4.0"); }); it("related-enumerated" + 'R5', async () => { - await runTest({"suite":"related","test":"related-enumerated"}, "5.0"); + await runTest({"suite":"compare","test":"related-enumerated"}, "5.0"); }); it("related-enumerated" + 'R4', 
async () => { - await runTest({"suite":"related","test":"related-enumerated"}, "4.0"); + await runTest({"suite":"compare","test":"related-enumerated"}, "4.0"); }); it("related-is-a" + 'R5', async () => { - await runTest({"suite":"related","test":"related-is-a"}, "5.0"); + await runTest({"suite":"compare","test":"related-is-a"}, "5.0"); }); it("related-is-a" + 'R4', async () => { - await runTest({"suite":"related","test":"related-is-a"}, "4.0"); + await runTest({"suite":"compare","test":"related-is-a"}, "4.0"); }); it("related-regex-1" + 'R5', async () => { - await runTest({"suite":"related","test":"related-regex-1"}, "5.0"); + await runTest({"suite":"compare","test":"related-regex-1"}, "5.0"); }); it("related-regex-1" + 'R4', async () => { - await runTest({"suite":"related","test":"related-regex-1"}, "4.0"); + await runTest({"suite":"compare","test":"related-regex-1"}, "4.0"); }); it("related-regex-2" + 'R5', async () => { - await runTest({"suite":"related","test":"related-regex-2"}, "5.0"); + await runTest({"suite":"compare","test":"related-regex-2"}, "5.0"); }); it("related-regex-2" + 'R4', async () => { - await runTest({"suite":"related","test":"related-regex-2"}, "4.0"); + await runTest({"suite":"compare","test":"related-regex-2"}, "4.0"); }); it("related-lists" + 'R5', async () => { - await runTest({"suite":"related","test":"related-lists"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists"}, "5.0"); }); it("related-lists" + 'R4', async () => { - await runTest({"suite":"related","test":"related-lists"}, "4.0"); + await runTest({"suite":"compare","test":"related-lists"}, "4.0"); }); it("related-lists-more" + 'R5', async () => { - await runTest({"suite":"related","test":"related-lists-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-more"}, "5.0"); }); it("related-lists-more" + 'R4', async () => { - await runTest({"suite":"related","test":"related-lists-more"}, "4.0"); + await 
runTest({"suite":"compare","test":"related-lists-more"}, "4.0"); }); it("related-lists-less" + 'R5', async () => { - await runTest({"suite":"related","test":"related-lists-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-less"}, "5.0"); }); it("related-lists-less" + 'R4', async () => { - await runTest({"suite":"related","test":"related-lists-less"}, "4.0"); + await runTest({"suite":"compare","test":"related-lists-less"}, "4.0"); }); it("related-lists-over" + 'R5', async () => { - await runTest({"suite":"related","test":"related-lists-over"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-over"}, "5.0"); }); it("related-lists-over" + 'R4', async () => { - await runTest({"suite":"related","test":"related-lists-over"}, "4.0"); + await runTest({"suite":"compare","test":"related-lists-over"}, "4.0"); }); it("related-lists-disj" + 'R5', async () => { - await runTest({"suite":"related","test":"related-lists-disj"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-disj"}, "5.0"); }); it("related-lists-disj" + 'R4', async () => { - await runTest({"suite":"related","test":"related-lists-disj"}, "4.0"); + await runTest({"suite":"compare","test":"related-lists-disj"}, "4.0"); }); it("related-systems" + 'R5', async () => { - await runTest({"suite":"related","test":"related-systems"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems"}, "5.0"); }); it("related-systems" + 'R4', async () => { - await runTest({"suite":"related","test":"related-systems"}, "4.0"); + await runTest({"suite":"compare","test":"related-systems"}, "4.0"); }); it("related-systems" + 'R5', async () => { - await runTest({"suite":"related","test":"related-systems"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems"}, "5.0"); }); it("related-systems" + 'R4', async () => { - await runTest({"suite":"related","test":"related-systems"}, "4.0"); + await runTest({"suite":"compare","test":"related-systems"}, "4.0"); }); 
it("related-systems-less" + 'R5', async () => { - await runTest({"suite":"related","test":"related-systems-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems-less"}, "5.0"); }); it("related-systems-less" + 'R4', async () => { - await runTest({"suite":"related","test":"related-systems-less"}, "4.0"); + await runTest({"suite":"compare","test":"related-systems-less"}, "4.0"); }); it("related-systems-more" + 'R5', async () => { - await runTest({"suite":"related","test":"related-systems-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems-more"}, "5.0"); }); it("related-systems-more" + 'R4', async () => { - await runTest({"suite":"related","test":"related-systems-more"}, "4.0"); + await runTest({"suite":"compare","test":"related-systems-more"}, "4.0"); }); it("related-system-disj" + 'R5', async () => { - await runTest({"suite":"related","test":"related-system-disj"}, "5.0"); + await runTest({"suite":"compare","test":"related-system-disj"}, "5.0"); }); it("related-system-disj" + 'R4', async () => { - await runTest({"suite":"related","test":"related-system-disj"}, "4.0"); + await runTest({"suite":"compare","test":"related-system-disj"}, "4.0"); }); it("related-system-over" + 'R5', async () => { - await runTest({"suite":"related","test":"related-system-over"}, "5.0"); + await runTest({"suite":"compare","test":"related-system-over"}, "5.0"); }); it("related-system-over" + 'R4', async () => { - await runTest({"suite":"related","test":"related-system-over"}, "4.0"); + await runTest({"suite":"compare","test":"related-system-over"}, "4.0"); }); it("related-filters-1" + 'R5', async () => { - await runTest({"suite":"related","test":"related-filters-1"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-1"}, "5.0"); }); it("related-filters-1" + 'R4', async () => { - await runTest({"suite":"related","test":"related-filters-1"}, "4.0"); + await runTest({"suite":"compare","test":"related-filters-1"}, "4.0"); }); 
it("related-filters-2" + 'R5', async () => { - await runTest({"suite":"related","test":"related-filters-2"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-2"}, "5.0"); }); it("related-filters-2" + 'R4', async () => { - await runTest({"suite":"related","test":"related-filters-2"}, "4.0"); + await runTest({"suite":"compare","test":"related-filters-2"}, "4.0"); }); it("related-filters-3" + 'R5', async () => { - await runTest({"suite":"related","test":"related-filters-3"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-3"}, "5.0"); }); it("related-filters-3" + 'R4', async () => { - await runTest({"suite":"related","test":"related-filters-3"}, "4.0"); + await runTest({"suite":"compare","test":"related-filters-3"}, "4.0"); }); it("related-mixed-1" + 'R5', async () => { - await runTest({"suite":"related","test":"related-mixed-1"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1"}, "5.0"); }); it("related-mixed-1" + 'R4', async () => { - await runTest({"suite":"related","test":"related-mixed-1"}, "4.0"); + await runTest({"suite":"compare","test":"related-mixed-1"}, "4.0"); }); it("related-mixed-1-less" + 'R5', async () => { - await runTest({"suite":"related","test":"related-mixed-1-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-less"}, "5.0"); }); it("related-mixed-1-less" + 'R4', async () => { - await runTest({"suite":"related","test":"related-mixed-1-less"}, "4.0"); + await runTest({"suite":"compare","test":"related-mixed-1-less"}, "4.0"); }); it("related-mixed-1-more" + 'R5', async () => { - await runTest({"suite":"related","test":"related-mixed-1-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-more"}, "5.0"); }); it("related-mixed-1-more" + 'R4', async () => { - await runTest({"suite":"related","test":"related-mixed-1-more"}, "4.0"); + await runTest({"suite":"compare","test":"related-mixed-1-more"}, "4.0"); }); it("related-mixed-1-disj" + 'R5', async () => 
{ - await runTest({"suite":"related","test":"related-mixed-1-disj"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-disj"}, "5.0"); }); it("related-mixed-1-disj" + 'R4', async () => { - await runTest({"suite":"related","test":"related-mixed-1-disj"}, "4.0"); + await runTest({"suite":"compare","test":"related-mixed-1-disj"}, "4.0"); }); it("related-mixed-1-over" + 'R5', async () => { - await runTest({"suite":"related","test":"related-mixed-1-over"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-over"}, "5.0"); }); it("related-mixed-1-over" + 'R4', async () => { - await runTest({"suite":"related","test":"related-mixed-1-over"}, "4.0"); + await runTest({"suite":"compare","test":"related-mixed-1-over"}, "4.0"); }); it("related-filters-less" + 'R5', async () => { - await runTest({"suite":"related","test":"related-filters-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-less"}, "5.0"); }); it("related-filters-less" + 'R4', async () => { - await runTest({"suite":"related","test":"related-filters-less"}, "4.0"); + await runTest({"suite":"compare","test":"related-filters-less"}, "4.0"); }); it("related-filters-more" + 'R5', async () => { - await runTest({"suite":"related","test":"related-filters-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-more"}, "5.0"); }); it("related-filters-more" + 'R4', async () => { - await runTest({"suite":"related","test":"related-filters-more"}, "4.0"); + await runTest({"suite":"compare","test":"related-filters-more"}, "4.0"); }); }); @@ -9771,119 +9775,119 @@ describe('UCUM', () => { }); -describe('related', () => { +describe('compare', () => { // Tests for candidate new 'related' operation it("related-all" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-all"}, "5.0"); + await runTest({"suite":"compare","test":"related-all"}, "5.0"); }); it("related-active" + 'R5-cached', async () => { - await 
runTest({"suite":"related","test":"related-active"}, "5.0"); + await runTest({"suite":"compare","test":"related-active"}, "5.0"); }); it("related-inactive" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-inactive"}, "5.0"); + await runTest({"suite":"compare","test":"related-inactive"}, "5.0"); }); it("related-enumerated" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-enumerated"}, "5.0"); + await runTest({"suite":"compare","test":"related-enumerated"}, "5.0"); }); it("related-is-a" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-is-a"}, "5.0"); + await runTest({"suite":"compare","test":"related-is-a"}, "5.0"); }); it("related-regex-1" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-regex-1"}, "5.0"); + await runTest({"suite":"compare","test":"related-regex-1"}, "5.0"); }); it("related-regex-2" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-regex-2"}, "5.0"); + await runTest({"suite":"compare","test":"related-regex-2"}, "5.0"); }); it("related-lists" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-lists"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists"}, "5.0"); }); it("related-lists-more" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-lists-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-more"}, "5.0"); }); it("related-lists-less" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-lists-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-less"}, "5.0"); }); it("related-lists-over" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-lists-over"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-over"}, "5.0"); }); it("related-lists-disj" + 'R5-cached', async () => { - await 
runTest({"suite":"related","test":"related-lists-disj"}, "5.0"); + await runTest({"suite":"compare","test":"related-lists-disj"}, "5.0"); }); it("related-systems" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-systems"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems"}, "5.0"); }); it("related-systems" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-systems"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems"}, "5.0"); }); it("related-systems-less" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-systems-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems-less"}, "5.0"); }); it("related-systems-more" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-systems-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-systems-more"}, "5.0"); }); it("related-system-disj" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-system-disj"}, "5.0"); + await runTest({"suite":"compare","test":"related-system-disj"}, "5.0"); }); it("related-system-over" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-system-over"}, "5.0"); + await runTest({"suite":"compare","test":"related-system-over"}, "5.0"); }); it("related-filters-1" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-filters-1"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-1"}, "5.0"); }); it("related-filters-2" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-filters-2"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-2"}, "5.0"); }); it("related-filters-3" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-filters-3"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-3"}, "5.0"); }); it("related-mixed-1" + 'R5-cached', 
async () => { - await runTest({"suite":"related","test":"related-mixed-1"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1"}, "5.0"); }); it("related-mixed-1-less" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-mixed-1-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-less"}, "5.0"); }); it("related-mixed-1-more" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-mixed-1-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-more"}, "5.0"); }); it("related-mixed-1-disj" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-mixed-1-disj"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-disj"}, "5.0"); }); it("related-mixed-1-over" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-mixed-1-over"}, "5.0"); + await runTest({"suite":"compare","test":"related-mixed-1-over"}, "5.0"); }); it("related-filters-less" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-filters-less"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-less"}, "5.0"); }); it("related-filters-more" + 'R5-cached', async () => { - await runTest({"suite":"related","test":"related-filters-more"}, "5.0"); + await runTest({"suite":"compare","test":"related-filters-more"}, "5.0"); }); });