From f239bb2d273587672262cbfbd0250f7feaa31d35 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 01:34:03 +0300 Subject: [PATCH 01/14] feat: add cpe version-string helpers --- runner/cpe.go | 28 ++++++++++++++++++++ runner/cpe_test.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 runner/cpe_test.go diff --git a/runner/cpe.go b/runner/cpe.go index 76444fa9f..485d25f73 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -90,6 +90,34 @@ func generateCPE(vendor, product string) string { strings.ToLower(strings.ReplaceAll(product, " ", "_"))) } +// cpeVersionFieldIndex is the zero-based position of the version field in a +// CPE 2.3 formatted string: cpe:2.3:::::... +const cpeVersionFieldIndex = 5 + +// sanitizeCPEVersion normalizes a detected version for embedding in a CPE +// string, matching the lowercase + space-to-underscore convention used by +// generateCPE for vendor/product. +func sanitizeCPEVersion(version string) string { + return strings.ToLower(strings.ReplaceAll(strings.TrimSpace(version), " ", "_")) +} + +// setCPEVersion returns a copy of a CPE 2.3 string with its version field +// replaced. An empty version, an empty cpe, or a string that is not a +// well-formed CPE 2.3 value (fewer than the expected fields, or wrong prefix) +// is returned unchanged. +func setCPEVersion(cpe, version string) string { + version = sanitizeCPEVersion(version) + if cpe == "" || version == "" { + return cpe + } + parts := strings.Split(cpe, ":") + if len(parts) <= cpeVersionFieldIndex || parts[0] != "cpe" || parts[1] != "2.3" { + return cpe + } + parts[cpeVersionFieldIndex] = version + return strings.Join(parts, ":") +} + func (d *CPEDetector) extractPattern(query string, info CPEInfo) { query = strings.TrimSpace(query) diff --git a/runner/cpe_test.go b/runner/cpe_test.go new file mode 100644 index 000000000..c3861078d --- /dev/null +++ b/runner/cpe_test.go @@ -0,0 +1,66 @@ +package runner + +import ( + "testing" +) + +func TestSanitizeCPEVersion(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {"plain semver", "2.4.7", "2.4.7"}, + {"uppercase", "1.0.0-RC1", "1.0.0-rc1"}, + {"spaces to underscore", "10 0", "10_0"}, + {"empty", "", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := sanitizeCPEVersion(tt.in); got != tt.want { + t.Fatalf("sanitizeCPEVersion(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +func TestSetCPEVersion(t *testing.T) { + tests := []struct { + name string + cpe string + version string + want string + }{ + { + name: "fills version slot", + cpe: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*", + version: "14.2.3", + want: "cpe:2.3:a:vercel:next.js:14.2.3:*:*:*:*:*:*:*", + }, + { + name: "empty version leaves cpe unchanged", + cpe: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*", + version: "", + want: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*", + }, + { + name: "empty cpe stays empty", + cpe: "", + version: "1.2.3", + want: "", + }, + { + name: "malformed cpe returned unchanged", + cpe: "not-a-cpe", + version: "1.2.3", + want: "not-a-cpe", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := setCPEVersion(tt.cpe, tt.version); got != tt.want { + t.Fatalf("setCPEVersion(%q, %q) = %q, want %q", tt.cpe, tt.version, got, tt.want) + } + }) + } +} From 55d21175fe9b151ba45a45a9606946d089beee11 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 01:35:17 +0300 Subject: [PATCH 02/14] feat: parse detected tech versions into lookup map --- runner/cpe.go | 22 ++++++++++++++++++++++ runner/cpe_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/runner/cpe.go b/runner/cpe.go index 485d25f73..ae2fc873d 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -118,6 +118,28 @@ func setCPEVersion(cpe, version string) string { return strings.Join(parts, ":") } +// buildTechVersionMap converts wappalyzer technology entries of the form +// "Name:version" (e.g. "Apache HTTP Server:2.4.7") into a lookup keyed by the +// lowercased, trimmed technology name. Entries without a version, or with an +// empty version, are skipped. This mirrors wappalyzergo's FormatAppVersion +// convention where the version is appended after a ':' separator. +func buildTechVersionMap(technologies []string) map[string]string { + versions := make(map[string]string) + for _, tech := range technologies { + parts := strings.SplitN(tech, ":", 2) + if len(parts) != 2 { + continue + } + name := strings.ToLower(strings.TrimSpace(parts[0])) + version := strings.TrimSpace(parts[1]) + if name == "" || version == "" { + continue + } + versions[name] = version + } + return versions +} + func (d *CPEDetector) extractPattern(query string, info CPEInfo) { query = strings.TrimSpace(query) diff --git a/runner/cpe_test.go b/runner/cpe_test.go index c3861078d..e9d1be1dd 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -64,3 +64,34 @@ func TestSetCPEVersion(t *testing.T) { }) } } + +func TestBuildTechVersionMap(t *testing.T) { + techs := []string{ + "Apache HTTP Server:2.4.7", + "PHP:5.5.9", + "Bootstrap", // no version -> not in map + "Next.js:14.2.3", + "jQuery:", // empty version -> not in map + } + got := buildTechVersionMap(techs) + + want := map[string]string{ + "apache http server": "2.4.7", + "php": "5.5.9", + "next.js": "14.2.3", + } + if len(got) != len(want) { + t.Fatalf("map size = %d, want %d (%v)", len(got), len(want), got) + } + for k, v := range want { + if got[k] != v { + t.Fatalf("got[%q] = %q, want %q", k, got[k], v) + } + } + if _, ok := got["bootstrap"]; ok { + t.Fatalf("bootstrap should not be present (no version)") + } + if _, ok := got["jquery"]; ok { + t.Fatalf("jquery should not be present (empty version)") + } +} From a100b6f6883b5e64a5692e64cd9c636856b7cb71 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 01:37:12 +0300 Subject: [PATCH 03/14] feat: enrich cpe matches with detected product versions --- runner/cpe.go | 22 ++++++++++++++++++++++ runner/cpe_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/runner/cpe.go b/runner/cpe.go index ae2fc873d..6c22e1f32 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -140,6 +140,28 @@ func buildTechVersionMap(technologies []string) map[string]string { return versions } +// EnrichCPEVersions returns a new slice of CPEInfo in which the version field +// of each CPE string is filled from a matching detected technology version, +// when available. Matching is by product name, compared case-insensitively +// after trimming. Inputs are not mutated. When the technology sources name a +// product differently than awesome-search-queries, no version is injected and +// the CPE keeps its '*' version (no regression). +func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { + if len(matches) == 0 { + return matches + } + versions := buildTechVersionMap(technologies) + + enriched := make([]CPEInfo, len(matches)) + for i, match := range matches { + enriched[i] = match + if version, ok := versions[strings.ToLower(strings.TrimSpace(match.Product))]; ok { + enriched[i].CPE = setCPEVersion(match.CPE, version) + } + } + return enriched +} + func (d *CPEDetector) extractPattern(query string, info CPEInfo) { query = strings.TrimSpace(query) diff --git a/runner/cpe_test.go b/runner/cpe_test.go index e9d1be1dd..7b9afe9dd 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -95,3 +95,41 @@ func TestBuildTechVersionMap(t *testing.T) { t.Fatalf("jquery should not be present (empty version)") } } + +func TestEnrichCPEVersions(t *testing.T) { + matches := []CPEInfo{ + {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"}, + {Product: "Apache HTTP Server", Vendor: "apache", CPE: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*"}, + {Product: "Bootstrap", Vendor: "getbootstrap", CPE: "cpe:2.3:a:getbootstrap:bootstrap:*:*:*:*:*:*:*:*"}, + } + technologies := []string{"Next.js:14.2.3", "Apache HTTP Server:2.4.7", "Bootstrap"} + + got := EnrichCPEVersions(matches, technologies) + + // issue #2476: next.js version is injected + if got[0].CPE != "cpe:2.3:a:vercel:next.js:14.2.3:*:*:*:*:*:*:*" { + t.Fatalf("next.js CPE = %q, want version 14.2.3 injected", got[0].CPE) + } + // case-insensitive product match works for multi-word names + if got[1].CPE != "cpe:2.3:a:apache:apache_http_server:2.4.7:*:*:*:*:*:*:*" { + t.Fatalf("apache CPE = %q, want version 2.4.7 injected", got[1].CPE) + } + // no detected version -> unchanged (still '*') + if got[2].CPE != "cpe:2.3:a:getbootstrap:bootstrap:*:*:*:*:*:*:*:*" { + t.Fatalf("bootstrap CPE = %q, want unchanged", got[2].CPE) + } + // input must not be mutated (immutability) + if matches[0].CPE != "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*" { + t.Fatalf("input matches[0] was mutated: %q", matches[0].CPE) + } +} + +func TestEnrichCPEVersionsNoTechnologies(t *testing.T) { + matches := []CPEInfo{ + {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"}, + } + got := EnrichCPEVersions(matches, nil) + if got[0].CPE != matches[0].CPE { + t.Fatalf("with no technologies CPE should be unchanged, got %q", got[0].CPE) + } +} From 4a590ff58a11c6779abf4d8bc067d30c44249b7f Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 01:37:55 +0300 Subject: [PATCH 04/14] feat: inject detected version into cpe output --- runner/runner.go | 1 + 1 file changed, 1 insertion(+) diff --git a/runner/runner.go b/runner/runner.go index 352e4c74c..a2be060ed 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -2599,6 +2599,7 @@ retry: var cpeMatches []CPEInfo if r.cpeDetector != nil { cpeMatches = r.cpeDetector.Detect(title, string(resp.Data), faviconMMH3) + cpeMatches = EnrichCPEVersions(cpeMatches, technologies) if len(cpeMatches) > 0 && r.options.CPEDetect { for _, cpe := range cpeMatches { builder.WriteString(" [") From 393327e86d0906e75b815eec7ca68dcb09c03e1b Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 01:42:45 +0300 Subject: [PATCH 05/14] test: add cpe version enrichment functional testcase --- cmd/functional-test/testcases.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/functional-test/testcases.txt b/cmd/functional-test/testcases.txt index 34a53b4ce..2eab29b3f 100644 --- a/cmd/functional-test/testcases.txt +++ b/cmd/functional-test/testcases.txt @@ -20,4 +20,5 @@ scanme.sh {{binary}} -silent -jarm https://scanme.sh?a=1*1 {{binary}} -silent https://scanme.sh:443 {{binary}} -asn scanme.sh {{binary}} -silent -tls-impersonate -example.com {{binary}} -silent -bp -strip \ No newline at end of file +example.com {{binary}} -silent -bp -strip +scanme.sh {{binary}} -cpe -silent From ca3a0619277e9e5d9e787d9d15393040ebe5f9cf Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 02:10:44 +0300 Subject: [PATCH 06/14] fix: enable tech-detect for -cpe so versions populate (#2476) --- runner/cpe.go | 13 +++++++++++++ runner/cpe_test.go | 24 ++++++++++++++++++++++++ runner/runner.go | 2 +- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/runner/cpe.go b/runner/cpe.go index 6c22e1f32..9e4d91774 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -90,6 +90,19 @@ func generateCPE(vendor, product string) string { strings.ToLower(strings.ReplaceAll(product, " ", "_"))) } +// techDetectRequired reports whether technology fingerprinting must run for the +// currently enabled options. Beyond -tech-detect itself, JSON/CSV output and +// asset upload embed the technology list, and -cpe reuses the versions detected +// by wappalyzer to fill the version field of CPE strings — so any of them +// requires tech-detect to populate the technology list. +func techDetectRequired(options *Options) bool { + return options.TechDetect || + options.JSONOutput || + options.CSVOutput || + options.AssetUpload || + options.CPEDetect +} + // cpeVersionFieldIndex is the zero-based position of the version field in a // CPE 2.3 formatted string: cpe:2.3:::::... const cpeVersionFieldIndex = 5 diff --git a/runner/cpe_test.go b/runner/cpe_test.go index 7b9afe9dd..f78da0607 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -24,6 +24,30 @@ func TestSanitizeCPEVersion(t *testing.T) { } } +func TestTechDetectRequired(t *testing.T) { + tests := []struct { + name string + options *Options + want bool + }{ + {"nothing enabled", &Options{}, false}, + {"tech-detect flag", &Options{TechDetect: true}, true}, + {"json output", &Options{JSONOutput: true}, true}, + {"csv output", &Options{CSVOutput: true}, true}, + {"asset upload", &Options{AssetUpload: true}, true}, + // issue #2476: -cpe alone must turn tech-detect on, because CPE + // enrichment reuses the versions wappalyzer extracts. + {"cpe alone enables tech-detect", &Options{CPEDetect: true}, true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := techDetectRequired(tt.options); got != tt.want { + t.Fatalf("techDetectRequired(%+v) = %v, want %v", tt.options, got, tt.want) + } + }) + } +} + func TestSetCPEVersion(t *testing.T) { tests := []struct { name string diff --git a/runner/runner.go b/runner/runner.go index a2be060ed..afede26dd 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -340,7 +340,7 @@ func New(options *Options) (*Runner, error) { scanopts.OutputResponseTime = options.OutputResponseTime scanopts.NoFallback = options.NoFallback scanopts.NoFallbackScheme = options.NoFallbackScheme - scanopts.TechDetect = options.TechDetect || options.JSONOutput || options.CSVOutput || options.AssetUpload + scanopts.TechDetect = techDetectRequired(options) scanopts.CPEDetect = options.CPEDetect || options.JSONOutput || options.CSVOutput scanopts.WordPress = options.WordPress || options.JSONOutput || options.CSVOutput scanopts.StoreChain = options.StoreChain From ba87a34034eaf1ce77af1b79b5e8e05a997ee4df Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 02:12:19 +0300 Subject: [PATCH 07/14] fix: skip cpe version enrichment for values with reserved chars --- runner/cpe.go | 6 ++++++ runner/cpe_test.go | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/runner/cpe.go b/runner/cpe.go index 9e4d91774..8f17f0bd7 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -123,6 +123,12 @@ func setCPEVersion(cpe, version string) string { if cpe == "" || version == "" { return cpe } + // A version that still contains CPE 2.3 structural (':') or wildcard + // ('*', '?') characters would corrupt the field layout or change matching + // semantics. Leave the CPE unenriched rather than emit a malformed value. + if strings.ContainsAny(version, ":*?") { + return cpe + } parts := strings.Split(cpe, ":") if len(parts) <= cpeVersionFieldIndex || parts[0] != "cpe" || parts[1] != "2.3" { return cpe diff --git a/runner/cpe_test.go b/runner/cpe_test.go index f78da0607..200b10944 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -79,6 +79,18 @@ func TestSetCPEVersion(t *testing.T) { version: "1.2.3", want: "not-a-cpe", }, + { + name: "version with colon leaves cpe unchanged", + cpe: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + version: "1.0:beta", + want: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + }, + { + name: "version with wildcard leaves cpe unchanged", + cpe: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + version: "2.*", + want: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From b071d44b79ac0a8252460d87c2040b7c9b7c3e46 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 02:34:32 +0300 Subject: [PATCH 08/14] fix: prevent nil wappalyzer panic when -cpe used alone --- runner/runner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runner/runner.go b/runner/runner.go index afede26dd..9764d4658 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -148,7 +148,7 @@ func New(options *Options) (*Runner, error) { var err error if options.Wappalyzer != nil { runner.wappalyzer = options.Wappalyzer - } else if options.TechDetect || options.JSONOutput || options.CSVOutput || options.AssetUpload { + } else if techDetectRequired(options) { runner.wappalyzer, err = func() (*wappalyzer.Wappalyze, error) { if options.CustomFingerprintFile != "" { return wappalyzer.NewFromFile(options.CustomFingerprintFile, true, true) @@ -2566,7 +2566,7 @@ retry: // As we now have headless body, we can also use it for detecting // more technologies in the response. This is a quick trick to get // more detected technologies. - if r.options.TechDetect || r.options.JSONOutput || r.options.CSVOutput { + if techDetectRequired(r.options) { moreMatches := r.wappalyzer.FingerprintWithInfo(resp.Headers, []byte(headlessBody)) for match, data := range moreMatches { technologies = append(technologies, match) From 588fb0179a364341a427d9b96c7d3baf061d1906 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 02:34:32 +0300 Subject: [PATCH 09/14] fix: tighten cpe 2.3 field validation and presize tech map --- runner/cpe.go | 11 ++++++++--- runner/cpe_test.go | 6 ++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/runner/cpe.go b/runner/cpe.go index 8f17f0bd7..252f34cf0 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -107,6 +107,11 @@ func techDetectRequired(options *Options) bool { // CPE 2.3 formatted string: cpe:2.3:::::... const cpeVersionFieldIndex = 5 +// cpeFieldCount is the exact number of colon-separated fields in a well-formed +// CPE 2.3 string: cpe, 2.3, part, vendor, product, version, update, edition, +// language, sw_edition, target_sw, target_hw, other. +const cpeFieldCount = 13 + // sanitizeCPEVersion normalizes a detected version for embedding in a CPE // string, matching the lowercase + space-to-underscore convention used by // generateCPE for vendor/product. @@ -130,7 +135,7 @@ func setCPEVersion(cpe, version string) string { return cpe } parts := strings.Split(cpe, ":") - if len(parts) <= cpeVersionFieldIndex || parts[0] != "cpe" || parts[1] != "2.3" { + if len(parts) != cpeFieldCount || parts[0] != "cpe" || parts[1] != "2.3" { return cpe } parts[cpeVersionFieldIndex] = version @@ -143,7 +148,7 @@ func setCPEVersion(cpe, version string) string { // empty version, are skipped. This mirrors wappalyzergo's FormatAppVersion // convention where the version is appended after a ':' separator. func buildTechVersionMap(technologies []string) map[string]string { - versions := make(map[string]string) + versions := make(map[string]string, len(technologies)) for _, tech := range technologies { parts := strings.SplitN(tech, ":", 2) if len(parts) != 2 { @@ -166,7 +171,7 @@ func buildTechVersionMap(technologies []string) map[string]string { // product differently than awesome-search-queries, no version is injected and // the CPE keeps its '*' version (no regression). func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { - if len(matches) == 0 { + if len(matches) == 0 || len(technologies) == 0 { return matches } versions := buildTechVersionMap(technologies) diff --git a/runner/cpe_test.go b/runner/cpe_test.go index 200b10944..ef96ec3b1 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -79,6 +79,12 @@ func TestSetCPEVersion(t *testing.T) { version: "1.2.3", want: "not-a-cpe", }, + { + name: "truncated cpe returned unchanged", + cpe: "cpe:2.3:a:vendor:product:*", + version: "1.2.3", + want: "cpe:2.3:a:vendor:product:*", + }, { name: "version with colon leaves cpe unchanged", cpe: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", From c4e08198ad14407e2fa33cf9d9f4ab8f78080c35 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 03:03:50 +0300 Subject: [PATCH 10/14] docs: note product version in -cpe flag description --- runner/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runner/options.go b/runner/options.go index 89caa6ccd..3e9a65f9d 100644 --- a/runner/options.go +++ b/runner/options.go @@ -410,7 +410,7 @@ func ParseOptions() *Options { flagSet.BoolVarP(&options.OutputServerHeader, "web-server", "server", false, "display server name"), flagSet.BoolVarP(&options.TechDetect, "tech-detect", "td", false, "display technology in use based on wappalyzer dataset"), flagSet.StringVarP(&options.CustomFingerprintFile, "custom-fingerprint-file", "cff", "", "path to a custom fingerprint file for technology detection"), - flagSet.BoolVar(&options.CPEDetect, "cpe", false, "display CPE (Common Platform Enumeration) based on awesome-search-queries"), + flagSet.BoolVar(&options.CPEDetect, "cpe", false, "display CPE (Common Platform Enumeration) with product version based on awesome-search-queries"), flagSet.BoolVarP(&options.WordPress, "wordpress", "wp", false, "display WordPress plugins and themes"), flagSet.BoolVar(&options.OutputMethod, "method", false, "display http request method"), flagSet.BoolVarP(&options.OutputWebSocket, "websocket", "ws", false, "display server using websocket"), From 47c471f48fc536cb539d771094d9d4f513528e30 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 03:11:26 +0300 Subject: [PATCH 11/14] docs: tighten cpe helper comments to match package style --- runner/cpe.go | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/runner/cpe.go b/runner/cpe.go index 252f34cf0..51aa4251e 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -90,11 +90,9 @@ func generateCPE(vendor, product string) string { strings.ToLower(strings.ReplaceAll(product, " ", "_"))) } -// techDetectRequired reports whether technology fingerprinting must run for the -// currently enabled options. Beyond -tech-detect itself, JSON/CSV output and -// asset upload embed the technology list, and -cpe reuses the versions detected -// by wappalyzer to fill the version field of CPE strings — so any of them -// requires tech-detect to populate the technology list. +// techDetectRequired reports whether tech-detect must run: JSON/CSV output, +// asset upload, and -cpe (which reuses detected versions) all consume the +// technology list. func techDetectRequired(options *Options) bool { return options.TechDetect || options.JSONOutput || @@ -120,17 +118,15 @@ func sanitizeCPEVersion(version string) string { } // setCPEVersion returns a copy of a CPE 2.3 string with its version field -// replaced. An empty version, an empty cpe, or a string that is not a -// well-formed CPE 2.3 value (fewer than the expected fields, or wrong prefix) -// is returned unchanged. +// replaced. The input is returned unchanged if version/cpe is empty or the CPE +// is malformed. func setCPEVersion(cpe, version string) string { version = sanitizeCPEVersion(version) if cpe == "" || version == "" { return cpe } - // A version that still contains CPE 2.3 structural (':') or wildcard - // ('*', '?') characters would corrupt the field layout or change matching - // semantics. Leave the CPE unenriched rather than emit a malformed value. + // Reserved CPE 2.3 chars (':' field separator, '*'/'?' wildcards) would + // corrupt the field layout or matching semantics; leave the CPE unenriched. if strings.ContainsAny(version, ":*?") { return cpe } @@ -142,11 +138,9 @@ func setCPEVersion(cpe, version string) string { return strings.Join(parts, ":") } -// buildTechVersionMap converts wappalyzer technology entries of the form -// "Name:version" (e.g. "Apache HTTP Server:2.4.7") into a lookup keyed by the -// lowercased, trimmed technology name. Entries without a version, or with an -// empty version, are skipped. This mirrors wappalyzergo's FormatAppVersion -// convention where the version is appended after a ':' separator. +// buildTechVersionMap maps lowercased technology name -> version, parsing +// wappalyzer's "Name:version" entries (FormatAppVersion convention). Entries +// without a version are skipped. func buildTechVersionMap(technologies []string) map[string]string { versions := make(map[string]string, len(technologies)) for _, tech := range technologies { @@ -164,12 +158,10 @@ func buildTechVersionMap(technologies []string) map[string]string { return versions } -// EnrichCPEVersions returns a new slice of CPEInfo in which the version field -// of each CPE string is filled from a matching detected technology version, -// when available. Matching is by product name, compared case-insensitively -// after trimming. Inputs are not mutated. When the technology sources name a -// product differently than awesome-search-queries, no version is injected and -// the CPE keeps its '*' version (no regression). +// EnrichCPEVersions returns a copy of matches with each CPE version field +// filled from a matching detected technology, keyed by product name +// (case-insensitive). Unmatched products keep their '*' version. Inputs are +// not mutated. func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { if len(matches) == 0 || len(technologies) == 0 { return matches From 75d0a8f7b038b2e5045b5435cdca08562040cfb0 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 03:22:34 +0300 Subject: [PATCH 12/14] fix(cpe): preserve version case and drop ambiguous tech versions --- runner/cpe.go | 19 +++++++++++++++---- runner/cpe_test.go | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/runner/cpe.go b/runner/cpe.go index 51aa4251e..a803e05ab 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -111,10 +111,11 @@ const cpeVersionFieldIndex = 5 const cpeFieldCount = 13 // sanitizeCPEVersion normalizes a detected version for embedding in a CPE -// string, matching the lowercase + space-to-underscore convention used by -// generateCPE for vendor/product. +// string: trim surrounding space and replace inner spaces with underscores. +// Case is preserved — CPE 2.3 matching is case-insensitive, and lowercasing +// would corrupt semantically meaningful identifiers like 1.0.0-RC1 or 9.0.0.M1. func sanitizeCPEVersion(version string) string { - return strings.ToLower(strings.ReplaceAll(strings.TrimSpace(version), " ", "_")) + return strings.ReplaceAll(strings.TrimSpace(version), " ", "_") } // setCPEVersion returns a copy of a CPE 2.3 string with its version field @@ -140,9 +141,11 @@ func setCPEVersion(cpe, version string) string { // buildTechVersionMap maps lowercased technology name -> version, parsing // wappalyzer's "Name:version" entries (FormatAppVersion convention). Entries -// without a version are skipped. +// without a version are skipped. A product reported with conflicting versions +// is dropped rather than resolved by map iteration order, which is random. func buildTechVersionMap(technologies []string) map[string]string { versions := make(map[string]string, len(technologies)) + conflicting := make(map[string]struct{}) for _, tech := range technologies { parts := strings.SplitN(tech, ":", 2) if len(parts) != 2 { @@ -153,6 +156,14 @@ func buildTechVersionMap(technologies []string) map[string]string { if name == "" || version == "" { continue } + if _, ok := conflicting[name]; ok { + continue + } + if existing, ok := versions[name]; ok && existing != version { + delete(versions, name) + conflicting[name] = struct{}{} + continue + } versions[name] = version } return versions diff --git a/runner/cpe_test.go b/runner/cpe_test.go index ef96ec3b1..8ee64cbe5 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -11,7 +11,7 @@ func TestSanitizeCPEVersion(t *testing.T) { want string }{ {"plain semver", "2.4.7", "2.4.7"}, - {"uppercase", "1.0.0-RC1", "1.0.0-rc1"}, + {"case preserved", "1.0.0-RC1", "1.0.0-RC1"}, {"spaces to underscore", "10 0", "10_0"}, {"empty", "", ""}, } @@ -138,6 +138,20 @@ func TestBuildTechVersionMap(t *testing.T) { } } +func TestBuildTechVersionMapConflict(t *testing.T) { + // the same product reported with two versions must be dropped, not resolved + // by random map iteration order. + techs := []string{"Foo:1.2.3", "Foo:1.2.4", "Bar:9.0"} + got := buildTechVersionMap(techs) + + if _, ok := got["foo"]; ok { + t.Fatalf("conflicting product foo should be dropped, got %q", got["foo"]) + } + if got["bar"] != "9.0" { + t.Fatalf("got[bar] = %q, want 9.0", got["bar"]) + } +} + func TestEnrichCPEVersions(t *testing.T) { matches := []CPEInfo{ {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"}, From fc04813c9cf79f5b13a46bfcfa631ae5a8d4df10 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 4 Jun 2026 03:26:56 +0300 Subject: [PATCH 13/14] fix(cpe): return a copy from EnrichCPEVersions early-return path --- runner/cpe.go | 2 +- runner/cpe_test.go | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/runner/cpe.go b/runner/cpe.go index a803e05ab..85c714404 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -175,7 +175,7 @@ func buildTechVersionMap(technologies []string) map[string]string { // not mutated. func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { if len(matches) == 0 || len(technologies) == 0 { - return matches + return append([]CPEInfo(nil), matches...) } versions := buildTechVersionMap(technologies) diff --git a/runner/cpe_test.go b/runner/cpe_test.go index 8ee64cbe5..06fbeebaa 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -188,4 +188,10 @@ func TestEnrichCPEVersionsNoTechnologies(t *testing.T) { if got[0].CPE != matches[0].CPE { t.Fatalf("with no technologies CPE should be unchanged, got %q", got[0].CPE) } + // the early-return path must still return a copy: mutating the result + // must not reach back into the caller's input slice. + got[0].CPE = "mutated" + if matches[0].CPE == "mutated" { + t.Fatalf("early-return aliased the input slice; want a copy") + } } From b293000d5cbbd9b7573e784a034a601f20e78430 Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Mon, 15 Jun 2026 11:30:56 +0200 Subject: [PATCH 14/14] improve matching --- runner/cpe.go | 33 +++++++++++++++---- runner/cpe_test.go | 79 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 10 deletions(-) diff --git a/runner/cpe.go b/runner/cpe.go index 85c714404..6861dea86 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -139,7 +139,28 @@ func setCPEVersion(cpe, version string) string { return strings.Join(parts, ":") } -// buildTechVersionMap maps lowercased technology name -> version, parsing +// normalizeProductName reduces a product/technology name to its lowercase +// alphanumeric form so the two independent datasets can be joined. The CPE +// product names (awesome-search-queries) are mostly snake_case +// (e.g. "weblogic_server") while wappalyzer reports display names +// (e.g. "WebLogic Server"); stripping every non-alphanumeric rune lets those +// align. It is strictly more permissive than a lower+trim compare, so it never +// drops a previously matching pair, only adds new ones. +func normalizeProductName(name string) string { + var b strings.Builder + b.Grow(len(name)) + for _, r := range name { + switch { + case r >= 'a' && r <= 'z', r >= '0' && r <= '9': + b.WriteRune(r) + case r >= 'A' && r <= 'Z': + b.WriteRune(r + ('a' - 'A')) + } + } + return b.String() +} + +// buildTechVersionMap maps normalized technology name -> version, parsing // wappalyzer's "Name:version" entries (FormatAppVersion convention). Entries // without a version are skipped. A product reported with conflicting versions // is dropped rather than resolved by map iteration order, which is random. @@ -151,7 +172,7 @@ func buildTechVersionMap(technologies []string) map[string]string { if len(parts) != 2 { continue } - name := strings.ToLower(strings.TrimSpace(parts[0])) + name := normalizeProductName(parts[0]) version := strings.TrimSpace(parts[1]) if name == "" || version == "" { continue @@ -170,9 +191,9 @@ func buildTechVersionMap(technologies []string) map[string]string { } // EnrichCPEVersions returns a copy of matches with each CPE version field -// filled from a matching detected technology, keyed by product name -// (case-insensitive). Unmatched products keep their '*' version. Inputs are -// not mutated. +// filled from a matching detected technology, keyed by normalized product name +// (see normalizeProductName). Unmatched products keep their '*' version. Inputs +// are not mutated. func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { if len(matches) == 0 || len(technologies) == 0 { return append([]CPEInfo(nil), matches...) @@ -182,7 +203,7 @@ func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { enriched := make([]CPEInfo, len(matches)) for i, match := range matches { enriched[i] = match - if version, ok := versions[strings.ToLower(strings.TrimSpace(match.Product))]; ok { + if version, ok := versions[normalizeProductName(match.Product)]; ok { enriched[i].CPE = setCPEVersion(match.CPE, version) } } diff --git a/runner/cpe_test.go b/runner/cpe_test.go index 06fbeebaa..fe163bf0c 100644 --- a/runner/cpe_test.go +++ b/runner/cpe_test.go @@ -2,6 +2,8 @@ package runner import ( "testing" + + wappalyzer "github.com/projectdiscovery/wappalyzergo" ) func TestSanitizeCPEVersion(t *testing.T) { @@ -107,6 +109,25 @@ func TestSetCPEVersion(t *testing.T) { } } +func TestNormalizeProductName(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"WebLogic Server", "weblogicserver"}, // wappalyzer display form + {"weblogic_server", "weblogicserver"}, // awesome-search-queries snake_case + {"Next.js", "nextjs"}, // punctuation dropped + {"veeder-root", "veederroot"}, // hyphen dropped + {" Apache HTTP Server ", "apachehttpserver"}, // surrounding space + {"", ""}, + } + for _, tt := range tests { + if got := normalizeProductName(tt.in); got != tt.want { + t.Fatalf("normalizeProductName(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + func TestBuildTechVersionMap(t *testing.T) { techs := []string{ "Apache HTTP Server:2.4.7", @@ -118,9 +139,9 @@ func TestBuildTechVersionMap(t *testing.T) { got := buildTechVersionMap(techs) want := map[string]string{ - "apache http server": "2.4.7", - "php": "5.5.9", - "next.js": "14.2.3", + "apachehttpserver": "2.4.7", + "php": "5.5.9", + "nextjs": "14.2.3", } if len(got) != len(want) { t.Fatalf("map size = %d, want %d (%v)", len(got), len(want), got) @@ -157,8 +178,12 @@ func TestEnrichCPEVersions(t *testing.T) { {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"}, {Product: "Apache HTTP Server", Vendor: "apache", CPE: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*"}, {Product: "Bootstrap", Vendor: "getbootstrap", CPE: "cpe:2.3:a:getbootstrap:bootstrap:*:*:*:*:*:*:*:*"}, + // awesome-search-queries reports this product as snake_case while + // wappalyzer reports the display name "WebLogic Server"; normalization + // must still join them. + {Product: "weblogic_server", Vendor: "oracle", CPE: "cpe:2.3:a:oracle:weblogic_server:*:*:*:*:*:*:*:*"}, } - technologies := []string{"Next.js:14.2.3", "Apache HTTP Server:2.4.7", "Bootstrap"} + technologies := []string{"Next.js:14.2.3", "Apache HTTP Server:2.4.7", "Bootstrap", "WebLogic Server:12.2.1"} got := EnrichCPEVersions(matches, technologies) @@ -174,12 +199,58 @@ func TestEnrichCPEVersions(t *testing.T) { if got[2].CPE != "cpe:2.3:a:getbootstrap:bootstrap:*:*:*:*:*:*:*:*" { t.Fatalf("bootstrap CPE = %q, want unchanged", got[2].CPE) } + // snake_case product joins display-name technology via normalization + if got[3].CPE != "cpe:2.3:a:oracle:weblogic_server:12.2.1:*:*:*:*:*:*:*" { + t.Fatalf("weblogic CPE = %q, want version 12.2.1 injected", got[3].CPE) + } // input must not be mutated (immutability) if matches[0].CPE != "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*" { t.Fatalf("input matches[0] was mutated: %q", matches[0].CPE) } } +// TestEnrichCPEVersionsWithRealWappalyzer exercises the full contract the +// feature depends on end-to-end: a real wappalyzer fingerprint must yield +// "Name:version" technology entries (FormatAppVersion convention) that +// EnrichCPEVersions can parse and inject. This guards the integration the +// count-only functional test cannot assert. +func TestEnrichCPEVersionsWithRealWappalyzer(t *testing.T) { + wappalyze, err := wappalyzer.New() + if err != nil { + t.Fatalf("could not create wappalyzer: %s", err) + } + + // liferay-portal header carries a version; wappalyzer reports "Liferay:7.3.5". + info := wappalyze.FingerprintWithInfo(map[string][]string{ + "liferay-portal": {"testserver 7.3.5"}, + }, nil) + + var technologies []string + for name := range info { + technologies = append(technologies, name) + } + if !sliceContains(technologies, "Liferay:7.3.5") { + t.Fatalf("expected wappalyzer to emit \"Liferay:7.3.5\", got %v", technologies) + } + + matches := []CPEInfo{ + {Product: "Liferay", Vendor: "liferay", CPE: "cpe:2.3:a:liferay:liferay_portal:*:*:*:*:*:*:*:*"}, + } + got := EnrichCPEVersions(matches, technologies) + if got[0].CPE != "cpe:2.3:a:liferay:liferay_portal:7.3.5:*:*:*:*:*:*:*" { + t.Fatalf("liferay CPE = %q, want version 7.3.5 injected end-to-end", got[0].CPE) + } +} + +func sliceContains(s []string, v string) bool { + for _, e := range s { + if e == v { + return true + } + } + return false +} + func TestEnrichCPEVersionsNoTechnologies(t *testing.T) { matches := []CPEInfo{ {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"},