diff --git a/cmd/functional-test/testcases.txt b/cmd/functional-test/testcases.txt index 34a53b4ce..2eab29b3f 100644 --- a/cmd/functional-test/testcases.txt +++ b/cmd/functional-test/testcases.txt @@ -20,4 +20,5 @@ scanme.sh {{binary}} -silent -jarm https://scanme.sh?a=1*1 {{binary}} -silent https://scanme.sh:443 {{binary}} -asn scanme.sh {{binary}} -silent -tls-impersonate -example.com {{binary}} -silent -bp -strip \ No newline at end of file +example.com {{binary}} -silent -bp -strip +scanme.sh {{binary}} -cpe -silent diff --git a/runner/cpe.go b/runner/cpe.go index 76444fa9f..6861dea86 100644 --- a/runner/cpe.go +++ b/runner/cpe.go @@ -90,6 +90,126 @@ func generateCPE(vendor, product string) string { strings.ToLower(strings.ReplaceAll(product, " ", "_"))) } +// techDetectRequired reports whether tech-detect must run: JSON/CSV output, +// asset upload, and -cpe (which reuses detected versions) all consume the +// technology list. +func techDetectRequired(options *Options) bool { + return options.TechDetect || + options.JSONOutput || + options.CSVOutput || + options.AssetUpload || + options.CPEDetect +} + +// cpeVersionFieldIndex is the zero-based position of the version field in a +// CPE 2.3 formatted string: cpe:2.3:<part>:<vendor>:<product>:<version>:... +const cpeVersionFieldIndex = 5 + +// cpeFieldCount is the exact number of colon-separated fields in a well-formed +// CPE 2.3 string: cpe, 2.3, part, vendor, product, version, update, edition, +// language, sw_edition, target_sw, target_hw, other. +const cpeFieldCount = 13 + +// sanitizeCPEVersion normalizes a detected version for embedding in a CPE +// string: trim surrounding space and replace inner spaces with underscores. +// Case is preserved — CPE 2.3 matching is case-insensitive, and lowercasing +// would corrupt semantically meaningful identifiers like 1.0.0-RC1 or 9.0.0.M1. 
+func sanitizeCPEVersion(version string) string { + return strings.ReplaceAll(strings.TrimSpace(version), " ", "_") +} + +// setCPEVersion returns a copy of a CPE 2.3 string with its version field +// replaced. The input is returned unchanged if version/cpe is empty or the CPE +// is malformed. +func setCPEVersion(cpe, version string) string { + version = sanitizeCPEVersion(version) + if cpe == "" || version == "" { + return cpe + } + // Reserved CPE 2.3 chars (':' field separator, '*'/'?' wildcards) would + // corrupt the field layout or matching semantics; leave the CPE unenriched. + if strings.ContainsAny(version, ":*?") { + return cpe + } + parts := strings.Split(cpe, ":") + if len(parts) != cpeFieldCount || parts[0] != "cpe" || parts[1] != "2.3" { + return cpe + } + parts[cpeVersionFieldIndex] = version + return strings.Join(parts, ":") +} + +// normalizeProductName reduces a product/technology name to its lowercase +// alphanumeric form so the two independent datasets can be joined. The CPE +// product names (awesome-search-queries) are mostly snake_case +// (e.g. "weblogic_server") while wappalyzer reports display names +// (e.g. "WebLogic Server"); stripping every non-alphanumeric rune lets those +// align. It is strictly more permissive than a lower+trim compare, so it never +// drops a previously matching pair, only adds new ones. +func normalizeProductName(name string) string { + var b strings.Builder + b.Grow(len(name)) + for _, r := range name { + switch { + case r >= 'a' && r <= 'z', r >= '0' && r <= '9': + b.WriteRune(r) + case r >= 'A' && r <= 'Z': + b.WriteRune(r + ('a' - 'A')) + } + } + return b.String() +} + +// buildTechVersionMap maps normalized technology name -> version, parsing +// wappalyzer's "Name:version" entries (FormatAppVersion convention). Entries +// without a version are skipped. A product reported with conflicting versions +// is dropped rather than resolved by map iteration order, which is random. 
+func buildTechVersionMap(technologies []string) map[string]string { + versions := make(map[string]string, len(technologies)) + conflicting := make(map[string]struct{}) + for _, tech := range technologies { + parts := strings.SplitN(tech, ":", 2) + if len(parts) != 2 { + continue + } + name := normalizeProductName(parts[0]) + version := strings.TrimSpace(parts[1]) + if name == "" || version == "" { + continue + } + if _, ok := conflicting[name]; ok { + continue + } + if existing, ok := versions[name]; ok && existing != version { + delete(versions, name) + conflicting[name] = struct{}{} + continue + } + versions[name] = version + } + return versions +} + +// EnrichCPEVersions returns a copy of matches with each CPE version field +// filled from a matching detected technology, keyed by normalized product name +// (see normalizeProductName). Unmatched products keep their '*' version. Inputs +// are not mutated. +func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo { + if len(matches) == 0 || len(technologies) == 0 { + return append([]CPEInfo(nil), matches...) 
+ } + versions := buildTechVersionMap(technologies) + + enriched := make([]CPEInfo, len(matches)) + for i, match := range matches { + enriched[i] = match + if version, ok := versions[normalizeProductName(match.Product)]; ok { + enriched[i].CPE = setCPEVersion(match.CPE, version) + } + } + return enriched +} + func (d *CPEDetector) extractPattern(query string, info CPEInfo) { query = strings.TrimSpace(query) diff --git a/runner/cpe_test.go b/runner/cpe_test.go new file mode 100644 index 000000000..fe163bf0c --- /dev/null +++ b/runner/cpe_test.go @@ -0,0 +1,268 @@ +package runner + +import ( + "testing" + + wappalyzer "github.com/projectdiscovery/wappalyzergo" +) + +func TestSanitizeCPEVersion(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {"plain semver", "2.4.7", "2.4.7"}, + {"case preserved", "1.0.0-RC1", "1.0.0-RC1"}, + {"spaces to underscore", "10 0", "10_0"}, + {"empty", "", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := sanitizeCPEVersion(tt.in); got != tt.want { + t.Fatalf("sanitizeCPEVersion(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +func TestTechDetectRequired(t *testing.T) { + tests := []struct { + name string + options *Options + want bool + }{ + {"nothing enabled", &Options{}, false}, + {"tech-detect flag", &Options{TechDetect: true}, true}, + {"json output", &Options{JSONOutput: true}, true}, + {"csv output", &Options{CSVOutput: true}, true}, + {"asset upload", &Options{AssetUpload: true}, true}, + // issue #2476: -cpe alone must turn tech-detect on, because CPE + // enrichment reuses the versions wappalyzer extracts. 
+ {"cpe alone enables tech-detect", &Options{CPEDetect: true}, true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := techDetectRequired(tt.options); got != tt.want { + t.Fatalf("techDetectRequired(%+v) = %v, want %v", tt.options, got, tt.want) + } + }) + } +} + +func TestSetCPEVersion(t *testing.T) { + tests := []struct { + name string + cpe string + version string + want string + }{ + { + name: "fills version slot", + cpe: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*", + version: "14.2.3", + want: "cpe:2.3:a:vercel:next.js:14.2.3:*:*:*:*:*:*:*", + }, + { + name: "empty version leaves cpe unchanged", + cpe: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*", + version: "", + want: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*", + }, + { + name: "empty cpe stays empty", + cpe: "", + version: "1.2.3", + want: "", + }, + { + name: "malformed cpe returned unchanged", + cpe: "not-a-cpe", + version: "1.2.3", + want: "not-a-cpe", + }, + { + name: "truncated cpe returned unchanged", + cpe: "cpe:2.3:a:vendor:product:*", + version: "1.2.3", + want: "cpe:2.3:a:vendor:product:*", + }, + { + name: "version with colon leaves cpe unchanged", + cpe: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + version: "1.0:beta", + want: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + }, + { + name: "version with wildcard leaves cpe unchanged", + cpe: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + version: "2.*", + want: "cpe:2.3:a:vendor:product:*:*:*:*:*:*:*:*", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := setCPEVersion(tt.cpe, tt.version); got != tt.want { + t.Fatalf("setCPEVersion(%q, %q) = %q, want %q", tt.cpe, tt.version, got, tt.want) + } + }) + } +} + +func TestNormalizeProductName(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"WebLogic Server", "weblogicserver"}, // wappalyzer display form + {"weblogic_server", "weblogicserver"}, // awesome-search-queries snake_case + {"Next.js", 
"nextjs"}, // punctuation dropped + {"veeder-root", "veederroot"}, // hyphen dropped + {" Apache HTTP Server ", "apachehttpserver"}, // surrounding space + {"", ""}, + } + for _, tt := range tests { + if got := normalizeProductName(tt.in); got != tt.want { + t.Fatalf("normalizeProductName(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} + +func TestBuildTechVersionMap(t *testing.T) { + techs := []string{ + "Apache HTTP Server:2.4.7", + "PHP:5.5.9", + "Bootstrap", // no version -> not in map + "Next.js:14.2.3", + "jQuery:", // empty version -> not in map + } + got := buildTechVersionMap(techs) + + want := map[string]string{ + "apachehttpserver": "2.4.7", + "php": "5.5.9", + "nextjs": "14.2.3", + } + if len(got) != len(want) { + t.Fatalf("map size = %d, want %d (%v)", len(got), len(want), got) + } + for k, v := range want { + if got[k] != v { + t.Fatalf("got[%q] = %q, want %q", k, got[k], v) + } + } + if _, ok := got["bootstrap"]; ok { + t.Fatalf("bootstrap should not be present (no version)") + } + if _, ok := got["jquery"]; ok { + t.Fatalf("jquery should not be present (empty version)") + } +} + +func TestBuildTechVersionMapConflict(t *testing.T) { + // the same product reported with two versions must be dropped, not resolved + // by random map iteration order. 
+ techs := []string{"Foo:1.2.3", "Foo:1.2.4", "Bar:9.0"} + got := buildTechVersionMap(techs) + + if _, ok := got["foo"]; ok { + t.Fatalf("conflicting product foo should be dropped, got %q", got["foo"]) + } + if got["bar"] != "9.0" { + t.Fatalf("got[bar] = %q, want 9.0", got["bar"]) + } +} + +func TestEnrichCPEVersions(t *testing.T) { + matches := []CPEInfo{ + {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"}, + {Product: "Apache HTTP Server", Vendor: "apache", CPE: "cpe:2.3:a:apache:apache_http_server:*:*:*:*:*:*:*:*"}, + {Product: "Bootstrap", Vendor: "getbootstrap", CPE: "cpe:2.3:a:getbootstrap:bootstrap:*:*:*:*:*:*:*:*"}, + // awesome-search-queries reports this product as snake_case while + // wappalyzer reports the display name "WebLogic Server"; normalization + // must still join them. + {Product: "weblogic_server", Vendor: "oracle", CPE: "cpe:2.3:a:oracle:weblogic_server:*:*:*:*:*:*:*:*"}, + } + technologies := []string{"Next.js:14.2.3", "Apache HTTP Server:2.4.7", "Bootstrap", "WebLogic Server:12.2.1"} + + got := EnrichCPEVersions(matches, technologies) + + // issue #2476: next.js version is injected + if got[0].CPE != "cpe:2.3:a:vercel:next.js:14.2.3:*:*:*:*:*:*:*" { + t.Fatalf("next.js CPE = %q, want version 14.2.3 injected", got[0].CPE) + } + // case-insensitive product match works for multi-word names + if got[1].CPE != "cpe:2.3:a:apache:apache_http_server:2.4.7:*:*:*:*:*:*:*" { + t.Fatalf("apache CPE = %q, want version 2.4.7 injected", got[1].CPE) + } + // no detected version -> unchanged (still '*') + if got[2].CPE != "cpe:2.3:a:getbootstrap:bootstrap:*:*:*:*:*:*:*:*" { + t.Fatalf("bootstrap CPE = %q, want unchanged", got[2].CPE) + } + // snake_case product joins display-name technology via normalization + if got[3].CPE != "cpe:2.3:a:oracle:weblogic_server:12.2.1:*:*:*:*:*:*:*" { + t.Fatalf("weblogic CPE = %q, want version 12.2.1 injected", got[3].CPE) + } + // input must not be mutated (immutability) + if 
matches[0].CPE != "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*" { + t.Fatalf("input matches[0] was mutated: %q", matches[0].CPE) + } +} + +// TestEnrichCPEVersionsWithRealWappalyzer exercises the full contract the +// feature depends on end-to-end: a real wappalyzer fingerprint must yield +// "Name:version" technology entries (FormatAppVersion convention) that +// EnrichCPEVersions can parse and inject. This guards the integration the +// count-only functional test cannot assert. +func TestEnrichCPEVersionsWithRealWappalyzer(t *testing.T) { + wappalyze, err := wappalyzer.New() + if err != nil { + t.Fatalf("could not create wappalyzer: %s", err) + } + + // liferay-portal header carries a version; wappalyzer reports "Liferay:7.3.5". + info := wappalyze.FingerprintWithInfo(map[string][]string{ + "liferay-portal": {"testserver 7.3.5"}, + }, nil) + + var technologies []string + for name := range info { + technologies = append(technologies, name) + } + if !sliceContains(technologies, "Liferay:7.3.5") { + t.Fatalf("expected wappalyzer to emit \"Liferay:7.3.5\", got %v", technologies) + } + + matches := []CPEInfo{ + {Product: "Liferay", Vendor: "liferay", CPE: "cpe:2.3:a:liferay:liferay_portal:*:*:*:*:*:*:*:*"}, + } + got := EnrichCPEVersions(matches, technologies) + if got[0].CPE != "cpe:2.3:a:liferay:liferay_portal:7.3.5:*:*:*:*:*:*:*" { + t.Fatalf("liferay CPE = %q, want version 7.3.5 injected end-to-end", got[0].CPE) + } +} + +func sliceContains(s []string, v string) bool { + for _, e := range s { + if e == v { + return true + } + } + return false +} + +func TestEnrichCPEVersionsNoTechnologies(t *testing.T) { + matches := []CPEInfo{ + {Product: "next.js", Vendor: "vercel", CPE: "cpe:2.3:a:vercel:next.js:*:*:*:*:*:*:*:*"}, + } + got := EnrichCPEVersions(matches, nil) + if got[0].CPE != matches[0].CPE { + t.Fatalf("with no technologies CPE should be unchanged, got %q", got[0].CPE) + } + // the early-return path must still return a copy: mutating the result + // must not 
reach back into the caller's input slice. + got[0].CPE = "mutated" + if matches[0].CPE == "mutated" { + t.Fatalf("early-return aliased the input slice; want a copy") + } +} diff --git a/runner/options.go b/runner/options.go index 89caa6ccd..3e9a65f9d 100644 --- a/runner/options.go +++ b/runner/options.go @@ -410,7 +410,7 @@ func ParseOptions() *Options { flagSet.BoolVarP(&options.OutputServerHeader, "web-server", "server", false, "display server name"), flagSet.BoolVarP(&options.TechDetect, "tech-detect", "td", false, "display technology in use based on wappalyzer dataset"), flagSet.StringVarP(&options.CustomFingerprintFile, "custom-fingerprint-file", "cff", "", "path to a custom fingerprint file for technology detection"), - flagSet.BoolVar(&options.CPEDetect, "cpe", false, "display CPE (Common Platform Enumeration) based on awesome-search-queries"), + flagSet.BoolVar(&options.CPEDetect, "cpe", false, "display CPE (Common Platform Enumeration) with product version based on awesome-search-queries"), flagSet.BoolVarP(&options.WordPress, "wordpress", "wp", false, "display WordPress plugins and themes"), flagSet.BoolVar(&options.OutputMethod, "method", false, "display http request method"), flagSet.BoolVarP(&options.OutputWebSocket, "websocket", "ws", false, "display server using websocket"), diff --git a/runner/runner.go b/runner/runner.go index 352e4c74c..9764d4658 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -148,7 +148,7 @@ func New(options *Options) (*Runner, error) { var err error if options.Wappalyzer != nil { runner.wappalyzer = options.Wappalyzer - } else if options.TechDetect || options.JSONOutput || options.CSVOutput || options.AssetUpload { + } else if techDetectRequired(options) { runner.wappalyzer, err = func() (*wappalyzer.Wappalyze, error) { if options.CustomFingerprintFile != "" { return wappalyzer.NewFromFile(options.CustomFingerprintFile, true, true) @@ -340,7 +340,7 @@ func New(options *Options) (*Runner, error) { 
scanopts.OutputResponseTime = options.OutputResponseTime scanopts.NoFallback = options.NoFallback scanopts.NoFallbackScheme = options.NoFallbackScheme - scanopts.TechDetect = options.TechDetect || options.JSONOutput || options.CSVOutput || options.AssetUpload + scanopts.TechDetect = techDetectRequired(options) scanopts.CPEDetect = options.CPEDetect || options.JSONOutput || options.CSVOutput scanopts.WordPress = options.WordPress || options.JSONOutput || options.CSVOutput scanopts.StoreChain = options.StoreChain @@ -2566,7 +2566,7 @@ retry: // As we now have headless body, we can also use it for detecting // more technologies in the response. This is a quick trick to get // more detected technologies. - if r.options.TechDetect || r.options.JSONOutput || r.options.CSVOutput { + if techDetectRequired(r.options) { moreMatches := r.wappalyzer.FingerprintWithInfo(resp.Headers, []byte(headlessBody)) for match, data := range moreMatches { technologies = append(technologies, match) @@ -2599,6 +2599,7 @@ retry: var cpeMatches []CPEInfo if r.cpeDetector != nil { cpeMatches = r.cpeDetector.Detect(title, string(resp.Data), faviconMMH3) + cpeMatches = EnrichCPEVersions(cpeMatches, technologies) if len(cpeMatches) > 0 && r.options.CPEDetect { for _, cpe := range cpeMatches { builder.WriteString(" [")