Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/functional-test/testcases.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ scanme.sh {{binary}} -silent -jarm
https://scanme.sh?a=1*1 {{binary}} -silent
https://scanme.sh:443 {{binary}} -asn
scanme.sh {{binary}} -silent -tls-impersonate
example.com {{binary}} -silent -bp -strip
example.com {{binary}} -silent -bp -strip
scanme.sh {{binary}} -cpe -silent
120 changes: 120 additions & 0 deletions runner/cpe.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,126 @@ func generateCPE(vendor, product string) string {
strings.ToLower(strings.ReplaceAll(product, " ", "_")))
}

// techDetectRequired reports whether technology detection has to run for the
// given options. It is true when the user asked for it directly (TechDetect)
// or when any feature that consumes the technology list is enabled: JSON
// output, CSV output, asset upload, or CPE detection (which reuses the
// detected versions).
func techDetectRequired(options *Options) bool {
	switch {
	case options.TechDetect,
		options.JSONOutput,
		options.CSVOutput,
		options.AssetUpload,
		options.CPEDetect:
		return true
	default:
		return false
	}
}

// Layout of a CPE 2.3 formatted string when split on ':':
//
//	cpe:2.3:<part>:<vendor>:<product>:<version>:<update>:<edition>:
//	<language>:<sw_edition>:<target_sw>:<target_hw>:<other>
const (
	// cpeVersionFieldIndex is the zero-based position of the version field.
	cpeVersionFieldIndex = 5

	// cpeFieldCount is the exact number of colon-separated fields in a
	// well-formed CPE 2.3 string (the "cpe" and "2.3" prefixes included).
	cpeFieldCount = 13
)

// sanitizeCPEVersion prepares a detected version for use as a CPE field:
// leading and trailing whitespace is removed and every remaining space
// character becomes an underscore. Other inner whitespace (tabs, newlines) is
// left as is.
//
// Letter case is deliberately kept. CPE 2.3 matching is case-insensitive, and
// lowercasing would mangle meaningful identifiers such as 1.0.0-RC1 or
// 9.0.0.M1.
func sanitizeCPEVersion(version string) string {
	trimmed := strings.TrimSpace(version)
	return strings.ReplaceAll(trimmed, " ", "_")
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// setCPEVersion returns cpe with its version field replaced by version (after
// sanitizeCPEVersion has been applied to it). The input cpe is returned
// unchanged when any of the following holds:
//   - cpe is empty, or the sanitized version is empty;
//   - the sanitized version contains ':', '*' or '?';
//   - cpe does not split into exactly cpeFieldCount colon-separated fields
//     beginning with "cpe" and "2.3".
func setCPEVersion(cpe, version string) string {
	if cpe == "" {
		return cpe
	}

	clean := sanitizeCPEVersion(version)
	switch {
	case clean == "":
		return cpe
	case strings.ContainsAny(clean, ":*?"):
		// ':' is the field separator and '*'/'?' are wildcards in CPE 2.3;
		// embedding them would break the field layout or the matching
		// semantics, so the CPE is left unenriched.
		return cpe
	}

	fields := strings.Split(cpe, ":")
	wellFormed := len(fields) == cpeFieldCount &&
		fields[0] == "cpe" &&
		fields[1] == "2.3"
	if !wellFormed {
		return cpe
	}

	fields[cpeVersionFieldIndex] = clean
	return strings.Join(fields, ":")
}

// normalizeProductName folds a product or technology name down to its
// lowercase ASCII letters and digits; every other byte (spaces, punctuation,
// underscores, non-ASCII characters) is discarded.
//
// The result is used as a join key between two independently maintained
// datasets: CPE product names are mostly snake_case (e.g. "weblogic_server")
// while wappalyzer reports display names (e.g. "WebLogic Server"); both
// normalize to "weblogicserver". Note that names differing only in discarded
// characters map to the same key.
func normalizeProductName(name string) string {
	var key strings.Builder
	key.Grow(len(name))
	for i := 0; i < len(name); i++ {
		c := name[i]
		if c >= 'A' && c <= 'Z' {
			c += 'a' - 'A' // fold ASCII upper case to lower case
		}
		if (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') {
			key.WriteByte(c)
		}
	}
	return key.String()
}

// buildTechVersionMap maps normalized technology name -> version, parsing
// wappalyzer's "Name:version" entries (FormatAppVersion convention). Entries
// without a version are skipped. A product reported with conflicting versions
// is dropped rather than resolved by map iteration order, which is random.
func buildTechVersionMap(technologies []string) map[string]string {
versions := make(map[string]string, len(technologies))
conflicting := make(map[string]struct{})
for _, tech := range technologies {
parts := strings.SplitN(tech, ":", 2)
if len(parts) != 2 {
continue
}
name := normalizeProductName(parts[0])
version := strings.TrimSpace(parts[1])
if name == "" || version == "" {
continue
}
if _, ok := conflicting[name]; ok {
continue
}
if existing, ok := versions[name]; ok && existing != version {
delete(versions, name)
conflicting[name] = struct{}{}
continue
}
versions[name] = version
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return versions
}

// EnrichCPEVersions returns a new slice holding a copy of every element of
// matches. For each element whose Product, once normalized with
// normalizeProductName, has a version in the map built from technologies, the
// copy's CPE is rewritten through setCPEVersion; all other elements are copied
// as they are. The matches slice itself is never written to.
//
// If either input is empty the result is a plain copy of matches.
func EnrichCPEVersions(matches []CPEInfo, technologies []string) []CPEInfo {
	if len(matches) == 0 || len(technologies) == 0 {
		return append([]CPEInfo(nil), matches...)
	}

	versionByProduct := buildTechVersionMap(technologies)

	out := make([]CPEInfo, len(matches))
	copy(out, matches)
	for i := range out {
		detected, found := versionByProduct[normalizeProductName(out[i].Product)]
		if !found {
			continue
		}
		out[i].CPE = setCPEVersion(out[i].CPE, detected)
	}
	return out
}

func (d *CPEDetector) extractPattern(query string, info CPEInfo) {
query = strings.TrimSpace(query)

Expand Down
Loading
Loading