Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ type Options struct {
HeadlessOptionalArguments goflags.StringSlice
Protocol string
OutputFilterErrorPagePath string
DisableStdout bool
DisableStdout bool

JavascriptCodes goflags.StringSlice

Expand Down Expand Up @@ -696,6 +696,30 @@ func ParseOptions() *Options {
return options
}

// HasMatcherOrFilter reports whether at least one output matcher or filter
// is configured on options.
//
// It returns true as soon as any of the length-checked matcher/filter fields
// is non-empty, or any of the string-valued ones (match/filter condition,
// match/filter response time) is not the empty string. Otherwise it returns
// false.
//
// NOTE(review): the unexported fields inspected here (matchStatusCode,
// filterRegexes, ...) are presumably derived from their exported string
// counterparts by ValidateOptions — confirm they are populated before this
// method is called.
func (options *Options) HasMatcherOrFilter() bool {
	switch {
	// Status code and content length.
	case len(options.matchStatusCode) > 0, len(options.filterStatusCode) > 0:
		return true
	case len(options.matchContentLength) > 0, len(options.filterContentLength) > 0:
		return true

	// Regular expressions.
	case len(options.matchRegexes) > 0, len(options.filterRegexes) > 0:
		return true

	// Line and word counts.
	case len(options.matchLinesCount) > 0, len(options.filterLinesCount) > 0:
		return true
	case len(options.matchWordsCount) > 0, len(options.filterWordsCount) > 0:
		return true

	// String, favicon, CDN and page-type lists.
	case len(options.OutputMatchString) > 0, len(options.OutputFilterString) > 0:
		return true
	case len(options.OutputMatchFavicon) > 0, len(options.OutputFilterFavicon) > 0:
		return true
	case len(options.OutputMatchCdn) > 0, len(options.OutputFilterCdn) > 0:
		return true
	case len(options.OutputFilterPageType) > 0:
		return true

	// Free-form expressions.
	case options.OutputMatchCondition != "", options.OutputFilterCondition != "":
		return true
	case options.OutputMatchResponseTime != "", options.OutputFilterResponseTime != "":
		return true
	}
	return false
}

func (options *Options) ValidateOptions() error {
if options.InputFile != "" && !fileutilz.FileNameIsGlob(options.InputFile) && !fileutil.FileExists(options.InputFile) {
return fmt.Errorf("file '%s' does not exist", options.InputFile)
Expand Down
89 changes: 46 additions & 43 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -2458,53 +2458,56 @@ retry:
responseBaseDir := filepath.Join(domainResponseBaseDir, hostFilename)

var responsePath, fileNameHash string
// store response
// store response — when matchers/filters are active, defer writing to the
// output loop so only matched responses are persisted to disk.
if scanopts.StoreResponse || scanopts.StoreChain {
if r.options.OmitBody {
resp.Raw = strings.ReplaceAll(resp.Raw, string(resp.Data), "")
}
responsePath = fileutilz.AbsPathOrDefault(filepath.Join(responseBaseDir, domainResponseFile))
// URL.EscapedString returns a string that can be used as a filename
respRaw := resp.Raw
reqRaw := requestDump
if len(respRaw) > scanopts.MaxResponseBodySizeToSave {
respRaw = respRaw[:scanopts.MaxResponseBodySizeToSave]
}
data := reqRaw
if scanopts.StoreChain && resp.HasChain() {
data = append(data, append([]byte("\n"), []byte(resp.GetChain())...)...)
}
data = append(data, respRaw...)
data = append(data, []byte("\n\n\n")...)
data = append(data, []byte(fullURL)...)
_ = fileutil.CreateFolder(responseBaseDir)

basePath := strings.TrimSuffix(responsePath, ".txt")
var idx int
for idx = 0; ; idx++ {
targetPath := responsePath
if idx > 0 {
targetPath = fmt.Sprintf("%s_%d.txt", basePath, idx)
}
f, err := os.OpenFile(targetPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
if err == nil {
_, writeErr := f.Write(data)
_ = f.Close()
if writeErr != nil {
gologger.Error().Msgf("Could not write to '%s': %s", targetPath, writeErr)
fileNameHash = hash

if !r.options.HasMatcherOrFilter() {
if r.options.OmitBody {
resp.Raw = strings.ReplaceAll(resp.Raw, string(resp.Data), "")
}
responsePath = fileutilz.AbsPathOrDefault(filepath.Join(responseBaseDir, domainResponseFile))
// URL.EscapedString returns a string that can be used as a filename
respRaw := resp.Raw
reqRaw := requestDump
if len(respRaw) > scanopts.MaxResponseBodySizeToSave {
respRaw = respRaw[:scanopts.MaxResponseBodySizeToSave]
}
data := reqRaw
if scanopts.StoreChain && resp.HasChain() {
data = append(data, append([]byte("\n"), []byte(resp.GetChain())...)...)
}
data = append(data, respRaw...)
data = append(data, []byte("\n\n\n")...)
data = append(data, []byte(fullURL)...)
_ = fileutil.CreateFolder(responseBaseDir)

basePath := strings.TrimSuffix(responsePath, ".txt")
var idx int
for idx = 0; ; idx++ {
targetPath := responsePath
if idx > 0 {
targetPath = fmt.Sprintf("%s_%d.txt", basePath, idx)
}
f, err := os.OpenFile(targetPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
if err == nil {
_, writeErr := f.Write(data)
_ = f.Close()
if writeErr != nil {
gologger.Error().Msgf("Could not write to '%s': %s", targetPath, writeErr)
}
break
}
if !os.IsExist(err) {
gologger.Error().Msgf("Failed to create file '%s': %s", targetPath, err)
break
}
break
}
if !os.IsExist(err) {
gologger.Error().Msgf("Failed to create file '%s': %s", targetPath, err)
break
}
}

if idx == 0 {
fileNameHash = hash
} else {
fileNameHash = fmt.Sprintf("%s_%d", hash, idx)
if idx > 0 {
fileNameHash = fmt.Sprintf("%s_%d", hash, idx)
}
}
}
Comment on lines +2461 to 2512

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Persistence is now split between analyze and the output loop, causing double writes without matchers and a uniqueness regression with matchers.

The deferred persistence path (the output loop at lines 1303-1340) still writes the response file unconditionally for every matched result via plain os.WriteFile(responsePath, data, 0644) — it does not use O_EXCL and does not apply the _<idx> suffix logic added here. Concrete consequences of the split:

  • Double write when no matcher/filter is active. Both this block and the output loop run, so the file freshly created here with O_EXCL is then re-written (overwritten) by the output loop's os.WriteFile, doubling I/O for every response.
  • Lost uniqueness when a matcher/filter is active. This branch is skipped, so fileNameHash stays at the bare hash (no _<idx> suffix). The hash is sha1(method + ":" + URL.EscapedString()), which is identical across distinct results that share method+URL — e.g. -probe-all-ips (different CustomIP, same URL string), the goto retry path after protocol fallback, or repeated probing. With matchers active those matched results all collide on the same <hash>.txt and silently overwrite each other.
  • OmitBody is a no-op in the deferred path. The output loop strips via strings.ReplaceAll(resp.Raw, resp.ResponseBody, ""), but Result.ResponseBody is only populated when -response/-base64-response/-mdc/-fdc are set (see lines 2180-2190). The branch here uses string(resp.Data) and works correctly; the deferred branch leaves the body in resp.Raw on disk whenever -omit-body is combined with a matcher/filter without one of those flags.

Recommendation: consolidate persistence into a single path. The cleanest fix is to drop this analyze-side write entirely and move the O_EXCL + _<idx> uniqueness logic and the string(resp.Data)-based OmitBody handling into the output loop (which already runs after matcher/filter evaluation) so uniqueness, body omission, and "only matched" all work together. Updating Result.FileNameHash in the output loop after a successful create would keep downstream consumers (index.txt, callers) consistent.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@runner/runner.go` around lines 2461 - 2512, Remove the analyze-side file
write block that runs when scanopts.StoreResponse or scanopts.StoreChain is true
(the code that creates response files using os.OpenFile with O_EXCL, builds data
from requestDump/resp.Raw/resp.GetChain(), and sets fileNameHash), and instead
implement the O_EXCL + incremental _<idx> filename creation, the
truncation/omit-body logic that uses resp.Data (resp.Raw =
strings.ReplaceAll(resp.Raw, string(resp.Data), "") when r.options.OmitBody),
and the StoreChain append behavior inside the existing output loop that
currently calls os.WriteFile(responsePath, data, 0644); after successfully
creating/writing the file there update Result.FileNameHash (fileNameHash) to
include the _<idx> suffix so downstream consumers see the exact filename — this
consolidates persistence to a single path, prevents double-writes, preserves
uniqueness when r.options.HasMatcherOrFilter() is true, and ensures omit-body
works consistently.


Expand Down
152 changes: 152 additions & 0 deletions runner/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,158 @@ func TestRunner_testAndSet_concurrent(t *testing.T) {
require.Equal(t, 1, winCount, "exactly one goroutine should win testAndSet for the same key")
}

// TestOptions_hasMatcherOrFilter checks that HasMatcherOrFilter returns false
// for a zero-value Options and true when any single matcher or filter option
// is set. Each case runs ValidateOptions first and requires it to succeed
// before querying HasMatcherOrFilter.
func TestOptions_hasMatcherOrFilter(t *testing.T) {
	tests := []struct {
		name     string
		options  Options
		expected bool
	}{
		{
			name:     "no matchers or filters",
			options:  Options{},
			expected: false,
		},
		{
			name:     "match status code",
			options:  Options{OutputMatchStatusCode: "200"},
			expected: true,
		},
		{
			name:     "filter status code",
			options:  Options{OutputFilterStatusCode: "403,401"},
			expected: true,
		},
		{
			name:     "match string",
			options:  Options{OutputMatchString: []string{"admin"}},
			expected: true,
		},
		{
			name:     "filter string",
			options:  Options{OutputFilterString: []string{"error"}},
			expected: true,
		},
		{
			name:     "match content length",
			options:  Options{OutputMatchContentLength: "100"},
			expected: true,
		},
		{
			name:     "filter content length",
			options:  Options{OutputFilterContentLength: "0"},
			expected: true,
		},
		{
			name:     "match regex",
			options:  Options{OutputMatchRegex: []string{"admin.*panel"}},
			expected: true,
		},
		{
			name:     "filter regex",
			options:  Options{OutputFilterRegex: []string{"error"}},
			expected: true,
		},
		{
			name:     "match lines count",
			options:  Options{OutputMatchLinesCount: "50"},
			expected: true,
		},
		{
			name:     "filter lines count",
			options:  Options{OutputFilterLinesCount: "0"},
			expected: true,
		},
		{
			name:     "match words count",
			options:  Options{OutputMatchWordsCount: "100"},
			expected: true,
		},
		{
			name:     "filter words count",
			options:  Options{OutputFilterWordsCount: "0"},
			expected: true,
		},
		{
			name:     "match favicon",
			options:  Options{OutputMatchFavicon: []string{"1494302000"}},
			expected: true,
		},
		{
			name:     "filter favicon",
			options:  Options{OutputFilterFavicon: []string{"1494302000"}},
			expected: true,
		},
		{
			name:     "match cdn",
			options:  Options{OutputMatchCdn: []string{"cloudflare"}},
			expected: true,
		},
		{
			name:     "filter cdn",
			options:  Options{OutputFilterCdn: []string{"cloudflare"}},
			expected: true,
		},
		{
			name:     "match condition",
			options:  Options{OutputMatchCondition: "status_code == 200"},
			expected: true,
		},
		{
			name:     "filter condition",
			options:  Options{OutputFilterCondition: "status_code == 403"},
			expected: true,
		},
		{
			name:     "match response time",
			options:  Options{OutputMatchResponseTime: "< 1"},
			expected: true,
		},
		{
			name:     "filter response time",
			options:  Options{OutputFilterResponseTime: "> 5"},
			expected: true,
		},
		{
			name:     "filter page type",
			options:  Options{OutputFilterPageType: []string{"error"}},
			expected: true,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Work on a copy so the table entry itself is never mutated.
			opts := tc.options
			// NoError (rather than Nil) states the intent and prints the
			// error text on failure.
			require.NoError(t, opts.ValidateOptions())
			require.Equal(t, tc.expected, opts.HasMatcherOrFilter(),
				"HasMatcherOrFilter() should be %v for %s", tc.expected, tc.name)
		})
	}
}

// TestStoreResponse_withoutMatchersStoresAll checks that, with StoreResponse
// enabled and no matcher or filter configured, HasMatcherOrFilter reports
// false.
//
// NOTE(review): despite its name, this test only asserts the flag; it does
// not run a scan or verify that any response file is written to dir.
func TestStoreResponse_withoutMatchersStoresAll(t *testing.T) {
	dir := t.TempDir()
	opts := &Options{
		StoreResponse:    true,
		StoreResponseDir: dir,
	}
	// NoError (rather than Nil) states the intent and prints the error text
	// on failure.
	require.NoError(t, opts.ValidateOptions())
	require.False(t, opts.HasMatcherOrFilter())
}

// TestStoreResponse_withMatcherSetsFlag checks that, with StoreResponse
// enabled and a status-code matcher configured, HasMatcherOrFilter reports
// true after ValidateOptions succeeds.
//
// NOTE(review): only the flag is asserted; no response is fetched and nothing
// is written to dir.
func TestStoreResponse_withMatcherSetsFlag(t *testing.T) {
	dir := t.TempDir()
	opts := &Options{
		StoreResponse:         true,
		StoreResponseDir:      dir,
		OutputMatchStatusCode: "200",
	}
	// NoError (rather than Nil) states the intent and prints the error text
	// on failure.
	require.NoError(t, opts.ValidateOptions())
	require.True(t, opts.HasMatcherOrFilter())
}

func TestRunner_duplicate(t *testing.T) {
const (
pageA = "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n<html><head><title>Welcome</title></head><body>Hello world default page content here</body></html>"
Expand Down
Loading