From ec2b7154bc83b2174a320d8bb87ba5d49235b782 Mon Sep 17 00:00:00 2001 From: Connor Larson Date: Mon, 22 Dec 2025 12:22:10 -0700 Subject: [PATCH 1/7] changed resp.Raw construction to cap the body, avoiding oom issue --- common/httpx/httpx.go | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/common/httpx/httpx.go b/common/httpx/httpx.go index 39d1ce38..39dd18d4 100644 --- a/common/httpx/httpx.go +++ b/common/httpx/httpx.go @@ -7,12 +7,16 @@ import ( "io" "net" "net/http" + "net/http/httputil" "net/url" "os" "strconv" "strings" "time" + // TODO: temp + "log" + "github.com/microcosm-cc/bluemonday" "github.com/projectdiscovery/cdncheck" "github.com/projectdiscovery/fastdialer/fastdialer" @@ -235,39 +239,50 @@ get_response: resp.Headers = httpresp.Header.Clone() - // httputil.DumpResponse does not handle websockets - headers, rawResp, err := pdhttputil.DumpResponseHeadersAndRaw(httpresp) + // Dump headers only (does not consume body) + headers, err := httputil.DumpResponse(httpresp, false) if err != nil { if stringsutil.ContainsAny(err.Error(), "tls: user canceled") { shouldIgnoreErrors = true shouldIgnoreBodyErrors = true } - - // Edge case - some servers respond with gzip encoding header but uncompressed body, in this case the standard library configures the reader as gzip, triggering an error when read. 
- // The bytes slice is not accessible because of abstraction, therefore we need to perform the request again tampering the Accept-Encoding header - if !gzipRetry && strings.Contains(err.Error(), "gzip: invalid header") { - gzipRetry = true - req.Header.Set("Accept-Encoding", "identity") - goto get_response - } if !shouldIgnoreErrors { return nil, err } } - resp.Raw = string(rawResp) + resp.RawHeaders = string(headers) + var respbody []byte // body shouldn't be read with the following status codes // 101 - Switching Protocols => websockets don't have a readable body // 304 - Not Modified => no body the response terminates with latest header newline if !generic.EqualsAny(httpresp.StatusCode, http.StatusSwitchingProtocols, http.StatusNotModified) { - var err error + // TODO: temp logger + log.Printf("MaxResponseBodySizeToRead=%d\n", h.Options.MaxResponseBodySizeToRead) respbody, err = io.ReadAll(io.LimitReader(httpresp.Body, h.Options.MaxResponseBodySizeToRead)) - if err != nil && !shouldIgnoreBodyErrors { - return nil, err + if err != nil { + // Edge case: some servers respond with gzip encoding header but uncompressed body. + // Retry request with identity encoding. + if !gzipRetry && strings.Contains(err.Error(), "gzip: invalid header") { + gzipRetry = true + req.Header.Set("Accept-Encoding", "identity") + goto get_response + } + if !shouldIgnoreBodyErrors { + return nil, err + } } } + // Build bounded raw response: headers + capped body + // NOTE: resp.Raw must be constructed from a capped body to avoid OOM on infinite streams. + + raw := make([]byte, 0, len(headers)+len(respbody)) + raw = append(raw, headers...) + raw = append(raw, respbody...) 
+ resp.Raw = string(raw) + closeErr := httpresp.Body.Close() if closeErr != nil && !shouldIgnoreBodyErrors { return nil, closeErr From 2773b08d8b78be99e2ad42a033f7563488771401 Mon Sep 17 00:00:00 2001 From: Connor Larson Date: Mon, 22 Dec 2025 12:29:06 -0700 Subject: [PATCH 2/7] changed max response to save and read to 10MB, the common/httpx default --- runner/options.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runner/options.go b/runner/options.go index 0e41fd24..5936c379 100644 --- a/runner/options.go +++ b/runner/options.go @@ -2,7 +2,6 @@ package runner import ( "fmt" - "math" "os" "path/filepath" "regexp" @@ -541,8 +540,9 @@ func ParseOptions() *Options { flagSet.IntVar(&options.Retries, "retries", 0, "number of retries"), flagSet.IntVar(&options.Timeout, "timeout", 10, "timeout in seconds"), flagSet.DurationVar(&options.Delay, "delay", -1, "duration between each http request (eg: 200ms, 1s)"), - flagSet.IntVarP(&options.MaxResponseBodySizeToSave, "response-size-to-save", "rsts", math.MaxInt32, "max response size to save in bytes"), - flagSet.IntVarP(&options.MaxResponseBodySizeToRead, "response-size-to-read", "rstr", math.MaxInt32, "max response size to read in bytes"), + // 10MB max response size matches common/httpx default + flagSet.IntVarP(&options.MaxResponseBodySizeToSave, "response-size-to-save", "rsts", (10*1024*1024), "max response size to save in bytes"), + flagSet.IntVarP(&options.MaxResponseBodySizeToRead, "response-size-to-read", "rstr", (10*1024*1024), "max response size to read in bytes"), ) flagSet.CreateGroup("cloud", "Cloud", From a28b9fa3d6be229bbaaa45923d3d2d5f4794023a Mon Sep 17 00:00:00 2001 From: Connor Larson Date: Mon, 22 Dec 2025 12:31:44 -0700 Subject: [PATCH 3/7] fix: updated README max response size to match edit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c7017878..4d11270c 100644 --- a/README.md +++ b/README.md @@ -254,8 +254,8 @@ 
OPTIMIZATIONS: -retries int number of retries -timeout int timeout in seconds (default 10) -delay value duration between each http request (eg: 200ms, 1s) (default -1ns) - -rsts, -response-size-to-save int max response size to save in bytes (default 2147483647) - -rstr, -response-size-to-read int max response size to read in bytes (default 2147483647) + -rsts, -response-size-to-save int max response size to save in bytes (default 10485760) + -rstr, -response-size-to-read int max response size to read in bytes (default 10485760) CLOUD: -auth configure projectdiscovery cloud (pdcp) api key (default true) From 85695760cb153f3c8a278447d5d2cf3e8bda8389 Mon Sep 17 00:00:00 2001 From: Connor Larson Date: Mon, 22 Dec 2025 12:37:23 -0700 Subject: [PATCH 4/7] fix: removed temp logger --- common/httpx/httpx.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/common/httpx/httpx.go b/common/httpx/httpx.go index 39dd18d4..d7070523 100644 --- a/common/httpx/httpx.go +++ b/common/httpx/httpx.go @@ -14,9 +14,6 @@ import ( "strings" "time" - // TODO: temp - "log" - "github.com/microcosm-cc/bluemonday" "github.com/projectdiscovery/cdncheck" "github.com/projectdiscovery/fastdialer/fastdialer" @@ -258,8 +255,7 @@ get_response: // 101 - Switching Protocols => websockets don't have a readable body // 304 - Not Modified => no body the response terminates with latest header newline if !generic.EqualsAny(httpresp.StatusCode, http.StatusSwitchingProtocols, http.StatusNotModified) { - // TODO: temp logger - log.Printf("MaxResponseBodySizeToRead=%d\n", h.Options.MaxResponseBodySizeToRead) + respbody, err = io.ReadAll(io.LimitReader(httpresp.Body, h.Options.MaxResponseBodySizeToRead)) if err != nil { // Edge case: some servers respond with gzip encoding header but uncompressed body. 
From 78bf9c393a04aa37bba3f317f2e47859f2e8dcaf Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Wed, 24 Dec 2025 15:00:10 +0400 Subject: [PATCH 5/7] restoring original logic + limiting read to 512Mb + lint --- README.md | 6 ++--- cmd/httpx/resume.cfg | 2 ++ common/httpx/httpx.go | 47 ++++++++++++++++++--------------------- common/httpx/option.go | 11 ++++++++- common/stringz/stringz.go | 5 +++-- go.mod | 1 + go.sum | 2 ++ runner/options.go | 12 +++++----- 8 files changed, 50 insertions(+), 36 deletions(-) create mode 100644 cmd/httpx/resume.cfg diff --git a/README.md b/README.md index 4d11270c..978b4a16 100644 --- a/README.md +++ b/README.md @@ -254,8 +254,8 @@ OPTIMIZATIONS: -retries int number of retries -timeout int timeout in seconds (default 10) -delay value duration between each http request (eg: 200ms, 1s) (default -1ns) - -rsts, -response-size-to-save int max response size to save in bytes (default 10485760) - -rstr, -response-size-to-read int max response size to read in bytes (default 10485760) + -rsts, -response-size-to-save int max response size to save in bytes (default 2147483647) + -rstr, -response-size-to-read int max response size to read in bytes (default 2147483647) CLOUD: -auth configure projectdiscovery cloud (pdcp) api key (default true) @@ -307,4 +307,4 @@ Probing feature is inspired by [@tomnomnom/httprobe](https://github.com/tomnomno Join Discord - + \ No newline at end of file diff --git a/cmd/httpx/resume.cfg b/cmd/httpx/resume.cfg new file mode 100644 index 00000000..7cb2a56b --- /dev/null +++ b/cmd/httpx/resume.cfg @@ -0,0 +1,2 @@ +resume_from=http://localhost:8000/endless +index=1 diff --git a/common/httpx/httpx.go b/common/httpx/httpx.go index d7070523..039f4c4c 100644 --- a/common/httpx/httpx.go +++ b/common/httpx/httpx.go @@ -7,7 +7,6 @@ import ( "io" "net" "net/http" - "net/http/httputil" "net/url" "os" "strconv" @@ -236,49 +235,47 @@ get_response: resp.Headers = httpresp.Header.Clone() - // Dump headers only (does not consume body) - 
headers, err := httputil.DumpResponse(httpresp, false) + if h.Options.MaxResponseBodySizeToRead > 0 { + httpresp.Body = io.NopCloser(io.LimitReader(httpresp.Body, h.Options.MaxResponseBodySizeToRead)) + defer func() { + _, _ = io.Copy(io.Discard, httpresp.Body) + _ = httpresp.Body.Close() + }() + } + + // httputil.DumpResponse does not handle websockets + headers, rawResp, err := pdhttputil.DumpResponseHeadersAndRaw(httpresp) if err != nil { if stringsutil.ContainsAny(err.Error(), "tls: user canceled") { shouldIgnoreErrors = true shouldIgnoreBodyErrors = true } + + // Edge case - some servers respond with gzip encoding header but uncompressed body, in this case the standard library configures the reader as gzip, triggering an error when read. + // The bytes slice is not accessible because of abstraction, therefore we need to perform the request again tampering the Accept-Encoding header + if !gzipRetry && strings.Contains(err.Error(), "gzip: invalid header") { + gzipRetry = true + req.Header.Set("Accept-Encoding", "identity") + goto get_response + } if !shouldIgnoreErrors { return nil, err } } - + resp.Raw = string(rawResp) resp.RawHeaders = string(headers) - var respbody []byte // body shouldn't be read with the following status codes // 101 - Switching Protocols => websockets don't have a readable body // 304 - Not Modified => no body the response terminates with latest header newline if !generic.EqualsAny(httpresp.StatusCode, http.StatusSwitchingProtocols, http.StatusNotModified) { - + var err error respbody, err = io.ReadAll(io.LimitReader(httpresp.Body, h.Options.MaxResponseBodySizeToRead)) - if err != nil { - // Edge case: some servers respond with gzip encoding header but uncompressed body. - // Retry request with identity encoding. 
- if !gzipRetry && strings.Contains(err.Error(), "gzip: invalid header") { - gzipRetry = true - req.Header.Set("Accept-Encoding", "identity") - goto get_response - } - if !shouldIgnoreBodyErrors { - return nil, err - } + if err != nil && !shouldIgnoreBodyErrors { + return nil, err } } - // Build bounded raw response: headers + capped body - // NOTE: resp.Raw must be constructed from a capped body to avoid OOM on infinite streams. - - raw := make([]byte, 0, len(headers)+len(respbody)) - raw = append(raw, headers...) - raw = append(raw, respbody...) - resp.Raw = string(raw) - closeErr := httpresp.Body.Close() if closeErr != nil && !shouldIgnoreBodyErrors { return nil, closeErr diff --git a/common/httpx/option.go b/common/httpx/option.go index b64cfd39..8e57a322 100644 --- a/common/httpx/option.go +++ b/common/httpx/option.go @@ -5,10 +5,19 @@ import ( "strings" "time" + "github.com/dustin/go-humanize" "github.com/projectdiscovery/cdncheck" "github.com/projectdiscovery/networkpolicy" ) +// DefaultMaxResponseBodySize is the default maximum response body size (4GB) +var DefaultMaxResponseBodySize int64 + +func init() { + maxResponseBodySize, _ := humanize.ParseBytes("512Mb") + DefaultMaxResponseBodySize = int64(maxResponseBodySize) +} + // Options contains configuration options for the client type Options struct { RandomAgent bool @@ -66,7 +75,7 @@ var DefaultOptions = Options{ Unsafe: false, CdnCheck: "true", ExcludeCdn: false, - MaxResponseBodySizeToRead: 1024 * 1024 * 10, + MaxResponseBodySizeToRead: DefaultMaxResponseBodySize, // VHOSTs options VHostIgnoreStatusCode: false, VHostIgnoreContentLength: true, diff --git a/common/stringz/stringz.go b/common/stringz/stringz.go index 5033e1cb..294dc9d8 100644 --- a/common/stringz/stringz.go +++ b/common/stringz/stringz.go @@ -85,9 +85,10 @@ func AddURLDefaultPort(rawURL string) string { } // Force default port to be added if not present if u.Port() == "" { - if u.Scheme == urlutil.HTTP { + switch u.Scheme { + case 
urlutil.HTTP: u.UpdatePort("80") - } else if u.Scheme == urlutil.HTTPS { + case urlutil.HTTPS: u.UpdatePort("443") } } diff --git a/go.mod b/go.mod index cdf1d1aa..3cde4b09 100644 --- a/go.mod +++ b/go.mod @@ -51,6 +51,7 @@ require ( require ( github.com/JohannesKaufmann/html-to-markdown/v2 v2.5.0 + github.com/dustin/go-humanize v1.0.1 github.com/go-viper/mapstructure/v2 v2.4.0 github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 github.com/weppos/publicsuffix-go v0.50.1 diff --git a/go.sum b/go.sum index 6471e0b6..ba9a4322 100644 --- a/go.sum +++ b/go.sum @@ -116,6 +116,8 @@ github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 h1:2tV76y6Q9BB+NEBasnqvs7e49aEBFI8ejC89PSnWH+4= github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= diff --git a/runner/options.go b/runner/options.go index 5936c379..7c74ff8f 100644 --- a/runner/options.go +++ b/runner/options.go @@ -22,7 +22,7 @@ import ( "github.com/projectdiscovery/httpx/common/customlist" customport "github.com/projectdiscovery/httpx/common/customports" fileutilz "github.com/projectdiscovery/httpx/common/fileutil" - "github.com/projectdiscovery/httpx/common/httpx" + httpxcommon "github.com/projectdiscovery/httpx/common/httpx" 
"github.com/projectdiscovery/httpx/common/stringz" "github.com/projectdiscovery/networkpolicy" pdcpauth "github.com/projectdiscovery/utils/auth/pdcp" @@ -540,9 +540,11 @@ func ParseOptions() *Options { flagSet.IntVar(&options.Retries, "retries", 0, "number of retries"), flagSet.IntVar(&options.Timeout, "timeout", 10, "timeout in seconds"), flagSet.DurationVar(&options.Delay, "delay", -1, "duration between each http request (eg: 200ms, 1s)"), - // 10MB max response size matches common/httpx default - flagSet.IntVarP(&options.MaxResponseBodySizeToSave, "response-size-to-save", "rsts", (10*1024*1024), "max response size to save in bytes"), - flagSet.IntVarP(&options.MaxResponseBodySizeToRead, "response-size-to-read", "rstr", (10*1024*1024), "max response size to read in bytes"), + ) + + flagSet.CreateGroup("response", "Response", + flagSet.IntVarP(&options.MaxResponseBodySizeToSave, "response-size-to-save", "rsts", int(httpxcommon.DefaultMaxResponseBodySize), "max response size to save in bytes"), + flagSet.IntVarP(&options.MaxResponseBodySizeToRead, "response-size-to-read", "rstr", int(httpxcommon.DefaultMaxResponseBodySize), "max response size to read in bytes"), ) flagSet.CreateGroup("cloud", "Cloud", @@ -772,7 +774,7 @@ func (options *Options) ValidateOptions() error { options.OutputCDN = "true" } - if !stringsutil.EqualFoldAny(options.Protocol, string(httpx.UNKNOWN), string(httpx.HTTP11)) { + if !stringsutil.EqualFoldAny(options.Protocol, string(httpxcommon.UNKNOWN), string(httpxcommon.HTTP11)) { return fmt.Errorf("invalid protocol: %s", options.Protocol) } From 78bb95e9fb92ffbc99600664890f56d4f15d9130 Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Wed, 24 Dec 2025 15:10:10 +0400 Subject: [PATCH 6/7] removing test file --- cmd/httpx/resume.cfg | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 cmd/httpx/resume.cfg diff --git a/cmd/httpx/resume.cfg b/cmd/httpx/resume.cfg deleted file mode 100644 index 7cb2a56b..00000000 --- a/cmd/httpx/resume.cfg +++ 
/dev/null @@ -1,2 +0,0 @@ -resume_from=http://localhost:8000/endless -index=1 From 7e6003ca51f0feff783a47e088ba3e8ef475b88c Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Wed, 24 Dec 2025 15:11:53 +0400 Subject: [PATCH 7/7] fixing comment --- common/httpx/option.go | 2 +- runner/options.go | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/common/httpx/option.go b/common/httpx/option.go index 8e57a322..fb108729 100644 --- a/common/httpx/option.go +++ b/common/httpx/option.go @@ -10,7 +10,7 @@ import ( "github.com/projectdiscovery/networkpolicy" ) -// DefaultMaxResponseBodySize is the default maximum response body size (4GB) +// DefaultMaxResponseBodySize is the default maximum response body size var DefaultMaxResponseBodySize int64 func init() { diff --git a/runner/options.go b/runner/options.go index 7c74ff8f..c5c98d7d 100644 --- a/runner/options.go +++ b/runner/options.go @@ -540,9 +540,6 @@ func ParseOptions() *Options { flagSet.IntVar(&options.Retries, "retries", 0, "number of retries"), flagSet.IntVar(&options.Timeout, "timeout", 10, "timeout in seconds"), flagSet.DurationVar(&options.Delay, "delay", -1, "duration between each http request (eg: 200ms, 1s)"), - ) - - flagSet.CreateGroup("response", "Response", flagSet.IntVarP(&options.MaxResponseBodySizeToSave, "response-size-to-save", "rsts", int(httpxcommon.DefaultMaxResponseBodySize), "max response size to save in bytes"), flagSet.IntVarP(&options.MaxResponseBodySizeToRead, "response-size-to-read", "rstr", int(httpxcommon.DefaultMaxResponseBodySize), "max response size to read in bytes"), )