diff --git a/matching.go b/matching.go
index 11cf26e4..8f221d17 100644
--- a/matching.go
+++ b/matching.go
@@ -171,6 +171,14 @@ func processReader(reader io.Reader, matchRegexes []*regexp.Regexp, data []byte,
 						validMatch = true
 					}
 				}
+				// Second pass for -w. When preparePattern dropped the leading \b from the
+				// first-pass regex (WORD\b) so the search can start from a literal prefix,
+				// the full \bWORD\b requirement is enforced here, against the whole line.
+				// completeRegex is compiled once in preparePattern; regexp.MatchString would
+				// recompile the pattern for every candidate match.
+				if validMatch && options.completeRegex != nil {
+					validMatch = options.completeRegex.MatchString(newMatches[i].line)
+				}
 				if validMatch {
 					prevMatch = &newMatches[i]
 					i++
@@ -256,13 +264,16 @@ func getMatches(regex *regexp.Regexp, data []byte, testBuffer []byte, offset int
 			// analyze match and reject false matches
 			if !options.Multiline {
 				// remove newlines at the beginning of the match
+				trimmed := false // set once a newline is stripped; only then is re-validation needed
 				for ; start < length && end > start && data[start] == 0x0a; start++ {
+					trimmed = true
 				}
 				// remove newlines at the end of the match
 				for ; end > 0 && end > start && data[end-1] == 0x0a; end-- {
+					trimmed = true
 				}
 				// check if the corrected match is still valid
-				if !regex.Match(testBuffer[start:end]) {
+				if trimmed && !regex.Match(testBuffer[start:end]) {
 					continue
 				}
 				// check if the match contains newlines
diff --git a/options.go b/options.go
index 60f3bdbd..8c066719 100644
--- a/options.go
+++ b/options.go
@@ -25,6 +25,7 @@ import (
 	"os/user"
 	"path/filepath"
 	"regexp"
+	"regexp/syntax"
 	"runtime"
 	"sort"
 	"strconv"
@@ -106,6 +107,8 @@ type Options struct {
 	ListTypes bool `long:"list-types" description:"list available file types" json:"-" default-mask:"-"`
 	Version func() `short:"V" long:"version" description:"show version and license information" json:"-"`
 	WordRegexp bool `short:"w" long:"word-regexp" description:"only match on ASCII word boundaries"`
+	CompletePattern string `json:"-"` // derived by preparePattern: full \bWORD\b pattern for the -w second pass; never JSON-encoded
+	completeRegex *regexp.Regexp // CompletePattern compiled once; nil when no second pass is needed
 	WriteConfig bool `long:"write-config" description:"save config for loaded configs + given command line arguments" json:"-"`
 	Zip bool `short:"z" long:"zip" description:"search content of compressed .gz files (default: off)"`
 	NoZip func() `short:"Z" long:"no-zip" description:"do not search content of compressed .gz files" json:"-"`
@@ -498,6 +501,23 @@ func (o *Options) checkFormats() error {
 	return nil
 }
 
+// isPrefixStringLiteral reports whether every match of exp must start with a
+// non-empty literal string. It parses exp with Perl syntax, simplifies and
+// compiles it, and inspects the literal prefix of the resulting program.
+// Patterns that fail to parse or compile yield false.
+func isPrefixStringLiteral(exp string) bool {
+	re, err := syntax.Parse(exp, syntax.Perl)
+	if err != nil {
+		return false
+	}
+	prog, err := syntax.Compile(re.Simplify())
+	if err != nil {
+		return false
+	}
+	prefix, _ := prog.Prefix()
+	return prefix != ""
+}
+
 // preparePattern adjusts a pattern to respect the ignore-case, literal and multiline options
 func (o *Options) preparePattern(pattern string) string {
 	if o.Literal {
@@ -507,11 +527,33 @@ func (o *Options) preparePattern(pattern string) string {
 		pattern = strings.ToLower(pattern)
 	}
 	if o.WordRegexp {
-		pattern = `\b` + pattern + `\b`
+		// Full word-bounded form, carrying the same flags the returned pattern gets.
+		complete := `(?m)\b` + pattern + `\b`
+		if o.Multiline {
+			complete = "(?s)" + complete
+		}
+		// Fast path: when every match must start with a literal, return WORD\b
+		// (no leading \b) for the first pass and let processReader enforce
+		// \bWORD\b via completeRegex in a second pass.
+		// NOTE(review): this is a single shared slot; confirm preparePattern is
+		// never called for more than one pattern when -w is set.
+		// NOTE(review): pattern may have been lowercased above; confirm the line
+		// tested in the second pass is case-normalized the same way.
+		var re *regexp.Regexp
+		if isPrefixStringLiteral(pattern) {
+			// On a compile error re stays nil and the single-pass form is used.
+			re, _ = regexp.Compile(complete)
+		}
+		if re != nil {
+			o.CompletePattern, o.completeRegex = complete, re
+			pattern += `\b`
+		} else {
+			pattern = `\b` + pattern + `\b`
+		}
 	}
 	pattern = "(?m)" + pattern
 	if o.Multiline {
 		pattern = "(?s)" + pattern
 	}
 	return pattern
 }