Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion matching.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,14 @@ func processReader(reader io.Reader, matchRegexes []*regexp.Regexp, data []byte,
validMatch = true
}
}
// When -w is set and every match for WORD must start with a string literal,
// the first pass searches for WORD\b, i.e. without the leading word boundary \b:
// a literal prefix can usually be located quickly before the slower regex engine is entered.
// The leading word boundary requirement is then enforced here, in a second pass
// that re-checks the matched line against the complete pattern \bWORD\b.
if len(options.CompletePattern) > 0 && validMatch {
validMatch, _ = regexp.MatchString(options.CompletePattern, newMatches[i].line)
}
if validMatch {
prevMatch = &newMatches[i]
i++
Expand Down Expand Up @@ -256,13 +264,16 @@ func getMatches(regex *regexp.Regexp, data []byte, testBuffer []byte, offset int
// analyze match and reject false matches
if !options.Multiline {
// remove newlines at the beginning of the match
skip := false
for ; start < length && end > start && data[start] == 0x0a; start++ {
skip = true
}
// remove newlines at the end of the match
for ; end > 0 && end > start && data[end-1] == 0x0a; end-- {
skip = true
}
// check if the corrected match is still valid
if !regex.Match(testBuffer[start:end]) {
if skip && !regex.Match(testBuffer[start:end]) {
continue
}
// check if the match contains newlines
Expand Down
27 changes: 26 additions & 1 deletion options.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"os/user"
"path/filepath"
"regexp"
"regexp/syntax"
"runtime"
"sort"
"strconv"
Expand Down Expand Up @@ -106,6 +107,7 @@ type Options struct {
ListTypes bool `long:"list-types" description:"list available file types" json:"-" default-mask:"-"`
Version func() `short:"V" long:"version" description:"show version and license information" json:"-"`
WordRegexp bool `short:"w" long:"word-regexp" description:"only match on ASCII word boundaries"`
CompletePattern string
WriteConfig bool `long:"write-config" description:"save config for loaded configs + given command line arguments" json:"-"`
Zip bool `short:"z" long:"zip" description:"search content of compressed .gz files (default: off)"`
NoZip func() `short:"Z" long:"no-zip" description:"do not search content of compressed .gz files" json:"-"`
Expand Down Expand Up @@ -498,6 +500,21 @@ func (o *Options) checkFormats() error {
return nil
}

// isPrefixStringLiteral reports whether every match of the regular expression
// exp has to begin with a non-empty literal string.
//
// The expression is parsed with Perl syntax flags, simplified and compiled to
// a syntax.Prog; the result is true when that program reports a non-empty
// literal prefix. An expression that fails to parse or compile yields false.
func isPrefixStringLiteral(exp string) bool {
parsed, parseErr := syntax.Parse(exp, syntax.Perl)
if parseErr != nil {
return false
}
compiled, compileErr := syntax.Compile(parsed.Simplify())
if compileErr != nil {
return false
}
// Only the literal itself matters here; whether it covers the entire
// expression (the second return value) is irrelevant.
literal, _ := compiled.Prefix()
return literal != ""
}

// preparePattern adjusts a pattern to respect the ignore-case, literal and multiline options
func (o *Options) preparePattern(pattern string) string {
if o.Literal {
Expand All @@ -507,11 +524,19 @@ func (o *Options) preparePattern(pattern string) string {
pattern = strings.ToLower(pattern)
}
if o.WordRegexp {
pattern = `\b` + pattern + `\b`
// detect string literal to see if pattern without leading \b can use the fast path
if isPrefixStringLiteral(pattern) {
o.CompletePattern = `\b` + pattern + `\b`
pattern = pattern + `\b`
} else {
pattern = `\b` + pattern + `\b`
}
}
pattern = "(?m)" + pattern
o.CompletePattern = "(?m)" + o.CompletePattern
if o.Multiline {
pattern = "(?s)" + pattern
o.CompletePattern = "(?s)" + o.CompletePattern
}
return pattern
}
Expand Down