From 4f7eb0818c480384f39dbb414f0444e1a0eee306 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Wed, 25 Jan 2017 23:44:38 -0500 Subject: [PATCH 01/13] Return proper errors in LoadFile and LoadString - Error messages did not properly chunkID the source filename, leading to @'s next to filenames and such. - LoadFile improperly raised a generic file read error, even if the actual error was a Lua error such as a SyntaxError. --- auxiliary.go | 6 +++++- auxiliary_test.go | 39 +++++++++++++++++++++++++++++++++++++++ fixtures/syntax_error.lua | 3 +++ scanner.go | 5 +++-- 4 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 auxiliary_test.go create mode 100644 fixtures/syntax_error.lua diff --git a/auxiliary.go b/auxiliary.go index ebfc34e..d62c4d0 100644 --- a/auxiliary.go +++ b/auxiliary.go @@ -507,7 +507,11 @@ func LoadFile(l *State, fileName, mode string) error { if f != os.Stdin { _ = f.Close() } - if err != nil { + switch err { + case nil: // do nothing + case SyntaxError: // do nothing + case MemoryError: // do nothing + default: l.SetTop(fileNameIndex) return fileError("read") } diff --git a/auxiliary_test.go b/auxiliary_test.go new file mode 100644 index 0000000..fb488b5 --- /dev/null +++ b/auxiliary_test.go @@ -0,0 +1,39 @@ +package lua + +import "testing" + +func TestLoadFileSyntaxError(t *testing.T) { + l := NewState() + err := LoadFile(l, "fixtures/syntax_error.lua", "") + if err != SyntaxError { + t.Error("didn't return SyntaxError on file with syntax error") + } + if l.Top() != 1 { + t.Error("didn't push anything to the stack") + } + if l.IsString(-1) != true { + t.Error("didn't push a string to the stack") + } + estr, _ := l.ToString(-1) + if estr != "fixtures/syntax_error.lua:4: syntax error near " { + t.Error("didn't push the correct error string") + } +} + +func TestLoadStringSyntaxError(t *testing.T) { + l := NewState() + err := LoadString(l, "this_is_a_syntax_error") + if err != SyntaxError { + t.Error("didn't return SyntaxError on string with syntax error") + } + if l.Top() != 1 { + t.Error("didn't push anything to the stack") + } + if l.IsString(-1) != true { + t.Error("didn't push a string to the stack") + } + estr, _ := l.ToString(-1) + if estr != "[string \"this_is_a_syntax_error\"]:1: syntax error near " { + t.Error("didn't push the correct error string") + } +} diff --git a/fixtures/syntax_error.lua b/fixtures/syntax_error.lua new file mode 100644 index 0000000..57ae42f --- /dev/null +++ b/fixtures/syntax_error.lua @@ -0,0 +1,3 @@ +-- A file that should generate a syntax error + +this_is_a_syntax_error diff --git a/scanner.go b/scanner.go index e5e56dd..41b2238 100644 --- a/scanner.go +++ b/scanner.go @@ -99,10 +99,11 @@ func (s *scanner) tokenToString(t rune) string { } func (s *scanner) scanError(message string, token rune) { + buff := chunkID(s.source) if token != 0 { - message = fmt.Sprintf("%s:%d: %s near %s", s.source, s.lineNumber, message, s.tokenToString(token)) + message = fmt.Sprintf("%s:%d: %s near %s", buff, s.lineNumber, message, s.tokenToString(token)) } else { - message = fmt.Sprintf("%s:%d: %s", s.source, s.lineNumber, message) + message = fmt.Sprintf("%s:%d: %s", buff, s.lineNumber, message) } s.l.push(message) s.l.throw(SyntaxError) From 04840582a12d6d78ffc4e3b57586fd54e3075441 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Mon, 30 Jan 2017 19:47:37 -0500 Subject: [PATCH 02/13] Initial work on pattern-matching Currently implements only a very limited subset of `gmatch`. Yes, there is a goto in this commit - it was in the original codebase. --- pattern.go | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++++ string.go | 2 +- 2 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 pattern.go diff --git a/pattern.go b/pattern.go new file mode 100644 index 0000000..d96894a --- /dev/null +++ b/pattern.go @@ -0,0 +1,199 @@ +package lua + +import "unicode" + +const luaMaxCaptures = 32 + +const maxCCalls = 200 + +const lEsc = '%' + +type matchState struct { + matchDepth int + src *string + p *string + l *State + level int + capture [luaMaxCaptures]struct { + init int + len int + } +} + +func classend(ms *matchState, ppos int) int { + switch (*ms.p)[ppos] { + case lEsc: + ppos++ + if ppos == len(*ms.p) { + Errorf(ms.l, "malformed pattern (ends with '%')") + } + return ppos + 1 + case '[': + ppos++ + if (*ms.p)[ppos] == '^' { + ppos++ + } + for { // look for a ']' + if ppos == len(*ms.p) { + Errorf(ms.l, "malformed pattern (missing ']')") + } + ppos++ + if (*ms.p)[ppos] == lEsc && ppos < len(*ms.p) { + ppos++ // skip escapes (e.g. `%]') + } + if (*ms.p)[ppos] == '[' { + break + } + } + return ppos + 1 + default: + return ppos + 1 + } +} + +func matchClass(c byte, cl byte) bool { + var res bool + var rcl rune = rune(cl) + switch unicode.ToLower(rcl) { + // TODO: Implement other cases... + default: + return cl == c + } + if unicode.IsLower(rcl) { + return res + } else { + return !res + } +} + +func singlematch(ms *matchState, spos int, ppos int, eppos int) bool { + if spos >= len(*ms.src) { + return false + } else { + var c byte = (*ms.src)[spos] + switch (*ms.p)[ppos] { + case '.': + return true // matches any char + case lEsc: + return matchClass(c, (*ms.p)[ppos+1]) + case '[': + return false // TODO + default: + return (*ms.p)[ppos] == c + } + } +} + +func match(ms *matchState, spos int, ppos int) (int, bool) { + if ms.matchDepth == 0 { + Errorf(ms.l, "pattern too complex") + } + ms.matchDepth-- + ok := true +init: // using goto's to optimize tail recursion + if ppos != len(*ms.p) { // end of pattern? + switch (*ms.p)[ppos] { + default: // pattern class plus optional suffix + { + eppos := classend(ms, ppos) // points to optional suffix + // does not match at least once? + if !singlematch(ms, spos, ppos, eppos) { + var ep byte = 0 + if eppos != len(*ms.p) { + ep = (*ms.p)[eppos] + } + if ep == '*' || ep == '?' || ep == '-' { // accept empty? + ppos = eppos + 1 + goto init // return match(ms, spos, eppos + 1); + } else { // '+' or no suffix + ok = false // fail + } + } else { // matched once + var ep byte = 0 + if eppos != len(*ms.p) { + ep = (*ms.p)[eppos] + } + switch ep { + case '?': // optional + // TODO + case '+': // 1 or more repetitions + // TODO + fallthrough + case '*': // 0 or more repetitions + // TODO + case '-': // 0 or more repetitions (minimum) + // TODO + default: // no suffix + spos++ + ppos = eppos + goto init + } + } + } + } + } + ms.matchDepth++ + return spos, ok +} + +func pushOnecapture(ms *matchState, i int, spos int, epos int) { + if i >= ms.level { + if i == 0 { // ms->level == 0, too + ms.l.PushString((*ms.src)[spos:epos]) + } else { + Errorf(ms.l, "invalid capture index") + } + } else { + // TODO + } +} + +// TODO: spos and epos can be NULL, how to handle? +func pushCaptures(ms *matchState, spos int, epos int) int { + nlevels := 1 + if !(ms.level == 0) { + nlevels = ms.level + } + CheckStackWithMessage(ms.l, nlevels, "too many captures") + for i := 0; i < nlevels; i++ { + pushOnecapture(ms, i, spos, epos) + } + return nlevels +} + +func gmatchAux(l *State) int { + src, _ := l.ToString(UpValueIndex(1)) + p, _ := l.ToString(UpValueIndex(2)) + + ms := matchState{ + l: l, + matchDepth: maxCCalls, + src: &src, + p: &p, + } + + srcpos, _ := l.ToInteger(UpValueIndex(3)) + for ; srcpos < len(*ms.src); srcpos++ { + ms.level = 0 + epos, ok := match(&ms, srcpos, 0) + if ok { + newstart := epos + if epos == srcpos { + newstart++ + } + l.PushInteger(newstart) + l.Replace(UpValueIndex(3)) + return pushCaptures(&ms, srcpos, epos) + } + } + return 0 +} + +func gmatch(l *State) int { + CheckString(l, 1) + CheckString(l, 2) + l.SetTop(2) + l.PushInteger(0) + l.PushGoClosure(gmatchAux, 3) + return 1 +} diff --git a/string.go b/string.go index 8bbf4c0..e774679 100644 --- a/string.go +++ b/string.go @@ -178,7 +178,7 @@ var stringLibrary = []RegistryFunction{ l.PushString(formatHelper(l, CheckString(l, 1), l.Top())) return 1 }}, - // {"gmatch", ...}, + {"gmatch", gmatch}, // {"gsub", ...}, {"len", func(l *State) int { l.PushInteger(len(CheckString(l, 1))); return 1 }}, {"lower", func(l *State) int { l.PushString(strings.ToLower(CheckString(l, 1))); return 1 }}, From 4ac181c9c4504fa3ef34c1b77d79c2bc9b9f7bde Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Mon, 30 Jan 2017 23:13:16 -0500 Subject: [PATCH 03/13] Put default case into closure In the original C version of match, there is a label after the default case that can be jumped into from above. The goto implementation in Go does not allow us to jump to that label in all cases. So instead, we form a closure from the default case that we can call into anyplace. --- pattern.go | 91 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 35 deletions(-) diff --git a/pattern.go b/pattern.go index d96894a..192d63f 100644 --- a/pattern.go +++ b/pattern.go @@ -90,46 +90,67 @@ func match(ms *matchState, spos int, ppos int) (int, bool) { } ms.matchDepth-- ok := true + + // The default case - return true to goto init + defaultCase := func() bool { + eppos := classend(ms, ppos) // points to optional suffix + // does not match at least once? + if !singlematch(ms, spos, ppos, eppos) { + var ep byte = 0 + if eppos != len(*ms.p) { + ep = (*ms.p)[eppos] + } + if ep == '*' || ep == '?' || ep == '-' { // accept empty? + ppos = eppos + 1 + return true // return match(ms, spos, eppos + 1); + } else { // '+' or no suffix + ok = false // fail + } + } else { // matched once + var ep byte = 0 + if eppos != len(*ms.p) { + ep = (*ms.p)[eppos] + } + switch ep { + case '?': // optional + // TODO + case '+': // 1 or more repetitions + // TODO + fallthrough + case '*': // 0 or more repetitions + // TODO + case '-': // 0 or more repetitions (minimum) + // TODO + default: // no suffix + spos++ + ppos = eppos + return true + } + } + return false + } + init: // using goto's to optimize tail recursion if ppos != len(*ms.p) { // end of pattern? switch (*ms.p)[ppos] { - default: // pattern class plus optional suffix - { - eppos := classend(ms, ppos) // points to optional suffix - // does not match at least once? - if !singlematch(ms, spos, ppos, eppos) { - var ep byte = 0 - if eppos != len(*ms.p) { - ep = (*ms.p)[eppos] - } - if ep == '*' || ep == '?' || ep == '-' { // accept empty? - ppos = eppos + 1 - goto init // return match(ms, spos, eppos + 1); - } else { // '+' or no suffix - ok = false // fail - } - } else { // matched once - var ep byte = 0 - if eppos != len(*ms.p) { - ep = (*ms.p)[eppos] - } - switch ep { - case '?': // optional - // TODO - case '+': // 1 or more repetitions - // TODO - fallthrough - case '*': // 0 or more repetitions - // TODO - case '-': // 0 or more repetitions (minimum) - // TODO - default: // no suffix - spos++ - ppos = eppos - goto init - } + case lEsc: + pnext := (*ms.p)[ppos+1] + switch { + case pnext == 'b': // balanced string? + // TODO + case pnext == 'f': // frontier? + // TODO + case pnext >= '0' && pnext <= '9': /* capture results (%0-%9)? */ + // TODO + default: + if defaultCase() { + goto init } } + default: // pattern class plus optional suffix + if defaultCase() { + goto init + } } } ms.matchDepth++ From 6aaf0ab2f4bed9ca80bf85a82da5dc2e702938be Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Tue, 31 Jan 2017 00:31:33 -0500 Subject: [PATCH 04/13] Fill out matchClass with character tests --- pattern.go | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/pattern.go b/pattern.go index 192d63f..871b97c 100644 --- a/pattern.go +++ b/pattern.go @@ -53,9 +53,30 @@ func classend(ms *matchState, ppos int) int { func matchClass(c byte, cl byte) bool { var res bool - var rcl rune = rune(cl) + var rc, rcl rune = rune(c), rune(cl) switch unicode.ToLower(rcl) { - // TODO: Implement other cases... + case 'a': + res = unicode.IsLetter(rc) + case 'c': + res = unicode.IsControl(rc) + case 'd': + res = unicode.IsDigit(rc) + case 'g': + res = unicode.IsGraphic(rc) && !unicode.IsSpace(rc) + case 'l': + res = unicode.IsLower(rc) + case 'p': + res = unicode.IsPunct(rc) + case 's': + res = unicode.IsSpace(rc) + case 'u': + res = unicode.IsUpper(rc) + case 'w': + res = unicode.In(rc, unicode.Letter, unicode.Number) + case 'x': + res = unicode.In(rc, unicode.Hex_Digit) + case 'z': + res = (c == 0) default: return cl == c } From 648797f57019ae5bf93eb99c0b4f9589ad5e8243 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Tue, 31 Jan 2017 21:00:45 -0500 Subject: [PATCH 05/13] Implement capture groups --- pattern.go | 70 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/pattern.go b/pattern.go index 871b97c..f351fb3 100644 --- a/pattern.go +++ b/pattern.go @@ -4,9 +4,10 @@ import "unicode" const luaMaxCaptures = 32 -const maxCCalls = 200 - -const lEsc = '%' +const ( + capUnfinished = -1 + capPosition = -2 +) type matchState struct { matchDepth int @@ -20,12 +21,27 @@ type matchState struct { } } +const maxCCalls = 200 +const lEsc = '%' + +func captureToClose(ms *matchState) int { + level := ms.level + level-- + for ; level >= 0; level-- { + if ms.capture[level].len == capUnfinished { + return level + } + } + Errorf(ms.l, "invalid pattern capture") + return 0 +} + func classend(ms *matchState, ppos int) int { switch (*ms.p)[ppos] { case lEsc: ppos++ if ppos == len(*ms.p) { - Errorf(ms.l, "malformed pattern (ends with '%')") + Errorf(ms.l, "malformed pattern (ends with '%%')") } return ppos + 1 case '[': @@ -105,6 +121,31 @@ func singlematch(ms *matchState, spos int, ppos int, eppos int) bool { } } +func startCapture(ms *matchState, spos int, ppos int, what int) (int, bool) { + level := ms.level + if level >= luaMaxCaptures { + Errorf(ms.l, "too many captures") + } + ms.capture[level].init = spos + ms.capture[level].len = what + ms.level = level + 1 + res, ok := match(ms, spos, ppos) + if !ok { // match failed? + ms.level-- // undo capture + } + return res, ok +} + +func endCapture(ms *matchState, spos int, ppos int) (int, bool) { + l := captureToClose(ms) + ms.capture[l].len = spos - ms.capture[l].init // close capture + res, ok := match(ms, spos, ppos) + if !ok { // match failed? + ms.capture[l].len = capUnfinished // undo capture + } + return res, ok +} + func match(ms *matchState, spos int, ppos int) (int, bool) { if ms.matchDepth == 0 { Errorf(ms.l, "pattern too complex") @@ -154,6 +195,14 @@ func match(ms *matchState, spos int, ppos int) (int, bool) { init: // using goto's to optimize tail recursion if ppos != len(*ms.p) { // end of pattern? switch (*ms.p)[ppos] { + case '(': // start capture + if (*ms.p)[ppos+1] == ')' { + spos, ok = startCapture(ms, spos, ppos+2, capPosition) + } else { + spos, ok = startCapture(ms, spos, ppos+1, capUnfinished) + } + case ')': // end capture + spos, ok = endCapture(ms, spos, ppos+1) case lEsc: pnext := (*ms.p)[ppos+1] switch { @@ -181,12 +230,21 @@ init: // using goto's to optimize tail recursion func pushOnecapture(ms *matchState, i int, spos int, epos int) { if i >= ms.level { if i == 0 { // ms->level == 0, too - ms.l.PushString((*ms.src)[spos:epos]) + ms.l.PushString((*ms.src)[spos:epos]) // add whole match } else { Errorf(ms.l, "invalid capture index") } } else { - // TODO + l := ms.capture[i].len + if l == capUnfinished { + Errorf(ms.l, "unfinished capture") + } + ipos := ms.capture[i].init + if l == capPosition { + ms.l.PushInteger(ipos) + } else { + ms.l.PushString((*ms.src)[ipos : ipos+l]) + } } } From 7cf2efb1c43047c21eee474c0268296fa2d18f44 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Thu, 2 Feb 2017 00:10:40 -0500 Subject: [PATCH 06/13] Implement match and find, plus enable pm tests Enabling pattern-matching tests also opened the door to fixing a couple of parsing bugs and filling out a few additional features, such as +, *, and $. --- pattern.go | 140 +++++++++++++++++++++++++++++++++++++++++++++++++---- string.go | 28 ++--------- vm_test.go | 9 +++- 3 files changed, 140 insertions(+), 37 deletions(-) diff --git a/pattern.go b/pattern.go index f351fb3..dc5385c 100644 --- a/pattern.go +++ b/pattern.go @@ -1,6 +1,9 @@ package lua -import "unicode" +import ( + "strings" + "unicode" +) const luaMaxCaptures = 32 @@ -23,6 +26,7 @@ type matchState struct { const maxCCalls = 200 const lEsc = '%' +const specials = "^$*+?.([%-" func captureToClose(ms *matchState) int { level := ms.level @@ -121,6 +125,29 @@ func singlematch(ms *matchState, spos int, ppos int, eppos int) bool { } } +func maxExpand(ms *matchState, spos int, ppos int, eppos int) (int, bool) { + i := 0 // counts maximum expand for item + for { + if singlematch(ms, spos+i, ppos, eppos) { + i++ + } else { + break + } + } + // keeps trying to match with the maximum repetitions + for { + if i < 0 { + break + } + res, ok := match(ms, spos+i, eppos+1) + if ok { + return res, ok + } + i-- + } + return 0, false +} + func startCapture(ms *matchState, spos int, ppos int, what int) (int, bool) { level := ms.level if level >= luaMaxCaptures { @@ -175,12 +202,18 @@ func match(ms *matchState, spos int, ppos int) (int, bool) { } switch ep { case '?': // optional - // TODO + res, resOk := match(ms, spos+1, eppos+1) + if resOk { + spos = res + } else { + ppos = eppos + 1 + return true + } case '+': // 1 or more repetitions - // TODO + spos++ // 1 match already done fallthrough case '*': // 0 or more repetitions - // TODO + spos, ok = maxExpand(ms, spos, ppos, eppos) case '-': // 0 or more repetitions (minimum) // TODO default: // no suffix @@ -203,6 +236,16 @@ init: // using goto's to optimize tail recursion } case ')': // end capture spos, ok = endCapture(ms, spos, ppos+1) + case '$': + if ppos+1 != len(*ms.p) { // is the `$' the last char in pattern? + if defaultCase() { + goto init + } + } else { + if spos != len(*ms.src) { + spos, ok = 0, false + } + } case lEsc: pnext := (*ms.p)[ppos+1] switch { @@ -248,10 +291,9 @@ func pushOnecapture(ms *matchState, i int, spos int, epos int) { } } -// TODO: spos and epos can be NULL, how to handle? -func pushCaptures(ms *matchState, spos int, epos int) int { +func pushCaptures(ms *matchState, spos int, epos int, snil bool) int { nlevels := 1 - if !(ms.level == 0) { + if !(ms.level == 0 && !snil) { nlevels = ms.level } CheckStackWithMessage(ms.l, nlevels, "too many captures") @@ -261,14 +303,88 @@ func pushCaptures(ms *matchState, spos int, epos int) int { return nlevels } +func nospecials(p string) bool { + if strings.IndexAny(p, specials) != -1 { + return false + } + return true +} + +func strFindAux(l *State, find bool) int { + s := CheckString(l, 1) + p := CheckString(l, 2) + + init := relativePosition(OptInteger(l, 3, 1), len(s)) + if init < 1 { + init = 1 + } else if init > len(s)+1 { // start after string's end? + l.PushNil() // cannot find anything + return 1 + } + // explicit request or no special characters? + // FIXME: ToBoolean returns true for invalid index + if find && (l.Top() >= 4 && l.ToBoolean(4)) || nospecials(p) { + // do a plain search + s2 := strings.Index(s[init-1:], p) + if s2 != -1 { + l.PushInteger(s2 + init) + l.PushInteger(s2 + init + len(p) - 1) + return 2 + } + } else { + s1 := init - 1 + anchor := p[0] == '^' + if anchor { + p = p[1:] // skip anchor character + } + + ms := matchState{ + l: l, + matchDepth: maxCCalls, + src: &s, + p: &p, + } + + for { + ms.level = 0 + res, ok := match(&ms, s1, 0) + if ok { + if find { + l.PushInteger(s1 + 1) + l.PushInteger(res) + return pushCaptures(&ms, 0, 0, true) + 2 + } else { + return pushCaptures(&ms, s1, res, false) + } + } + + if !(s1 < len(*ms.src) && !anchor) { + break + } + s1++ + } + } + + l.PushNil() + return 1 +} + +func strFind(l *State) int { + return strFindAux(l, true) +} + +func strMatch(l *State) int { + return strFindAux(l, false) +} + func gmatchAux(l *State) int { - src, _ := l.ToString(UpValueIndex(1)) + s, _ := l.ToString(UpValueIndex(1)) p, _ := l.ToString(UpValueIndex(2)) ms := matchState{ l: l, matchDepth: maxCCalls, - src: &src, + src: &s, p: &p, } @@ -283,7 +399,7 @@ func gmatchAux(l *State) int { } l.PushInteger(newstart) l.Replace(UpValueIndex(3)) - return pushCaptures(&ms, srcpos, epos) + return pushCaptures(&ms, srcpos, epos, false) } } return 0 @@ -297,3 +413,7 @@ func gmatch(l *State) int { l.PushGoClosure(gmatchAux, 3) return 1 } + +func strGsub(l *State) int { + return 0 +} diff --git a/string.go b/string.go index e774679..695c7ca 100644 --- a/string.go +++ b/string.go @@ -16,28 +16,6 @@ func relativePosition(pos, length int) int { return length + pos + 1 } -func findHelper(l *State, isFind bool) int { - s, p := CheckString(l, 1), CheckString(l, 2) - init := relativePosition(OptInteger(l, 3, 1), len(s)) - if init < 1 { - init = 1 - } else if init > len(s)+1 { - l.PushNil() - return 1 - } - if isFind && (l.ToBoolean(4) || !strings.ContainsAny(p, "^$*+?.([%-")) { - if start := strings.Index(s[init-1:], p); start >= 0 { - l.PushInteger(start + init) - l.PushInteger(start + init + len(p) - 1) - return 2 - } - } else { - l.assert(false) // TODO implement pattern matching - } - l.PushNil() - return 1 -} - func scanFormat(l *State, fs string) string { i := 0 skipDigit := func() { @@ -173,16 +151,16 @@ var stringLibrary = []RegistryFunction{ return 1 }}, // {"dump", ...}, - {"find", func(l *State) int { return findHelper(l, true) }}, + {"find", strFind}, {"format", func(l *State) int { l.PushString(formatHelper(l, CheckString(l, 1), l.Top())) return 1 }}, {"gmatch", gmatch}, - // {"gsub", ...}, + {"gsub", strGsub}, {"len", func(l *State) int { l.PushInteger(len(CheckString(l, 1))); return 1 }}, {"lower", func(l *State) int { l.PushString(strings.ToLower(CheckString(l, 1))); return 1 }}, - // {"match", ...}, + {"match", strMatch}, {"rep", func(l *State) int { s, n, sep := CheckString(l, 1), CheckInteger(l, 2), OptString(l, 3, "") if n <= 0 { diff --git a/vm_test.go b/vm_test.go index 03e4d8a..dae08a0 100644 --- a/vm_test.go +++ b/vm_test.go @@ -72,7 +72,7 @@ func TestLua(t *testing.T) { // {name: "main"}, {name: "math"}, // {name: "nextvar"}, - // {name: "pm"}, + {name: "pm"}, {name: "sort", nonPort: true}, // sort.lua depends on os.clock(), which is not yet implemented on Windows. {name: "strings"}, // {name: "vararg"}, @@ -108,7 +108,12 @@ func TestLua(t *testing.T) { } // l.Call(0, 0) if err := l.ProtectedCall(0, 0, traceback); err != nil { - t.Errorf("'%s' failed: %s", v.name, err.Error()) + str, ok := l.ToString(-1) + if ok { + t.Errorf("'%s' failed: %s", v.name, str) + } else { + t.Errorf("'%s' failed (no Lua message): %s", v.name, err.Error()) + } } } } From e0366a36c07be78b615b2a0b230e63fad198fa33 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Sun, 5 Feb 2017 18:41:12 -0500 Subject: [PATCH 07/13] Bracket classes and ungreedy 0-or-more repeat --- pattern.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/pattern.go b/pattern.go index dc5385c..348d489 100644 --- a/pattern.go +++ b/pattern.go @@ -61,7 +61,7 @@ func classend(ms *matchState, ppos int) int { if (*ms.p)[ppos] == lEsc && ppos < len(*ms.p) { ppos++ // skip escapes (e.g. `%]') } - if (*ms.p)[ppos] == '[' { + if (*ms.p)[ppos] == ']' { break } } @@ -107,6 +107,38 @@ func matchClass(c byte, cl byte) bool { } } +func matchbracketclass(c byte, p string, ppos int, ecpos int) bool { + sig := true + + if p[ppos+1] == '^' { + sig = false + ppos++ // skip the `^' + } + + for { + ppos++ + if ppos >= ecpos { + break + } + + if p[ppos] == lEsc { + ppos++ + if matchClass(c, p[ppos]) { + return sig + } + } else if p[ppos+1] == '-' && ppos+2 < ecpos { + ppos = ppos + 2 + if p[ppos-2] <= c && c <= p[ppos] { + return sig + } + } else if p[ppos] == c { + return sig + } + } + + return !sig +} + func singlematch(ms *matchState, spos int, ppos int, eppos int) bool { if spos >= len(*ms.src) { return false @@ -118,7 +150,7 @@ func singlematch(ms *matchState, spos int, ppos int, eppos int) bool { case lEsc: return matchClass(c, (*ms.p)[ppos+1]) case '[': - return false // TODO + return matchbracketclass(c, *ms.p, ppos, eppos-1) default: return (*ms.p)[ppos] == c } @@ -148,6 +180,19 @@ func maxExpand(ms *matchState, spos int, ppos int, eppos int) (int, bool) { return 0, false } +func minExpand(ms *matchState, spos int, ppos int, eppos int) (int, bool) { + for { + res, ok := match(ms, spos, eppos+1) + if ok { + return res, true + } else if singlematch(ms, spos, ppos, eppos) { + spos++ + } else { + return 0, false + } + } +} + func startCapture(ms *matchState, spos int, ppos int, what int) (int, bool) { level := ms.level if level >= luaMaxCaptures { @@ -215,7 +260,7 @@ func match(ms *matchState, spos int, ppos int) (int, bool) { case '*': // 0 or more repetitions spos, ok = maxExpand(ms, spos, ppos, eppos) case '-': // 0 or more repetitions (minimum) - // TODO + spos, ok = minExpand(ms, spos, ppos, eppos) default: // no suffix spos++ ppos = eppos From ccb79be83a2b219b166042ddfe864f5b96e7df4e Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Mon, 6 Feb 2017 22:32:24 -0500 Subject: [PATCH 08/13] Add string.gsub and capture support --- pattern.go | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 3 deletions(-) diff --git a/pattern.go b/pattern.go index 348d489..0bba09b 100644 --- a/pattern.go +++ b/pattern.go @@ -1,6 +1,7 @@ package lua import ( + "bytes" "strings" "unicode" ) @@ -28,6 +29,14 @@ const maxCCalls = 200 const lEsc = '%' const specials = "^$*+?.([%-" +func checkCapture(ms *matchState, l int) int { + l = l - '1' + if l < 0 || l >= ms.level || ms.capture[l].len == capUnfinished { + Errorf(ms.l, "invalid capture index %%%d", l+1) + } + return l +} + func captureToClose(ms *matchState) int { level := ms.level level-- @@ -218,6 +227,25 @@ func endCapture(ms *matchState, spos int, ppos int) (int, bool) { return res, ok } +func matchCapture(ms *matchState, spos int, l int) (int, bool) { + l = checkCapture(ms, l) + ln := ms.capture[l].len + + // memcmp(ms->capture[l].init, s, len) + capBytes := (*ms.src)[ms.capture[l].init : ms.capture[l].init+ln] + sposln := len(*ms.src) - spos + if ln < sposln { + sposln = ln + } + sposBytes := (*ms.src)[spos : spos+sposln] + + if len(*ms.src)-spos >= ln && strings.Compare(capBytes, sposBytes) == 0 { + return spos + ln, true + } else { + return 0, false + } +} + func match(ms *matchState, spos int, ppos int) (int, bool) { if ms.matchDepth == 0 { Errorf(ms.l, "pattern too complex") @@ -299,7 +327,11 @@ init: // using goto's to optimize tail recursion case pnext == 'f': // frontier? // TODO case pnext >= '0' && pnext <= '9': /* capture results (%0-%9)? */ - // TODO + spos, ok = matchCapture(ms, spos, int((*ms.p)[ppos+1])) + if ok { + ppos = ppos + 2 + goto init + } default: if defaultCase() { goto init @@ -329,7 +361,7 @@ func pushOnecapture(ms *matchState, i int, spos int, epos int) { } ipos := ms.capture[i].init if l == capPosition { - ms.l.PushInteger(ipos) + ms.l.PushInteger(ipos + 1) } else { ms.l.PushString((*ms.src)[ipos : ipos+l]) } @@ -459,6 +491,106 @@ func gmatch(l *State) int { return 1 } +func addS(ms *matchState, b *bytes.Buffer, spos int, epos int) { + news, _ := ms.l.ToString(3) + for i := 0; i < len(news); i++ { + if news[i] != lEsc { + b.WriteByte(news[i]) + } else { + i++ // skip ESC + if !unicode.IsDigit(rune(news[i])) { + if news[i] != lEsc { + Errorf(ms.l, "invalid use of '%%' in replacement string") + } + b.WriteByte(news[i]) + } else if news[i] == '0' { + b.WriteString((*ms.src)[spos:epos]) + } else { + pushOnecapture(ms, int(news[i]-'1'), spos, epos) + bs, _ := ms.l.ToString(-1) // add capture to accumulated result + b.WriteString(bs) + ms.l.Pop(1) + } + } + } +} + +func addValue(ms *matchState, b *bytes.Buffer, spos int, epos int, tr Type) { + switch tr { + case TypeFunction: + ms.l.PushValue(3) + n := pushCaptures(ms, spos, epos, false) + ms.l.Call(n, 1) + case TypeTable: + pushOnecapture(ms, 0, spos, epos) + ms.l.Table(3) + default: // TypeNumber or TypeString + addS(ms, b, spos, epos) + return + } + + if !ms.l.ToBoolean(-1) { // nil or false? + ms.l.Pop(1) + ms.l.PushString((*ms.src)[spos:epos]) // keep original text + } else if !ms.l.IsString(-1) { + Errorf(ms.l, "invalid replacement value (a %s)", TypeNameOf(ms.l, -1)) + } + + bs, _ := ms.l.ToString(-1) // add result to accumulator + b.WriteString(bs) + ms.l.Pop(1) +} + func strGsub(l *State) int { - return 0 + src := CheckString(l, 1) + p := CheckString(l, 2) + tr := l.TypeOf(3) + maxS := OptInteger(l, 4, len(src)+1) + + anchor := p[0] == '^' + n := 0 + + ArgumentCheck(l, tr == TypeNumber || tr == TypeString || tr == TypeFunction || tr == TypeTable, 3, "string/function/table expected") + if anchor { + p = p[1:] // skip anchor character + } + + ms := matchState{ + l: l, + matchDepth: maxCCalls, + src: &src, + p: &p, + } + srcpos := 0 + b := new(bytes.Buffer) + + for { + if n >= maxS { + break + } + + ms.level = 0 + epos, ok := match(&ms, srcpos, 0) + if ok { + n++ + addValue(&ms, b, srcpos, epos, tr) + } + if ok && epos > srcpos { // non empty match? + srcpos = epos // skip it + } else if srcpos < len(src) { + b.WriteByte(src[srcpos]) + srcpos++ + } else { + break + } + if anchor { + break + } + } + + b.WriteString(src[srcpos:]) + l.PushString(b.String()) + l.PushInteger(n) // number of substitutions + + return 2 } From ccda1f1a7ed98c000c55d76fed0c2e69b6bbbbe5 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Tue, 7 Feb 2017 21:34:00 -0500 Subject: [PATCH 09/13] Bugfixes - When looking for the end of a square-bracket class, properly skip past an escaped character. - Properly check for all forms of punctuation in %p test, not just what unicode considers punctuation. - Fix out-of-bounds read in gsub function. --- pattern.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pattern.go b/pattern.go index 0bba09b..d8bda44 100644 --- a/pattern.go +++ b/pattern.go @@ -68,7 +68,7 @@ func classend(ms *matchState, ppos int) int { } ppos++ if (*ms.p)[ppos] == lEsc && ppos < len(*ms.p) { - ppos++ // skip escapes (e.g. `%]') + ppos = ppos + 2 // skip escapes (e.g. `%]') } if (*ms.p)[ppos] == ']' { break @@ -95,7 +95,7 @@ func matchClass(c byte, cl byte) bool { case 'l': res = unicode.IsLower(rc) case 'p': - res = unicode.IsPunct(rc) + res = unicode.In(rc, unicode.Mark, unicode.Punct, unicode.Symbol) case 's': res = unicode.IsSpace(rc) case 'u': @@ -547,7 +547,7 @@ func strGsub(l *State) int { tr := l.TypeOf(3) maxS := OptInteger(l, 4, len(src)+1) - anchor := p[0] == '^' + anchor := len(p) > 0 && p[0] == '^' n := 0 ArgumentCheck(l, tr == TypeNumber || tr == TypeString || tr == TypeFunction || tr == TypeTable, 3, "string/function/table expected") From 5006d1089f7b1e018b5a4a30c13cbcda4b23fcfe Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Wed, 8 Feb 2017 23:51:04 -0500 Subject: [PATCH 10/13] All pm.lua tests pass! - Implement balanced string and frontier matching. - Fix error messages to actually show up. - Fix a few nasty out of bounds slice accesses. - Run gmatch against the very last position of the source string. --- pattern.go | 74 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/pattern.go b/pattern.go index d8bda44..8e93649 100644 --- a/pattern.go +++ b/pattern.go @@ -53,7 +53,7 @@ func classend(ms *matchState, ppos int) int { switch (*ms.p)[ppos] { case lEsc: ppos++ - if ppos == len(*ms.p) { + if ppos >= len(*ms.p) { Errorf(ms.l, "malformed pattern (ends with '%%')") } return ppos + 1 @@ -63,14 +63,14 @@ func classend(ms *matchState, ppos int) int { ppos++ } for { // look for a ']' - if ppos == len(*ms.p) { + if ppos >= len(*ms.p) { Errorf(ms.l, "malformed pattern (missing ']')") } ppos++ - if (*ms.p)[ppos] == lEsc && ppos < len(*ms.p) { + if ppos < len(*ms.p) && (*ms.p)[ppos] == lEsc { ppos = ppos + 2 // skip escapes (e.g. `%]') } - if (*ms.p)[ppos] == ']' { + if ppos < len(*ms.p) && (*ms.p)[ppos] == ']' { break } } @@ -166,6 +166,36 @@ func singlematch(ms *matchState, spos int, ppos int, eppos int) bool { } } +func matchbalance(ms *matchState, spos int, ppos int) (int, bool) { + if ppos >= len(*ms.p)-1 { + Errorf(ms.l, "malformed pattern (missing arguments to '%%b')") + } + + if spos >= len(*ms.src) || (*ms.src)[spos] != (*ms.p)[ppos] { + return 0, false + } else { + b := (*ms.p)[ppos] + e := (*ms.p)[ppos+1] + cont := 1 + for { + spos++ + if spos >= len(*ms.src) { + break + } + if (*ms.src)[spos] == e { + cont-- + if cont == 0 { + return spos + 1, true + } + } else if (*ms.src)[spos] == b { + cont++ + } + } + } + + return 0, false +} + func maxExpand(ms *matchState, spos int, ppos int, eppos int) (int, bool) { i := 0 // counts maximum expand for item for { @@ -320,17 +350,43 @@ init: // using goto's to optimize tail recursion } } case lEsc: - pnext := (*ms.p)[ppos+1] + var pnext byte + if ppos+1 < len(*ms.p) { + pnext = (*ms.p)[ppos+1] + } switch { case pnext == 'b': // balanced string? - // TODO + spos, ok = matchbalance(ms, spos, ppos+2) + if ok { + ppos = ppos + 4 + goto init // return match(ms, s, p + 4) + } // else fail case pnext == 'f': // frontier? - // TODO + ppos = ppos + 2 + if ppos >= len(*ms.p) || (*ms.p)[ppos] != '[' { + Errorf(ms.l, "missing '[' after '%%f' in pattern") + } + eppos := classend(ms, ppos) // points to what is next + var previous byte = 0 + if spos != 0 { + previous = (*ms.src)[spos-1] + } + if !matchbracketclass(previous, *ms.p, ppos, eppos-1) { + var sc byte + if spos < len(*ms.src) { + sc = (*ms.src)[spos] + } + if matchbracketclass(sc, *ms.p, ppos, eppos-1) { + ppos = eppos + goto init + } + } + ok = false // match failed case pnext >= '0' && pnext <= '9': /* capture results (%0-%9)? */ spos, ok = matchCapture(ms, spos, int((*ms.p)[ppos+1])) if ok { ppos = ppos + 2 - goto init + goto init // return match(ms, s, p + 2) } default: if defaultCase() { @@ -466,7 +522,7 @@ func gmatchAux(l *State) int { } srcpos, _ := l.ToInteger(UpValueIndex(3)) - for ; srcpos < len(*ms.src); srcpos++ { + for ; srcpos <= len(*ms.src); srcpos++ { ms.level = 0 epos, ok := match(&ms, srcpos, 0) if ok { From 1fd12d2cc55c6c4a27a40239413136eb0b528989 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Thu, 9 Feb 2017 18:19:30 -0500 Subject: [PATCH 11/13] Get rid of default closure in match function The original version of match() used goto labels to avoid recursive function calls and keep control over the stack size. However, Go cannot jump from one block to another, so the original location of the "dflt" label was unusable. Until this commit, I worked around this deficiency with a closure, but this made the code harder to follow and "out-of-order" compared to the original codebase. The closure is gone, and instead we move the default case of the main switch out into the parent block, reintroducing the old "dflt" label. In cases where we do not want to execute the "dflt" label, we skip over it with a new label that goes straight to the "end" of the function. --- pattern.go | 105 +++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/pattern.go b/pattern.go index 8e93649..e5d3949 100644 --- a/pattern.go +++ b/pattern.go @@ -276,6 +276,15 @@ func matchCapture(ms *matchState, spos int, l int) (int, bool) { } } +// This function makes liberal use of goto in order to keep control over the +// stack size, similar to the original C version of the function. However, +// this implementation has an additional goto label that was not in the +// original code. Go cannot jump from one block to another, so the dflt label +// that used to come right after the default case of the main switch could +// not be jumped into. +// +// Instead, we drag the default case outside of the switch, and skip over it +// to the "end" of the function in cases where we shouldn't execute it. func match(ms *matchState, spos int, ppos int) (int, bool) { if ms.matchDepth == 0 { Errorf(ms.l, "pattern too complex") @@ -283,51 +292,6 @@ func match(ms *matchState, spos int, ppos int) (int, bool) { ms.matchDepth-- ok := true - // The default case - return true to goto init - defaultCase := func() bool { - eppos := classend(ms, ppos) // points to optional suffix - // does not match at least once? - if !singlematch(ms, spos, ppos, eppos) { - var ep byte = 0 - if eppos != len(*ms.p) { - ep = (*ms.p)[eppos] - } - if ep == '*' || ep == '?' || ep == '-' { // accept empty? - ppos = eppos + 1 - return true // return match(ms, spos, eppos + 1); - } else { // '+' or no suffix - ok = false // fail - } - } else { // matched once - var ep byte = 0 - if eppos != len(*ms.p) { - ep = (*ms.p)[eppos] - } - switch ep { - case '?': // optional - res, resOk := match(ms, spos+1, eppos+1) - if resOk { - spos = res - } else { - ppos = eppos + 1 - return true - } - case '+': // 1 or more repetitions - spos++ // 1 match already done - fallthrough - case '*': // 0 or more repetitions - spos, ok = maxExpand(ms, spos, ppos, eppos) - case '-': // 0 or more repetitions (minimum) - spos, ok = minExpand(ms, spos, ppos, eppos) - default: // no suffix - spos++ - ppos = eppos - return true - } - } - return false - } - init: // using goto's to optimize tail recursion if ppos != len(*ms.p) { // end of pattern? switch (*ms.p)[ppos] { @@ -341,9 +305,7 @@ init: // using goto's to optimize tail recursion spos, ok = endCapture(ms, spos, ppos+1) case '$': if ppos+1 != len(*ms.p) { // is the `$' the last char in pattern? - if defaultCase() { - goto init - } + goto dflt } else { if spos != len(*ms.src) { spos, ok = 0, false @@ -389,16 +351,55 @@ init: // using goto's to optimize tail recursion goto init // return match(ms, s, p + 2) } default: - if defaultCase() { + goto dflt + } + default: + goto dflt // Old dflt label was here. + } + goto end // We shouldn't execute the default case. + dflt: // pattern class plus optional suffix + eppos := classend(ms, ppos) // points to optional suffix + // does not match at least once? + if !singlematch(ms, spos, ppos, eppos) { + var ep byte = 0 + if eppos != len(*ms.p) { + ep = (*ms.p)[eppos] + } + if ep == '*' || ep == '?' || ep == '-' { // accept empty? + ppos = eppos + 1 + goto init // return match(ms, spos, eppos + 1); + } else { // '+' or no suffix + ok = false // fail + } + } else { // matched once + var ep byte = 0 + if eppos != len(*ms.p) { + ep = (*ms.p)[eppos] + } + switch ep { + case '?': // optional + res, resOk := match(ms, spos+1, eppos+1) + if resOk { + spos = res + } else { + ppos = eppos + 1 goto init } - } - default: // pattern class plus optional suffix - if defaultCase() { + case '+': // 1 or more repetitions + spos++ // 1 match already done + fallthrough + case '*': // 0 or more repetitions + spos, ok = maxExpand(ms, spos, ppos, eppos) + case '-': // 0 or more repetitions (minimum) + spos, ok = minExpand(ms, spos, ppos, eppos) + default: // no suffix + spos++ + ppos = eppos goto init } } } +end: ms.matchDepth++ return spos, ok } From 2fc4bec2b7c29ca76c1891d8368b70200926abce Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Thu, 9 Feb 2017 18:30:34 -0500 Subject: [PATCH 12/13] Roll duplicate cases into one line --- auxiliary.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/auxiliary.go b/auxiliary.go index d62c4d0..f1727c8 100644 --- a/auxiliary.go +++ b/auxiliary.go @@ -508,9 +508,7 @@ func LoadFile(l *State, fileName, mode string) error { _ = f.Close() } switch err { - case nil: // do nothing - case SyntaxError: // do nothing - case MemoryError: // do nothing + case nil, SyntaxError, MemoryError: // do nothing default: l.SetTop(fileNameIndex) return fileError("read") From 82ec1f7c4e2c8e04f5717a48733caca9190507d8 Mon Sep 17 00:00:00 2001 From: Alex Mayfield Date: Thu, 9 Feb 2017 18:44:26 -0500 Subject: [PATCH 13/13] Remove workaround for buggy ToBoolean behavior --- pattern.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pattern.go b/pattern.go index e5d3949..3c19c86 100644 --- a/pattern.go +++ b/pattern.go @@ -456,8 +456,7 @@ func strFindAux(l *State, find bool) int { return 1 } // explicit request or no special characters? - // FIXME: ToBoolean returns true for invalid index - if find && (l.Top() >= 4 && l.ToBoolean(4)) || nospecials(p) { + if find && l.ToBoolean(4) || nospecials(p) { // do a plain search s2 := strings.Index(s[init-1:], p) if s2 != -1 {