From c4e3f803bca4b527a567bb2a43ee484abc20786c Mon Sep 17 00:00:00 2001 From: Daniel Rubery Date: Sun, 26 Oct 2025 16:47:39 -0700 Subject: [PATCH] Adding user-defined group delimiters This addresses https://github.com/google/keep-sorted/issues/97. The discussion there suggested trying to override line breaks, but this turns out to be difficult. The options are not specified until after we've split into lines, and lots of places use line numbers for fixes. Instead, we allow customization of how lines are associated into groups. In order to support the TOML's use case (end groups on blank line) and SQL's use case (end groups if the line ends in a semicolon), we allow a new list of regexes, group_delimiter_regexes that determine if the line should end a group. There's a subtlety to precedence with sticky_prefixes. Since lines starting with sticky_prefixes should associate to a group below them, the first line with a sticky prefix can create a new group, despite what the We allow sticky_prefixes to take precedence. --- goldens/group_delimiter_regexes.in | 20 ++++++++++++ goldens/group_delimiter_regexes.out | 20 ++++++++++++ keepsorted/keep_sorted_test.go | 47 +++++++++++++++++++++++++++++ keepsorted/line_group.go | 13 +++++++- keepsorted/options.go | 23 ++++++++------ keepsorted/options_parser.go | 10 +++--- keepsorted/options_parser_test.go | 6 ++-- keepsorted/options_test.go | 21 ++++++++++--- 8 files changed, 137 insertions(+), 23 deletions(-) create mode 100644 goldens/group_delimiter_regexes.in create mode 100644 goldens/group_delimiter_regexes.out diff --git a/goldens/group_delimiter_regexes.in b/goldens/group_delimiter_regexes.in new file mode 100644 index 0000000..59240db --- /dev/null +++ b/goldens/group_delimiter_regexes.in @@ -0,0 +1,20 @@ +Sort by consecutive newlines: + // keep-sorted-test start group_delimiter_regexes=['^$'] + b-block + attached line 1 + attached line 2 + + a-block + attached line 3 + attached line 4 + // keep-sorted-test end + +Sort by semicolon: + // keep-sorted-test start group_delimiter_regexes=[';$'] + XXX a semicolon ; in the middle is ignored + But at the end it terminates the group ; + + ZZZ This group will come after YYY; + + YYY will be sorted up; + // keep-sorted-test end diff --git a/goldens/group_delimiter_regexes.out b/goldens/group_delimiter_regexes.out new file mode 100644 index 0000000..04638dd --- /dev/null +++ b/goldens/group_delimiter_regexes.out @@ -0,0 +1,20 @@ +Sort by consecutive newlines: + // keep-sorted-test start group_delimiter_regexes=['^$'] + a-block + attached line 3 + attached line 4 + b-block + attached line 1 + attached line 2 + + // keep-sorted-test end + +Sort by semicolon: + // keep-sorted-test start group_delimiter_regexes=[';$'] + XXX a semicolon ; in the middle is ignored + But at the end it terminates the group ; + + YYY will be sorted up; + + ZZZ This group will come after YYY; + // keep-sorted-test end diff --git a/keepsorted/keep_sorted_test.go b/keepsorted/keep_sorted_test.go index c82265c..5826292 100644 --- a/keepsorted/keep_sorted_test.go +++ b/keepsorted/keep_sorted_test.go @@ -15,6 +15,7 @@ package keepsorted import ( + "regexp" "strings" "testing" @@ -1632,6 +1633,52 @@ func TestLineGrouping(t *testing.T) { `"""`}}, }, }, + { + name: "GroupDelimiter_BlankLine", + opts: blockOptions{ + GroupDelimiterRegexes: []RegexOption{ + {Pattern: regexp.MustCompile(`^$`)}, + }, + }, + + want: []lineGroupContent{ + {lines: []string{ + "[toml]", + "key=value", + "", + }}, + {lines: []string{ + "[block_two]", + "key=value", + "", + }}, + {lines: []string{ + "[block_three]", + "final_key=value", + }}, + }, + }, + { + name: "GroupDelimiter_Semicolon", + opts: blockOptions{ + GroupDelimiterRegexes: []RegexOption{ + {Pattern: regexp.MustCompile(`;$`)}, + }, + }, + + want: []lineGroupContent{ + {lines: []string{ + "statement ; in middle", + "semicolon at end;", + }}, + {lines: []string{ + "Next paragraph;", + }}, + {lines: []string{ + "And the final one", + }}, + }, + }, } { t.Run(tc.name, func(t *testing.T) { initZerolog(t) diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go index b6b33ac..49f15ac 100644 --- a/keepsorted/line_group.go +++ b/keepsorted/line_group.go @@ -149,6 +149,17 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup { } else { commentRange.append(i) } + } else if len(metadata.opts.GroupDelimiterRegexes) != 0 { + appendLine(i, l) + for _, match := range metadata.opts.matchRegexes(l, metadata.opts.GroupDelimiterRegexes) { + if match == nil { + continue + } + if !lineRange.empty() { + finishGroup() + } + break + } } else { if !lineRange.empty() { finishGroup() @@ -361,7 +372,7 @@ func (lg *lineGroup) commentOnly() bool { func (lg *lineGroup) regexTokens() []regexToken { // TODO: jfaer - Should we match regexes on the original content? - regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines()) + regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines(), lg.opts.ByRegex) ret := make([]regexToken, len(regexMatches)) if lg.access.regexTokens == nil { lg.access.regexTokens = make([]regexTokenAccessRecorder, len(regexMatches)) diff --git a/keepsorted/options.go b/keepsorted/options.go index ed10687..20c31bb 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -35,7 +35,7 @@ import ( // true is unmarshaled as 1, false as 0. type IntOrBool int -type ByRegexOption struct { +type RegexOption struct { Pattern *regexp.Regexp Template *string } @@ -67,7 +67,7 @@ func (opts BlockOptions) String() string { // - []string: key=a,b,c,d // - map[string]bool: key=a,b,c,d // - int: key=123 -// - ByRegexOptions key=a,b,c,d, key=[yaml_list] +// - []RegexOptions: key=a,b,c,d, key=[yaml_list] type blockOptions struct { // AllowYAMLLists determines whether list.set valued options are allowed to be specified by YAML. AllowYAMLLists bool `key:"allow_yaml_lists"` @@ -88,6 +88,8 @@ type blockOptions struct { StickyComments bool `key:"sticky_comments"` // StickyPrefixes tells us about other types of lines that should behave as sticky comments. StickyPrefixes map[string]bool `key:"sticky_prefixes"` + // GroupDelimiterRegexes tells us if a line is allowed to end a group. + GroupDelimiterRegexes []RegexOption `key:"group_delimiter_regexes"` /////////////////////// // Sorting options // @@ -102,7 +104,7 @@ type blockOptions struct { // IgnorePrefixes is a slice of prefixes that we do not consider when sorting lines. IgnorePrefixes []string `key:"ignore_prefixes"` // ByRegex is a slice of regexes that are used to extract the pieces of the line group that keep-sorted should sort by. - ByRegex []ByRegexOption `key:"by_regex"` + ByRegex []RegexOption `key:"by_regex"` //////////////////////////// // Post-sorting options // @@ -210,8 +212,8 @@ func formatValue(val reflect.Value) (string, error) { return strconv.Itoa(int(val.Int())), nil case reflect.TypeFor[int](): return strconv.Itoa(int(val.Int())), nil - case reflect.TypeFor[[]ByRegexOption](): - opts := val.Interface().([]ByRegexOption) + case reflect.TypeFor[[]RegexOption](): + opts := val.Interface().([]RegexOption) vals := make([]string, 0, len(opts)) seenTemplate := false for _, opt := range opts { @@ -390,20 +392,20 @@ func (opts blockOptions) trimIgnorePrefix(s string) string { return s } -// matchRegexes applies ByRegex to s. -// If ByRegex is empty, returns a slice that contains just s. +// matchRegexes applies regexes to s. +// If regexes is empty, returns a slice that contains just s. // Otherwise, applies each regex to s in sequence: // If a regex has capturing groups, the capturing groups will be added to the // resulting slice. // If a regex does not have capturing groups, all matched text will be added to // the resulting slice. -func (opts blockOptions) matchRegexes(s string) []regexMatch { - if len(opts.ByRegex) == 0 { +func (opts blockOptions) matchRegexes(s string, regexes []RegexOption) []regexMatch { + if len(regexes) == 0 { return []regexMatch{{s}} } var ret []regexMatch - for _, p := range opts.ByRegex { + for _, p := range regexes { regex := p.Pattern if p.Template != nil { @@ -421,6 +423,7 @@ func (opts blockOptions) matchRegexes(s string) []regexMatch { } m := regex.FindStringSubmatch(s) + if m == nil { ret = append(ret, regexDidNotMatch) continue diff --git a/keepsorted/options_parser.go b/keepsorted/options_parser.go index 0d264a0..aa249d4 100644 --- a/keepsorted/options_parser.go +++ b/keepsorted/options_parser.go @@ -65,7 +65,7 @@ func (p *parser) popValue(typ reflect.Type) (reflect.Value, error) { case reflect.TypeFor[map[string]bool](): val, err := p.popSet() return reflect.ValueOf(val), err - case reflect.TypeFor[[]ByRegexOption](): + case reflect.TypeFor[[]RegexOption](): val, err := p.popListRegexOption() if err != nil { return reflect.Zero(typ), err @@ -113,7 +113,7 @@ func (p *parser) popIntOrBool() (IntOrBool, error) { return IntOrBool(i), nil } -func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error { +func (ar *RegexOption) UnmarshalYAML(node *yaml.Node) error { switch node.Tag { case "!!str": pat, err := regexp.Compile(node.Value) @@ -180,10 +180,10 @@ func (p *parser) popList() ([]string, error) { return popListValue(p, func(s string) (string, error) { return s, nil }) } -func (p *parser) popListRegexOption() ([]ByRegexOption, error) { - return popListValue(p, func(s string) (ByRegexOption, error) { +func (p *parser) popListRegexOption() ([]RegexOption, error) { + return popListValue(p, func(s string) (RegexOption, error) { pat, err := regexp.Compile(s) - return ByRegexOption{Pattern: pat}, err + return RegexOption{Pattern: pat}, err }) } diff --git a/keepsorted/options_parser_test.go b/keepsorted/options_parser_test.go index 284cab5..309fc7b 100644 --- a/keepsorted/options_parser_test.go +++ b/keepsorted/options_parser_test.go @@ -218,14 +218,14 @@ func TestPopValue(t *testing.T) { name: "Regex", input: ".*", - want: []ByRegexOption{{regexp.MustCompile(".*"), nil}}, + want: []RegexOption{{regexp.MustCompile(".*"), nil}}, }, { name: "MultipleRegex", input: `[.*, abcd, '(?:efgh)ijkl']`, allowYAMLList: true, - want: []ByRegexOption{ + want: []RegexOption{ {regexp.MustCompile(".*"), nil}, {regexp.MustCompile("abcd"), nil}, {regexp.MustCompile("(?:efgh)ijkl"), nil}, @@ -236,7 +236,7 @@ func TestPopValue(t *testing.T) { input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`, allowYAMLList: true, - want: []ByRegexOption{ + want: []RegexOption{ {regexp.MustCompile(".*"), nil}, {regexp.MustCompile("Mon"), &([]string{"0"})[0]}, {regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]}, diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go index 13f4d1c..fe0ad94 100644 --- a/keepsorted/options_test.go +++ b/keepsorted/options_test.go @@ -193,7 +193,7 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{ AllowYAMLLists: true, - ByRegex: []ByRegexOption{ + ByRegex: []RegexOption{ {regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil}, }, }, @@ -205,13 +205,26 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{ AllowYAMLLists: true, - ByRegex: []ByRegexOption{ + ByRegex: []RegexOption{ {Pattern: regexp.MustCompile(`.*`)}, {Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), Template: &[]string{"${3}-${1}-${2}"}[0]}, }, }, }, + { + name: "GroupDelimiterRegexes", + in: `group_delimiter_regexes=['^$', ';$']`, + defaultOptions: blockOptions{AllowYAMLLists: true}, + + want: blockOptions{ + AllowYAMLLists: true, + GroupDelimiterRegexes: []RegexOption{ + {Pattern: regexp.MustCompile(`^$`)}, + {Pattern: regexp.MustCompile(`;$`)}, + }, + }, + }, } { t.Run(tc.name, func(t *testing.T) { initZerolog(t) @@ -325,10 +338,10 @@ func TestBlockOptions_regexTransform(t *testing.T) { t.Run(tc.name, func(t *testing.T) { var opts blockOptions for _, regex := range tc.regexes { - opts.ByRegex = append(opts.ByRegex, ByRegexOption{regexp.MustCompile(regex), nil}) + opts.ByRegex = append(opts.ByRegex, RegexOption{regexp.MustCompile(regex), nil}) } - gotTokens := opts.matchRegexes(tc.in) + gotTokens := opts.matchRegexes(tc.in, opts.ByRegex) got := make([][]string, len(gotTokens)) for i, t := range gotTokens { got[i] = []string(t)