diff --git a/README.md b/README.md index 781b3d0..d8661fa 100644 --- a/README.md +++ b/README.md @@ -250,7 +250,7 @@ allows for sorting data such as Go structs and JSON objects. > Note: angle brackets (`<` and `>`) are not supported by block mode due to > being used for mathematical expressions in an unbalanced format. -#### Custom grouping +#### Prefix grouping Another way to group lines together is with the `group_prefixes` option. This takes a comma-separated list of prefixes. Any line beginning with one of those @@ -291,6 +291,51 @@ and tomatoes +#### Regex-delimited grouping + +Two mutually exclusive options exist for delimiting groups using regular +expressions: `group_start_regex` and `group_end_regex`. If _part_ of a line +matches the specified regular expression, that line marks a group boundary. +With `group_start_regex`, the matching line will be at the start of a new group +(potentially preceded by sticky comments), whereas with `group_end_regex`, the +matching line will be the last line of the current group. Some matching lines +may be ignored based on the other options that are enabled. + + + + + + +
+ +``` + +// Some comment for foo +define foo = +abc + def; + +// Some other comment for bar +define bar = +ghi + jkl; + +``` + + + +```diff ++// keep-sorted start group_start_regex=["^define\\b"] newline_separated=yes + // Some other comment for bar + define bar = + ghi + jkl; + + // Some comment for foo + define foo = + abc + def; ++// keep-sorted end +``` + +
+ #### Comments Comments embedded within the sorted block are made to stick with their diff --git a/goldens/group_regex.in b/goldens/group_regex.in new file mode 100644 index 0000000..c006547 --- /dev/null +++ b/goldens/group_regex.in @@ -0,0 +1,68 @@ +# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes +# Foo comment +CREATE PUBLIC FUNCTION Foo(x INT64) +RETURNS INT64 +AS ( + x + 1 +); + +# Bar comment +CREATE PUBLIC FUNCTION Bar(x INT64) +RETURNS INT64 +AS ( + x + 2 +); + +# Baz comment +CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64) +RETURNS INT64 +AS ( + x + LENGTH('CREATE') +); +# keep-sorted-test end + +# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes by_regex=["\\bFUNCTION (\\w+)\\b"] +# Foo comment +CREATE PUBLIC FUNCTION Foo(x INT64) +RETURNS INT64 +AS ( + x + 1 +); + +# Bar comment +CREATE PRIVATE FUNCTION Bar(x INT64) +RETURNS INT64 +AS ( + x + 2 +); + +# Baz comment +CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64) +RETURNS INT64 +AS ( + SUM(x) + LENGTH('CREATE FUNCTION Aaa') +); +# keep-sorted-test end + +# keep-sorted-test start group_end_regex=;$ newline_separated=yes +# Foo comment +CREATE PUBLIC FUNCTION Foo(x INT64) +RETURNS INT64 +AS ( + x + 1 +); + +# Bar comment +CREATE PUBLIC FUNCTION Bar(x INT64) +RETURNS INT64 +AS ( + x + 2 +); + +# Baz comment +CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64) +RETURNS INT64 +AS ( + x + LENGTH('CREATE') +); +# keep-sorted-test end diff --git a/goldens/group_regex.out b/goldens/group_regex.out new file mode 100644 index 0000000..ce99020 --- /dev/null +++ b/goldens/group_regex.out @@ -0,0 +1,68 @@ +# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes +# Baz comment +CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64) +RETURNS INT64 +AS ( + x + LENGTH('CREATE') +); + +# Bar comment +CREATE PUBLIC FUNCTION Bar(x INT64) +RETURNS INT64 +AS ( + x + 2 +); + +# Foo comment +CREATE PUBLIC FUNCTION Foo(x INT64) +RETURNS INT64 +AS ( + x + 1 +); +# keep-sorted-test end 
+ +# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes by_regex=["\\bFUNCTION (\\w+)\\b"] +# Bar comment +CREATE PRIVATE FUNCTION Bar(x INT64) +RETURNS INT64 +AS ( + x + 2 +); + +# Baz comment +CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64) +RETURNS INT64 +AS ( + SUM(x) + LENGTH('CREATE FUNCTION Aaa') +); + +# Foo comment +CREATE PUBLIC FUNCTION Foo(x INT64) +RETURNS INT64 +AS ( + x + 1 +); +# keep-sorted-test end + +# keep-sorted-test start group_end_regex=;$ newline_separated=yes +# Baz comment +CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64) +RETURNS INT64 +AS ( + x + LENGTH('CREATE') +); + +# Bar comment +CREATE PUBLIC FUNCTION Bar(x INT64) +RETURNS INT64 +AS ( + x + 2 +); + +# Foo comment +CREATE PUBLIC FUNCTION Foo(x INT64) +RETURNS INT64 +AS ( + x + 1 +); +# keep-sorted-test end diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go index 01c61e5..a637de8 100644 --- a/keepsorted/line_group.go +++ b/keepsorted/line_group.go @@ -61,6 +61,16 @@ type accessRecorder struct { joinedComment bool } +// matchesAnyRegex returns true if s matches one of the regexes. +func matchesAnyRegex(s string, regexes []*regexp.Regexp) bool { + for _, regex := range regexes { + if regex.FindStringSubmatch(s) != nil { + return true + } + } + return false +} + // groupLines splits lines into one or more lineGroups based on the provided options. func groupLines(lines []string, metadata blockMetadata) []*lineGroup { var groups []*lineGroup @@ -104,6 +114,23 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup { increasedIndent := !lineRange.empty() && initialIndent != nil && indents[i] > *initialIndent return increasedIndent || numUnmatchedStartDirectives > 0 || metadata.opts.hasGroupPrefix(l) } + // Determines whether the current line should be part of a regex-delimited + // group including any prior lines already visited. + // Returns another boolean indicating whether the group should be ending + // after that line if so. 
+ shouldAddToRegexDelimitedGroup := func(l string) (addToGroup bool, finishGroupAfter bool) { + if metadata.opts.GroupStartRegex != nil { + // For GroupStartRegex, all non-regex-matching lines should be + // part of the group including prior lines. + return !matchesAnyRegex(l, metadata.opts.GroupStartRegex), false + } + if metadata.opts.GroupEndRegex != nil { + // For GroupEndRegex, the line should always be included in the + // group including prior lines, but possibly terminate it. + return true, matchesAnyRegex(l, metadata.opts.GroupEndRegex) + } + return false, false + } countStartDirectives := func(l string) { if strings.Contains(l, metadata.startDirective) { numUnmatchedStartDirectives++ @@ -128,6 +155,13 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup { } // finish an outstanding lineGroup and reset our state to prepare for a new lineGroup. finishGroup := func() { + // If the current lineRange ends with an extra empty line, remove it and place it in a separate group. + // This is notably needed to support group_start_regex or group_end_regex being set at the same time as newline_separated. + endingEmptyLines := 0 + for lineRange.size() > 1 && lines[lineRange.end-1] == "" { + endingEmptyLines++ + lineRange.end-- + } groups = append(groups, &lineGroup{ opts: metadata.opts, prefixOrder: prefixOrder, @@ -136,6 +170,13 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup { commentRange = indexRange{} lineRange = indexRange{} block = codeBlock{} + for ; endingEmptyLines > 0; endingEmptyLines-- { + groups = append(groups, &lineGroup{ + opts: metadata.opts, + prefixOrder: prefixOrder, + lineGroupContent: lineGroupContent{lines: []string{""}}, + }) + } } for i, l := range lines { if shouldAddToBlock() || shouldAddToGroup(i, l) { @@ -154,6 +195,11 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup { // count end directives via its appendLine call. 
countStartDirectives(l) } + } else if addToGroup, finishGroupAfter := shouldAddToRegexDelimitedGroup(l); addToGroup { + appendLine(i, l) + if finishGroupAfter { + finishGroup() + } } else { // Begin a new block or group. if !lineRange.empty() { @@ -225,6 +271,13 @@ func (r *indexRange) empty() bool { return !r.init || r.start == r.end } +func (r *indexRange) size() int { + if !r.init { + return 0 + } + return r.end - r.start +} + func (r *indexRange) append(i int) { if !r.init { r.start = i diff --git a/keepsorted/options.go b/keepsorted/options.go index 0b510c1..e36d20d 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -44,7 +44,7 @@ type ByRegexOption struct { type SortOrder string const ( - OrderAsc SortOrder = "asc" + OrderAsc SortOrder = "asc" OrderDesc SortOrder = "desc" ) @@ -97,6 +97,19 @@ type blockOptions struct { // StickyPrefixes tells us about other types of lines that should behave as sticky comments. StickyPrefixes map[string]bool `key:"sticky_prefixes"` + // Regex-based group options: + // Conceptually, GroupStartRegex lines go to the *next* group while GroupEndRegex lines go to the *current* group. + + // GroupStartRegex is a list of regexes that match the start of a group of lines (does not need to match the whole line). + // If none of the listed regexes match a given line, the line is considered to be part of the same + // group as the previous line. + GroupStartRegex []*regexp.Regexp `key:"group_start_regex"` + // GroupEndRegex is a list of regexes that match the end of a group of lines (does not need to match the whole line). + // If any of the listed regexes match a given line, the line will end the current group, + // provided that it does not get ignored by other options (indented/prefixed group, block, sticky comment). + // Non-comment lines no longer end groups when GroupEndRegex is used. 
+ GroupEndRegex []*regexp.Regexp `key:"group_end_regex"` + /////////////////////// // Sorting options // /////////////////////// @@ -240,6 +253,13 @@ func formatValue(val reflect.Value) (string, error) { return fmt.Sprintf("[%s]", strings.Join(vals, ", ")), nil } return formatList(vals) + case reflect.TypeFor[[]*regexp.Regexp](): + regexps := val.Interface().([]*regexp.Regexp) + vals := make([]string, len(regexps)) + for i, regex := range regexps { + vals[i] = regex.String() + } + return formatList(vals) } panic(fmt.Errorf("unsupported blockOptions type: %v", val.Type())) @@ -321,6 +341,11 @@ func validate(opts *blockOptions) (warnings []error) { opts.IgnorePrefixes = nil } + if opts.GroupStartRegex != nil && opts.GroupEndRegex != nil { + warns = append(warns, fmt.Errorf("group_start_regex should not be used together with group_end_regex; ignoring group_end_regex")) + opts.GroupEndRegex = nil + } + return warns } diff --git a/keepsorted/options_parser.go b/keepsorted/options_parser.go index ddd8823..4943842 100644 --- a/keepsorted/options_parser.go +++ b/keepsorted/options_parser.go @@ -69,7 +69,13 @@ func (p *parser) popValue(typ reflect.Type) (reflect.Value, error) { val, err := p.popSet() return reflect.ValueOf(val), err case reflect.TypeFor[[]ByRegexOption](): - val, err := p.popListRegexOption() + val, err := p.popByRegexOption() + if err != nil { + return reflect.Zero(typ), err + } + return reflect.ValueOf(val), nil + case reflect.TypeFor[[]*regexp.Regexp](): + val, err := p.popRegexListOption() if err != nil { return reflect.Zero(typ), err } @@ -183,13 +189,17 @@ func (p *parser) popList() ([]string, error) { return popListValue(p, func(s string) (string, error) { return s, nil }) } -func (p *parser) popListRegexOption() ([]ByRegexOption, error) { +func (p *parser) popByRegexOption() ([]ByRegexOption, error) { return popListValue(p, func(s string) (ByRegexOption, error) { pat, err := regexp.Compile(s) return ByRegexOption{Pattern: pat}, err }) } +func (p 
*parser) popRegexListOption() ([]*regexp.Regexp, error) { + return popListValue(p, regexp.Compile) +} + func (p *parser) popSortOrder() (SortOrder, error) { val, rest, _ := strings.Cut(p.line, " ") p.line = rest diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go index 8967cb7..dccfd75 100644 --- a/keepsorted/options_test.go +++ b/keepsorted/options_test.go @@ -229,6 +229,24 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{Order: OrderAsc}, wantErr: `while parsing option "order": unrecognized order value "foo", expected 'asc' or 'desc'`, }, + { + name: "GroupStartRegex", + in: "group_start_regex=^CREATE", + + want: blockOptions{ + GroupStartRegex: []*regexp.Regexp{regexp.MustCompile("^CREATE")}, + }, + }, + { + name: "GroupStartRegex_YAML", + in: "group_start_regex=['^CREATE', 'b']", + defaultOptions: blockOptions{AllowYAMLLists: true}, + + want: blockOptions{ + AllowYAMLLists: true, + GroupStartRegex: []*regexp.Regexp{regexp.MustCompile("^CREATE"), regexp.MustCompile("b")}, + }, + }, } { t.Run(tc.name, func(t *testing.T) { initZerolog(t)