diff --git a/README.md b/README.md
index 781b3d0..d8661fa 100644
--- a/README.md
+++ b/README.md
@@ -250,7 +250,7 @@ allows for sorting data such as Go structs and JSON objects.
> Note: angle brackets (`<` and `>`) are not supported by block mode due to
> being used for mathematical expressions in an unbalanced format.
-#### Custom grouping
+#### Prefix grouping
Another way to group lines together is with the `group_prefixes` option. This
takes a comma-separated list of prefixes. Any line beginning with one of those
@@ -291,6 +291,51 @@ and tomatoes
+#### Regex-delimited grouping
+
+Two mutually exclusive options delimit groups using regular expressions:
+`group_start_regex` and `group_end_regex`. Each takes a list of regular
+expressions, and a line matches if _any part_ of it matches one of them. With
+`group_start_regex`, a matching line ends the previous group and becomes the
+first line of a new group (potentially preceded by sticky comments). With
+`group_end_regex`, a matching line is the last line of its group. Some matching
+lines may be ignored depending on which other options are enabled. If both
+options are set, `group_end_regex` is ignored.
+
+<table>
+<tr>
+<td>
+
+```
+
+// Some comment for foo
+define foo =
+abc + def;
+
+// Some other comment for bar
+define bar =
+ghi + jkl;
+
+```
+
+</td>
+<td>
+
+```diff
++// keep-sorted start group_start_regex=["^define\\b"] newline_separated=yes
+ // Some other comment for bar
+ define bar =
+ ghi + jkl;
+
+ // Some comment for foo
+ define foo =
+ abc + def;
++// keep-sorted end
+```
+
+</td>
+</tr>
+</table>
+
#### Comments
Comments embedded within the sorted block are made to stick with their
diff --git a/goldens/group_regex.in b/goldens/group_regex.in
new file mode 100644
index 0000000..c006547
--- /dev/null
+++ b/goldens/group_regex.in
@@ -0,0 +1,68 @@
+# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes
+# Foo comment
+CREATE PUBLIC FUNCTION Foo(x INT64)
+RETURNS INT64
+AS (
+ x + 1
+);
+
+# Bar comment
+CREATE PUBLIC FUNCTION Bar(x INT64)
+RETURNS INT64
+AS (
+ x + 2
+);
+
+# Baz comment
+CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64)
+RETURNS INT64
+AS (
+ x + LENGTH('CREATE')
+);
+# keep-sorted-test end
+
+# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes by_regex=["\\bFUNCTION (\\w+)\\b"]
+# Foo comment
+CREATE PUBLIC FUNCTION Foo(x INT64)
+RETURNS INT64
+AS (
+ x + 1
+);
+
+# Bar comment
+CREATE PRIVATE FUNCTION Bar(x INT64)
+RETURNS INT64
+AS (
+ x + 2
+);
+
+# Baz comment
+CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64)
+RETURNS INT64
+AS (
+ SUM(x) + LENGTH('CREATE FUNCTION Aaa')
+);
+# keep-sorted-test end
+
+# keep-sorted-test start group_end_regex=;$ newline_separated=yes
+# Foo comment
+CREATE PUBLIC FUNCTION Foo(x INT64)
+RETURNS INT64
+AS (
+ x + 1
+);
+
+# Bar comment
+CREATE PUBLIC FUNCTION Bar(x INT64)
+RETURNS INT64
+AS (
+ x + 2
+);
+
+# Baz comment
+CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64)
+RETURNS INT64
+AS (
+ x + LENGTH('CREATE')
+);
+# keep-sorted-test end
diff --git a/goldens/group_regex.out b/goldens/group_regex.out
new file mode 100644
index 0000000..ce99020
--- /dev/null
+++ b/goldens/group_regex.out
@@ -0,0 +1,68 @@
+# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes
+# Baz comment
+CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64)
+RETURNS INT64
+AS (
+ x + LENGTH('CREATE')
+);
+
+# Bar comment
+CREATE PUBLIC FUNCTION Bar(x INT64)
+RETURNS INT64
+AS (
+ x + 2
+);
+
+# Foo comment
+CREATE PUBLIC FUNCTION Foo(x INT64)
+RETURNS INT64
+AS (
+ x + 1
+);
+# keep-sorted-test end
+
+# keep-sorted-test start group_start_regex=^CREATE newline_separated=yes by_regex=["\\bFUNCTION (\\w+)\\b"]
+# Bar comment
+CREATE PRIVATE FUNCTION Bar(x INT64)
+RETURNS INT64
+AS (
+ x + 2
+);
+
+# Baz comment
+CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64)
+RETURNS INT64
+AS (
+ SUM(x) + LENGTH('CREATE FUNCTION Aaa')
+);
+
+# Foo comment
+CREATE PUBLIC FUNCTION Foo(x INT64)
+RETURNS INT64
+AS (
+ x + 1
+);
+# keep-sorted-test end
+
+# keep-sorted-test start group_end_regex=;$ newline_separated=yes
+# Baz comment
+CREATE PUBLIC AGGREGATE FUNCTION Baz(x INT64)
+RETURNS INT64
+AS (
+ x + LENGTH('CREATE')
+);
+
+# Bar comment
+CREATE PUBLIC FUNCTION Bar(x INT64)
+RETURNS INT64
+AS (
+ x + 2
+);
+
+# Foo comment
+CREATE PUBLIC FUNCTION Foo(x INT64)
+RETURNS INT64
+AS (
+ x + 1
+);
+# keep-sorted-test end
diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go
index 01c61e5..a637de8 100644
--- a/keepsorted/line_group.go
+++ b/keepsorted/line_group.go
@@ -61,6 +61,16 @@ type accessRecorder struct {
joinedComment bool
}
+// matchesAnyRegex reports whether any part of s matches at least one of the
+// given regexes. It returns false when regexes is nil or empty.
+func matchesAnyRegex(s string, regexes []*regexp.Regexp) bool {
+	for _, regex := range regexes {
+		// Only a yes/no answer is needed, so use MatchString rather than
+		// FindStringSubmatch, which would also build a slice of captures.
+		if regex.MatchString(s) {
+			return true
+		}
+	}
+	return false
+}
+
// groupLines splits lines into one or more lineGroups based on the provided options.
func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
var groups []*lineGroup
@@ -104,6 +114,23 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
increasedIndent := !lineRange.empty() && initialIndent != nil && indents[i] > *initialIndent
return increasedIndent || numUnmatchedStartDirectives > 0 || metadata.opts.hasGroupPrefix(l)
}
+	// shouldAddToRegexDelimitedGroup decides, for regex-delimited grouping,
+	// whether line l belongs to the group that is currently being built.
+	// addToGroup is true when l should be appended to that group;
+	// finishGroupAfter is true when the group should be closed right after l
+	// has been appended. Both are false when neither regex option is set.
+	shouldAddToRegexDelimitedGroup := func(l string) (addToGroup bool, finishGroupAfter bool) {
+		switch {
+		case metadata.opts.GroupStartRegex != nil:
+			// A line matching a start regex opens a new group; every other
+			// line continues the current one. Start regexes never close a
+			// group after the line.
+			return !matchesAnyRegex(l, metadata.opts.GroupStartRegex), false
+		case metadata.opts.GroupEndRegex != nil:
+			// Every line joins the current group; a line matching an end
+			// regex is also the last line of that group.
+			return true, matchesAnyRegex(l, metadata.opts.GroupEndRegex)
+		default:
+			return false, false
+		}
+	}
countStartDirectives := func(l string) {
if strings.Contains(l, metadata.startDirective) {
numUnmatchedStartDirectives++
@@ -128,6 +155,13 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
}
// finish an outstanding lineGroup and reset our state to prepare for a new lineGroup.
finishGroup := func() {
+ // If the current lineRange ends with an extra empty line, remove it and place it in a separate group.
+ // This is notably needed to support group_start_regex or group_end_regex being set at the same time as newline_separated.
+ endingEmptyLines := 0
+ for lineRange.size() > 1 && lines[lineRange.end-1] == "" {
+ endingEmptyLines++
+ lineRange.end--
+ }
groups = append(groups, &lineGroup{
opts: metadata.opts,
prefixOrder: prefixOrder,
@@ -136,6 +170,13 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
commentRange = indexRange{}
lineRange = indexRange{}
block = codeBlock{}
+ for ; endingEmptyLines > 0; endingEmptyLines-- {
+ groups = append(groups, &lineGroup{
+ opts: metadata.opts,
+ prefixOrder: prefixOrder,
+ lineGroupContent: lineGroupContent{lines: []string{""}},
+ })
+ }
}
for i, l := range lines {
if shouldAddToBlock() || shouldAddToGroup(i, l) {
@@ -154,6 +195,11 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
// count end directives via its appendLine call.
countStartDirectives(l)
}
+ } else if addToGroup, finishGroupAfter := shouldAddToRegexDelimitedGroup(l); addToGroup {
+ appendLine(i, l)
+ if finishGroupAfter {
+ finishGroup()
+ }
} else {
// Begin a new block or group.
if !lineRange.empty() {
@@ -225,6 +271,13 @@ func (r *indexRange) empty() bool {
return !r.init || r.start == r.end
}
+// size returns end - start for an initialised range, and 0 for a range that
+// has not been initialised yet.
+func (r *indexRange) size() int {
+	if r.init {
+		return r.end - r.start
+	}
+	return 0
+}
+
func (r *indexRange) append(i int) {
if !r.init {
r.start = i
diff --git a/keepsorted/options.go b/keepsorted/options.go
index 0b510c1..e36d20d 100644
--- a/keepsorted/options.go
+++ b/keepsorted/options.go
@@ -44,7 +44,7 @@ type ByRegexOption struct {
type SortOrder string
const (
- OrderAsc SortOrder = "asc"
+ OrderAsc SortOrder = "asc"
OrderDesc SortOrder = "desc"
)
@@ -97,6 +97,19 @@ type blockOptions struct {
// StickyPrefixes tells us about other types of lines that should behave as sticky comments.
StickyPrefixes map[string]bool `key:"sticky_prefixes"`
+ // Regex-based group options:
+ // Conceptually, GroupStartRegex lines go to the *next* group while GroupEndRegex lines go to the *current* group.
+
+ // GroupStartRegex is a list of regexes that match the start of a group of lines (does not need to match the whole line).
+ // If none of the listed regexes match a given line, the line is considered to be part of the same
+ // group as the previous line.
+ GroupStartRegex []*regexp.Regexp `key:"group_start_regex"`
+ // GroupEndRegex is a list of regexes that match the end of a group of lines (does not need to match the whole line).
+ // If any of the listed regexes match a given line, the line will end the current group,
+ // provided that it does not get ignored by other options (indented/prefixed group, block, sticky comment).
+ // Non-comment lines no longer end groups when GroupEndRegex is used.
+ GroupEndRegex []*regexp.Regexp `key:"group_end_regex"`
+
///////////////////////
// Sorting options //
///////////////////////
@@ -240,6 +253,13 @@ func formatValue(val reflect.Value) (string, error) {
return fmt.Sprintf("[%s]", strings.Join(vals, ", ")), nil
}
return formatList(vals)
+ case reflect.TypeFor[[]*regexp.Regexp]():
+ regexps := val.Interface().([]*regexp.Regexp)
+ vals := make([]string, len(regexps))
+ for i, regex := range regexps {
+ vals[i] = regex.String()
+ }
+ return formatList(vals)
}
panic(fmt.Errorf("unsupported blockOptions type: %v", val.Type()))
@@ -321,6 +341,11 @@ func validate(opts *blockOptions) (warnings []error) {
opts.IgnorePrefixes = nil
}
+ if opts.GroupStartRegex != nil && opts.GroupEndRegex != nil {
+ warns = append(warns, fmt.Errorf("group_start_regex should not be used together with group_end_regex; ignoring group_end_regex"))
+ opts.GroupEndRegex = nil
+ }
+
return warns
}
diff --git a/keepsorted/options_parser.go b/keepsorted/options_parser.go
index ddd8823..4943842 100644
--- a/keepsorted/options_parser.go
+++ b/keepsorted/options_parser.go
@@ -69,7 +69,13 @@ func (p *parser) popValue(typ reflect.Type) (reflect.Value, error) {
val, err := p.popSet()
return reflect.ValueOf(val), err
case reflect.TypeFor[[]ByRegexOption]():
- val, err := p.popListRegexOption()
+ val, err := p.popByRegexOption()
+ if err != nil {
+ return reflect.Zero(typ), err
+ }
+ return reflect.ValueOf(val), nil
+ case reflect.TypeFor[[]*regexp.Regexp]():
+ val, err := p.popRegexListOption()
if err != nil {
return reflect.Zero(typ), err
}
@@ -183,13 +189,17 @@ func (p *parser) popList() ([]string, error) {
return popListValue(p, func(s string) (string, error) { return s, nil })
}
-func (p *parser) popListRegexOption() ([]ByRegexOption, error) {
+func (p *parser) popByRegexOption() ([]ByRegexOption, error) {
return popListValue(p, func(s string) (ByRegexOption, error) {
pat, err := regexp.Compile(s)
return ByRegexOption{Pattern: pat}, err
})
}
+// popRegexListOption pops a list value via popListValue, converting each
+// element with regexp.Compile.
+func (p *parser) popRegexListOption() ([]*regexp.Regexp, error) {
+	compile := func(pattern string) (*regexp.Regexp, error) {
+		return regexp.Compile(pattern)
+	}
+	return popListValue(p, compile)
+}
+
func (p *parser) popSortOrder() (SortOrder, error) {
val, rest, _ := strings.Cut(p.line, " ")
p.line = rest
diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go
index 8967cb7..dccfd75 100644
--- a/keepsorted/options_test.go
+++ b/keepsorted/options_test.go
@@ -229,6 +229,24 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{Order: OrderAsc},
wantErr: `while parsing option "order": unrecognized order value "foo", expected 'asc' or 'desc'`,
},
+ {
+ name: "GroupStartRegex",
+ in: "group_start_regex=^CREATE",
+
+ want: blockOptions{
+ GroupStartRegex: []*regexp.Regexp{regexp.MustCompile("^CREATE")},
+ },
+ },
+ {
+ name: "GroupStartRegex_YAML",
+ in: "group_start_regex=['^CREATE', 'b']",
+ defaultOptions: blockOptions{AllowYAMLLists: true},
+
+ want: blockOptions{
+ AllowYAMLLists: true,
+ GroupStartRegex: []*regexp.Regexp{regexp.MustCompile("^CREATE"), regexp.MustCompile("b")},
+ },
+ },
} {
t.Run(tc.name, func(t *testing.T) {
initZerolog(t)