From c9c37e5d315c1f66e64b6c0771ac9a92debf0d23 Mon Sep 17 00:00:00 2001 From: baiqing Date: Thu, 23 Apr 2026 11:06:52 +0800 Subject: [PATCH 1/2] feat(doc,drive): normalize curly quotes and CRLF in --selection-with-ellipsis before matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-typed --selection-with-ellipsis values frequently come from pasted prose that auto-correction has rewritten to curly quotes (U+2018/2019/ 201C/201D) or CRLF line endings, while the Lark docx store keeps punctuation in straight ASCII / LF form. Server-side matching is strict byte comparison, so a single curly quote defeats an otherwise correct selection and the Agent has to retry with guesswork (Case 7 in the lark-cli pitfall list). Add shortcuts/common.NormalizeSelectionWithEllipsis that rewrites only the transformations that are virtually always safe (curly→straight quotes and CR→LF). CJK punctuation and full-width Latin are intentionally left alone — those can legitimately appear verbatim in document prose, so rewriting them would break otherwise-valid selections. Wire it into the three call sites that send --selection-with-ellipsis to MCP: - docs +update (DryRun silent, Execute emits a 'note:' to stderr when normalization happened so the user can see what was changed) - docs +media-insert (silent — the existing locate-doc step already surfaces selection feedback on miss) - drive +add-comment Execute (emits the same 'note:' as docs +update) The helper lives in shortcuts/common so both doc and drive packages can share it without duplication. --- shortcuts/common/selection_normalize.go | 40 +++++++++ shortcuts/common/selection_normalize_test.go | 90 ++++++++++++++++++++ shortcuts/doc/doc_media_insert.go | 4 +- shortcuts/doc/docs_update.go | 10 ++- shortcuts/drive/drive_add_comment.go | 9 +- 5 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 shortcuts/common/selection_normalize.go create mode 100644 shortcuts/common/selection_normalize_test.go diff --git a/shortcuts/common/selection_normalize.go b/shortcuts/common/selection_normalize.go new file mode 100644 index 000000000..1b357c0b7 --- /dev/null +++ b/shortcuts/common/selection_normalize.go @@ -0,0 +1,40 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package common + +import "strings" + +// NormalizeSelectionWithEllipsis returns a canonical form of a user-typed +// --selection-with-ellipsis value suitable for server-side matching, along +// with a flag indicating whether any rewrite happened. +// +// The Lark docx store keeps punctuation in a canonical shape — straight ASCII +// quotes, LF line endings — while user-provided selection strings often come +// from pasted prose that has been auto-corrected to curly quotes, CRLF, or +// other typographic variants. Matching is strict byte-level, so a curly/ +// straight mismatch on a single character is enough to defeat the whole +// selection. +// +// The normalization set is deliberately conservative: only transformations +// that are virtually always safe (typographic quotes and CR line endings) +// are applied. Full/half-width Latin punctuation or CJK punctuation is left +// alone, since those can legitimately appear verbatim in the document body. +func NormalizeSelectionWithEllipsis(s string) (string, bool) { + if s == "" { + return s, false + } + out := s + // Curly single quotes → ASCII apostrophe. + out = strings.ReplaceAll(out, "\u2018", "'") + out = strings.ReplaceAll(out, "\u2019", "'") + // Curly double quotes → ASCII double quote. + out = strings.ReplaceAll(out, "\u201C", "\"") + out = strings.ReplaceAll(out, "\u201D", "\"") + // CRLF / standalone CR → LF. Lark stores LF internally; sending CRLF in + // a selection would require the document to contain literal CR bytes, + // which it never does. + out = strings.ReplaceAll(out, "\r\n", "\n") + out = strings.ReplaceAll(out, "\r", "\n") + return out, out != s +} diff --git a/shortcuts/common/selection_normalize_test.go b/shortcuts/common/selection_normalize_test.go new file mode 100644 index 000000000..038368bf7 --- /dev/null +++ b/shortcuts/common/selection_normalize_test.go @@ -0,0 +1,90 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package common + +import "testing" + +func TestNormalizeSelectionWithEllipsis(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + want string + wantChanged bool + }{ + { + name: "empty passes through", + input: "", + want: "", + wantChanged: false, + }, + { + name: "ascii-only selection is untouched", + input: "欢迎大家多给反馈", + want: "欢迎大家多给反馈", + wantChanged: false, + }, + { + name: "curly single quotes normalized", + input: "\u2018That\u2019s All\u2019", + want: "'That's All'", + wantChanged: true, + }, + { + name: "curly double quotes normalized", + input: "he said \u201Chello\u201D", + want: "he said \"hello\"", + wantChanged: true, + }, + { + name: "mixed curly + straight normalized", + input: "start\u2019s...end", + want: "start's...end", + wantChanged: true, + }, + { + name: "crlf collapsed to lf", + input: "line1\r\nline2", + want: "line1\nline2", + wantChanged: true, + }, + { + name: "standalone cr collapsed to lf", + input: "line1\rline2", + want: "line1\nline2", + wantChanged: true, + }, + { + name: "already lf is untouched", + input: "line1\nline2", + want: "line1\nline2", + wantChanged: false, + }, + { + name: "chinese punctuation deliberately untouched", + input: "你好,世界", + want: "你好,世界", + wantChanged: false, + }, + { + name: "fullwidth latin deliberately untouched", + input: "ABC", + want: "ABC", + wantChanged: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, changed := NormalizeSelectionWithEllipsis(tt.input) + if got != tt.want { + t.Errorf("NormalizeSelectionWithEllipsis(%q) = %q, want %q", tt.input, got, tt.want) + } + if changed != tt.wantChanged { + t.Errorf("NormalizeSelectionWithEllipsis(%q) changed=%v, want %v", tt.input, changed, tt.wantChanged) + } + }) + } +} diff --git a/shortcuts/doc/doc_media_insert.go b/shortcuts/doc/doc_media_insert.go index cd34db569..8c8829c98 100644 --- a/shortcuts/doc/doc_media_insert.go +++ b/shortcuts/doc/doc_media_insert.go @@ -109,7 +109,7 @@ var DocMediaInsert = common.Shortcut{ } mediaType := runtime.Str("type") caption := runtime.Str("caption") - selection := strings.TrimSpace(runtime.Str("selection-with-ellipsis")) + selection, _ := common.NormalizeSelectionWithEllipsis(strings.TrimSpace(runtime.Str("selection-with-ellipsis"))) hasSelection := selection != "" fileViewType := fileViewMap[runtime.Str("file-view")] @@ -251,7 +251,7 @@ var DocMediaInsert = common.Shortcut{ } fmt.Fprintf(runtime.IO().ErrOut, "Root block ready: %s (%d children)\n", parentBlockID, insertIndex) - selection := strings.TrimSpace(runtime.Str("selection-with-ellipsis")) + selection, _ := common.NormalizeSelectionWithEllipsis(strings.TrimSpace(runtime.Str("selection-with-ellipsis"))) if selection != "" { before := runtime.Bool("before") // Redact the selection when logging — it is copied verbatim from diff --git a/shortcuts/doc/docs_update.go b/shortcuts/doc/docs_update.go index 04ae4bc16..c7b649c44 100644 --- a/shortcuts/doc/docs_update.go +++ b/shortcuts/doc/docs_update.go @@ -78,7 +78,8 @@ var DocsUpdate = common.Shortcut{ args["markdown"] = v } if v := runtime.Str("selection-with-ellipsis"); v != "" { - args["selection_with_ellipsis"] = v + normalized, _ := common.NormalizeSelectionWithEllipsis(v) + args["selection_with_ellipsis"] = normalized } if v := runtime.Str("selection-by-title"); v != "" { args["selection_by_title"] = v @@ -111,7 +112,12 @@ var DocsUpdate = common.Shortcut{ args["markdown"] = markdown } if v := runtime.Str("selection-with-ellipsis"); v != "" { - args["selection_with_ellipsis"] = v + normalized, changed := common.NormalizeSelectionWithEllipsis(v) + if changed { + fmt.Fprintf(runtime.IO().ErrOut, + "note: normalized --selection-with-ellipsis (curly quotes / CR line endings rewritten to canonical ASCII form for matching)\n") + } + args["selection_with_ellipsis"] = normalized } if v := runtime.Str("selection-by-title"); v != "" { args["selection_by_title"] = v diff --git a/shortcuts/drive/drive_add_comment.go b/shortcuts/drive/drive_add_comment.go index ccd21b760..045a94047 100644 --- a/shortcuts/drive/drive_add_comment.go +++ b/shortcuts/drive/drive_add_comment.go @@ -159,7 +159,7 @@ var DriveAddComment = common.Shortcut{ } // Doc/docx comment dry-run. - selection := runtime.Str("selection-with-ellipsis") + selection, _ := common.NormalizeSelectionWithEllipsis(runtime.Str("selection-with-ellipsis")) mode := resolveCommentMode(runtime.Bool("full-comment"), selection, blockID) createPath := "/open-apis/drive/v1/files/:file_token/new_comments" @@ -241,7 +241,12 @@ var DriveAddComment = common.Shortcut{ return executeSheetComment(runtime, docRef) } - selection := runtime.Str("selection-with-ellipsis") + rawSelection := runtime.Str("selection-with-ellipsis") + selection, normalized := common.NormalizeSelectionWithEllipsis(rawSelection) + if normalized { + fmt.Fprintf(runtime.IO().ErrOut, + "note: normalized --selection-with-ellipsis (curly quotes / CR line endings rewritten to canonical ASCII form for matching)\n") + } blockID := strings.TrimSpace(runtime.Str("block-id")) mode := resolveCommentMode(runtime.Bool("full-comment"), selection, blockID) From e7b7446ae347f1d7ff43a534775c5ce1bfe156b5 Mon Sep 17 00:00:00 2001 From: baiqing Date: Thu, 23 Apr 2026 11:32:22 +0800 Subject: [PATCH 2/2] test(common): rename CJK no-op case and add genuine ASCII-only case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit pointed out the 'ascii-only selection is untouched' case actually had a CJK input ('欢迎大家多给反馈'), which was misleading. Rename that entry to 'cjk-only selection is untouched' and add a separate ASCII-only case with 'hello world' so the table now covers both no-op scripts explicitly. --- shortcuts/common/selection_normalize_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/shortcuts/common/selection_normalize_test.go b/shortcuts/common/selection_normalize_test.go index 038368bf7..32d7ca3bb 100644 --- a/shortcuts/common/selection_normalize_test.go +++ b/shortcuts/common/selection_normalize_test.go @@ -21,11 +21,17 @@ func TestNormalizeSelectionWithEllipsis(t *testing.T) { wantChanged: false, }, { - name: "ascii-only selection is untouched", + name: "cjk-only selection is untouched", input: "欢迎大家多给反馈", want: "欢迎大家多给反馈", wantChanged: false, }, + { + name: "ascii-only selection is untouched", + input: "hello world", + want: "hello world", + wantChanged: false, + }, { name: "curly single quotes normalized", input: "\u2018That\u2019s All\u2019",