From 4708a17815496b1beedf5fab306b212ed3e01161 Mon Sep 17 00:00:00 2001
From: tianzhou <t@bytebase.com>
Date: Fri, 13 Mar 2026 04:13:42 -0700
Subject: [PATCH] fix: preserve schema qualifiers in function/procedure bodies
 (#354)

Previously, schema qualifiers (e.g., public.test) were unconditionally
stripped from function/procedure bodies during normalization. This broke
functions with restrictive search_path settings (SET search_path = '',
pg_temp, etc.) where qualified references are required.

Changes:
- Move schema stripping from normalization time to comparison time
- Make stripSchemaQualifications skip dollar-quoted blocks so function
  bodies applied to embedded postgres preserve their qualifiers
- Export StripSchemaPrefixFromBody for use by the diff package
- Fix dollar-quote regex to match PostgreSQL grammar (no $1$ false positives)
- Add unit tests for splitDollarQuotedSegments

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 internal/diff/function.go                     | 15 +++-
 internal/diff/procedure.go                    |  4 +-
 internal/postgres/desired_state.go            | 73 ++++++++++++++++++-
 internal/postgres/desired_state_test.go       | 73 +++++++++++++++++++
 ir/normalize.go                               | 16 ++--
 ir/normalize_test.go                          |  4 +-
 .../issue_354_empty_search_path/diff.sql      |  2 +-
 .../issue_354_empty_search_path/plan.json     |  2 +-
 .../issue_354_empty_search_path/plan.sql      |  2 +-
 .../issue_354_empty_search_path/plan.txt      |  2 +-
 .../diff/create_trigger/add_trigger/plan.json |  2 +-
 .../dependency/table_to_function/diff.sql     |  2 +-
 .../dependency/table_to_function/plan.json    |  2 +-
 .../dependency/table_to_function/plan.sql     |  2 +-
 .../dependency/table_to_function/plan.txt     |  2 +-
 .../pgschema.sql                              |  6 +-
 .../pgschema.sql                              |  6 +-
 17 files changed, 185 insertions(+), 30 deletions(-)

diff --git a/internal/diff/function.go b/internal/diff/function.go
index d71cf46a..c1427053 100644
--- a/internal/diff/function.go
+++ b/internal/diff/function.go
@@ -371,7 +371,7 @@ func functionsEqualExceptAttributes(old, new *ir.Function) bool {
 	if old.Name != new.Name {
 		return false
 	}
-	if old.Definition != new.Definition {
+	if !definitionsEqualIgnoringSchema(old.Definition, new.Definition, old.Schema) {
 		return false
 	}
 	if old.ReturnType != new.ReturnType {
@@ -409,7 +409,7 @@ func functionsEqual(old, new *ir.Function) bool {
 	if old.Name != new.Name {
 		return false
 	}
-	if old.Definition != new.Definition {
+	if !definitionsEqualIgnoringSchema(old.Definition, new.Definition, old.Schema) {
 		return false
 	}
 	if old.ReturnType != new.ReturnType {
@@ -458,7 +458,7 @@ func functionsEqualExceptComment(old, new *ir.Function) bool {
 	if old.Name != new.Name {
 		return false
 	}
-	if old.Definition != new.Definition {
+	if !definitionsEqualIgnoringSchema(old.Definition, new.Definition, old.Schema) {
 		return false
 	}
 	if old.ReturnType != new.ReturnType {
@@ -522,6 +522,15 @@ func functionRequiresRecreate(old, new *ir.Function) bool {
 	return false
 }
 
+// definitionsEqualIgnoringSchema compares two function/procedure definitions,
+// stripping the given schema qualifier from both before comparing. This allows
+// definitions that differ only in schema qualification (e.g., "public.test" vs "test")
+// to be treated as equal, while preserving the original qualifiers in the IR for
+// correct DDL generation. (Issue #354)
+func definitionsEqualIgnoringSchema(a, b, schema string) bool {
+	return ir.StripSchemaPrefixFromBody(a, schema) == ir.StripSchemaPrefixFromBody(b, schema)
+}
+
 // filterNonTableParameters filters out TABLE mode parameters
 // TABLE parameters are output columns in RETURNS TABLE() and shouldn't be compared as input parameters
 func filterNonTableParameters(params []*ir.Parameter) []*ir.Parameter {
diff --git a/internal/diff/procedure.go b/internal/diff/procedure.go
index ce86a107..0c4f77f0 100644
--- a/internal/diff/procedure.go
+++ b/internal/diff/procedure.go
@@ -260,7 +260,7 @@ func proceduresEqual(old, new *ir.Procedure) bool {
 	if old.Name != new.Name {
 		return false
 	}
-	if old.Definition != new.Definition {
+	if !definitionsEqualIgnoringSchema(old.Definition, new.Definition, old.Schema) {
 		return false
 	}
 	if old.Language != new.Language {
@@ -296,7 +296,7 @@ func proceduresEqualExceptComment(old, new *ir.Procedure) bool {
 	if old.Name != new.Name {
 		return false
 	}
-	if old.Definition != new.Definition {
+	if !definitionsEqualIgnoringSchema(old.Definition, new.Definition, old.Schema) {
 		return false
 	}
 	if old.Language != new.Language {
diff --git a/internal/postgres/desired_state.go b/internal/postgres/desired_state.go
index beb0561e..114a9008 100644
--- a/internal/postgres/desired_state.go
+++ b/internal/postgres/desired_state.go
@@ -96,6 +96,77 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 		return sql
 	}
 
+	// Split SQL into dollar-quoted and non-dollar-quoted segments.
+	// Schema qualifiers inside function/procedure bodies (dollar-quoted blocks)
+	// must be preserved — the user may need them when search_path doesn't include
+	// the function's schema (e.g., SET search_path = ''). (Issue #354)
+	segments := splitDollarQuotedSegments(sql)
+	var result strings.Builder
+	result.Grow(len(sql))
+	for _, seg := range segments {
+		if seg.quoted {
+			// Preserve dollar-quoted content as-is
+			result.WriteString(seg.text)
+		} else {
+			result.WriteString(stripSchemaQualificationsFromText(seg.text, schemaName))
+		}
+	}
+	return result.String()
+}
+
+// dollarQuotedSegment represents a segment of SQL text, either inside or outside a dollar-quoted block.
+type dollarQuotedSegment struct {
+	text   string
+	quoted bool // true if this segment is inside dollar quotes (including the delimiters)
+}
+
+// splitDollarQuotedSegments splits SQL text into segments that are either inside or outside
+// dollar-quoted blocks ($$...$$, $tag$...$tag$, etc.). This allows callers to process
+// only the non-quoted parts while preserving function/procedure bodies verbatim.
+// dollarQuoteRe matches PostgreSQL dollar-quote tags: $$ or $identifier$ where the
+// identifier must start with a letter or underscore (not a digit). This avoids
+// false positives on $1, $2 etc. parameter references.
+var dollarQuoteRe = regexp.MustCompile(`\$(?:[a-zA-Z_][a-zA-Z0-9_]*)?\$`)
+
+func splitDollarQuotedSegments(sql string) []dollarQuotedSegment {
+	var segments []dollarQuotedSegment
+
+	pos := 0
+	for pos < len(sql) {
+		// Find the next dollar-quote opening tag
+		loc := dollarQuoteRe.FindStringIndex(sql[pos:])
+		if loc == nil {
+			// No more dollar quotes — rest is unquoted
+			segments = append(segments, dollarQuotedSegment{text: sql[pos:], quoted: false})
+			break
+		}
+
+		openStart := pos + loc[0]
+		openEnd := pos + loc[1]
+		tag := sql[openStart:openEnd]
+
+		// Add the unquoted segment before this tag
+		if openStart > pos {
+			segments = append(segments, dollarQuotedSegment{text: sql[pos:openStart], quoted: false})
+		}
+
+		// Find the matching closing tag
+		closeIdx := strings.Index(sql[openEnd:], tag)
+		if closeIdx == -1 {
+			// No closing tag — treat rest as quoted (unterminated)
+			segments = append(segments, dollarQuotedSegment{text: sql[openStart:], quoted: true})
+			pos = len(sql)
+		} else {
+			closeEnd := openEnd + closeIdx + len(tag)
+			segments = append(segments, dollarQuotedSegment{text: sql[openStart:closeEnd], quoted: true})
+			pos = closeEnd
+		}
+	}
+	return segments
+}
+
+// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
+func stripSchemaQualificationsFromText(text string, schemaName string) string {
 	// Escape the schema name for use in regex
 	escapedSchema := regexp.QuoteMeta(schemaName)
 
@@ -133,7 +204,7 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 	pattern4 := fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
 	re4 := regexp.MustCompile(pattern4)
 
-	result := sql
+	result := text
 	// Apply in order: quoted schema first to avoid double-matching
 	result = re1.ReplaceAllString(result, "$1")
 	result = re2.ReplaceAllString(result, "$1")
diff --git a/internal/postgres/desired_state_test.go b/internal/postgres/desired_state_test.go
index c1851544..77fb31f4 100644
--- a/internal/postgres/desired_state_test.go
+++ b/internal/postgres/desired_state_test.go
@@ -1,9 +1,82 @@
 package postgres
 
 import (
+	"reflect"
 	"testing"
 )
 
+func TestSplitDollarQuotedSegments(t *testing.T) {
+	tests := []struct {
+		name     string
+		sql      string
+		expected []dollarQuotedSegment
+	}{
+		{
+			name:     "no dollar quotes",
+			sql:      "SELECT 1 FROM public.users;",
+			expected: []dollarQuotedSegment{{text: "SELECT 1 FROM public.users;", quoted: false}},
+		},
+		{
+			name: "simple dollar-quoted body",
+			sql:  "CREATE FUNCTION f() AS $$body$$ LANGUAGE sql;",
+			expected: []dollarQuotedSegment{
+				{text: "CREATE FUNCTION f() AS ", quoted: false},
+				{text: "$$body$$", quoted: true},
+				{text: " LANGUAGE sql;", quoted: false},
+			},
+		},
+		{
+			name: "named dollar-quote tag",
+			sql:  "AS $func$body$func$ LANGUAGE sql;",
+			expected: []dollarQuotedSegment{
+				{text: "AS ", quoted: false},
+				{text: "$func$body$func$", quoted: true},
+				{text: " LANGUAGE sql;", quoted: false},
+			},
+		},
+		{
+			name: "parameter references not treated as dollar quotes",
+			sql:  "SELECT $1 + $2 FROM t;",
+			expected: []dollarQuotedSegment{
+				{text: "SELECT $1 + $2 FROM t;", quoted: false},
+			},
+		},
+		{
+			name: "multiple dollar-quoted blocks",
+			sql:  "AS $$body1$$; AS $f$body2$f$;",
+			expected: []dollarQuotedSegment{
+				{text: "AS ", quoted: false},
+				{text: "$$body1$$", quoted: true},
+				{text: "; AS ", quoted: false},
+				{text: "$f$body2$f$", quoted: true},
+				{text: ";", quoted: false},
+			},
+		},
+		{
+			name: "unterminated dollar quote",
+			sql:  "AS $$body without end",
+			expected: []dollarQuotedSegment{
+				{text: "AS ", quoted: false},
+				{text: "$$body without end", quoted: true},
+			},
+		},
+		{
+			name:     "empty input",
+			sql:      "",
+			expected: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := splitDollarQuotedSegments(tt.sql)
+			if !reflect.DeepEqual(result, tt.expected) {
+				t.Errorf("splitDollarQuotedSegments(%q)\n  got:  %+v\n  want: %+v", tt.sql, result, tt.expected)
+			}
+		})
+	}
+}
+
 func TestReplaceSchemaInSearchPath(t *testing.T) {
 	tests := []struct {
 		name         string
diff --git a/ir/normalize.go b/ir/normalize.go
index aa61db79..2b62e894 100644
--- a/ir/normalize.go
+++ b/ir/normalize.go
@@ -280,7 +280,7 @@ func normalizeView(view *View) {
 
 	// Strip same-schema qualifiers from view definition for consistent comparison.
 	// This uses the same logic as function/procedure body normalization.
-	view.Definition = stripSchemaPrefixFromBody(view.Definition, view.Schema)
+	view.Definition = StripSchemaPrefixFromBody(view.Definition, view.Schema)
 
 	// Normalize triggers on the view (e.g., INSTEAD OF triggers)
 	for _, trigger := range view.Triggers {
@@ -321,8 +321,10 @@ func normalizeFunction(function *Function) {
 	}
 	// Normalize function body to handle whitespace differences
 	function.Definition = normalizeFunctionDefinition(function.Definition)
-	// Strip current schema qualifier from function body for consistent unqualified output
-	function.Definition = stripSchemaPrefixFromBody(function.Definition, function.Schema)
+	// Note: We intentionally do NOT strip schema qualifiers from function bodies here.
+	// Functions may have SET search_path that excludes their own schema, making
+	// qualified references (e.g., public.test) necessary. Stripping is done at
+	// comparison time in the diff package instead. (Issue #354)
 }
 
 // normalizeFunctionDefinition normalizes function body whitespace
@@ -344,10 +346,10 @@ func normalizeFunctionDefinition(def string) string {
 	return strings.Join(normalized, "\n")
 }
 
-// stripSchemaPrefixFromBody removes the current schema qualifier from identifiers
+// StripSchemaPrefixFromBody removes the current schema qualifier from identifiers
 // in a function or procedure body. For example, "public.users" becomes "users".
 // It skips single-quoted string literals to avoid modifying string constants.
-func stripSchemaPrefixFromBody(body, schema string) string {
+func StripSchemaPrefixFromBody(body, schema string) string {
 	if body == "" || schema == "" {
 		return body
 	}
@@ -445,8 +447,8 @@ func normalizeProcedure(procedure *Procedure) {
 		}
 	}
 
-	// Strip current schema qualifier from procedure body for consistent unqualified output
-	procedure.Definition = stripSchemaPrefixFromBody(procedure.Definition, procedure.Schema)
+	// Note: We intentionally do NOT strip schema qualifiers from procedure bodies here.
+	// Same rationale as functions — see normalizeFunction. (Issue #354)
 }
 
 // normalizeFunctionReturnType normalizes function return types, especially TABLE types
diff --git a/ir/normalize_test.go b/ir/normalize_test.go
index 78760e3a..cfaf96be 100644
--- a/ir/normalize_test.go
+++ b/ir/normalize_test.go
@@ -93,9 +93,9 @@ func TestStripSchemaPrefixFromBody(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := stripSchemaPrefixFromBody(tt.body, tt.schema)
+			result := StripSchemaPrefixFromBody(tt.body, tt.schema)
 			if result != tt.expected {
-				t.Errorf("stripSchemaPrefixFromBody(%q, %q) = %q, want %q", tt.body, tt.schema, result, tt.expected)
+				t.Errorf("StripSchemaPrefixFromBody(%q, %q) = %q, want %q", tt.body, tt.schema, result, tt.expected)
 			}
 		})
 	}
diff --git a/testdata/diff/create_function/issue_354_empty_search_path/diff.sql b/testdata/diff/create_function/issue_354_empty_search_path/diff.sql
index b01d1a00..27894d06 100644
--- a/testdata/diff/create_function/issue_354_empty_search_path/diff.sql
+++ b/testdata/diff/create_function/issue_354_empty_search_path/diff.sql
@@ -7,6 +7,6 @@ VOLATILE
 SET search_path = ''
 AS $$
 BEGIN
-  INSERT INTO test (title) VALUES (p_title);
+  INSERT INTO public.test (title) VALUES (p_title);
 END;
 $$;
diff --git a/testdata/diff/create_function/issue_354_empty_search_path/plan.json b/testdata/diff/create_function/issue_354_empty_search_path/plan.json
index 674f3a36..36b06669 100644
--- a/testdata/diff/create_function/issue_354_empty_search_path/plan.json
+++ b/testdata/diff/create_function/issue_354_empty_search_path/plan.json
@@ -9,7 +9,7 @@
     {
       "steps": [
         {
-          "sql": "CREATE OR REPLACE FUNCTION create_hello(\n    p_title text\n)\nRETURNS void\nLANGUAGE plpgsql\nVOLATILE\nSET search_path = ''\nAS $$\nBEGIN\n  INSERT INTO test (title) VALUES (p_title);\nEND;\n$$;",
+          "sql": "CREATE OR REPLACE FUNCTION create_hello(\n    p_title text\n)\nRETURNS void\nLANGUAGE plpgsql\nVOLATILE\nSET search_path = ''\nAS $$\nBEGIN\n  INSERT INTO public.test (title) VALUES (p_title);\nEND;\n$$;",
           "type": "function",
           "operation": "create",
           "path": "public.create_hello"
diff --git a/testdata/diff/create_function/issue_354_empty_search_path/plan.sql b/testdata/diff/create_function/issue_354_empty_search_path/plan.sql
index b01d1a00..27894d06 100644
--- a/testdata/diff/create_function/issue_354_empty_search_path/plan.sql
+++ b/testdata/diff/create_function/issue_354_empty_search_path/plan.sql
@@ -7,6 +7,6 @@ VOLATILE
 SET search_path = ''
 AS $$
 BEGIN
-  INSERT INTO test (title) VALUES (p_title);
+  INSERT INTO public.test (title) VALUES (p_title);
 END;
 $$;
diff --git a/testdata/diff/create_function/issue_354_empty_search_path/plan.txt b/testdata/diff/create_function/issue_354_empty_search_path/plan.txt
index 1c1a2b22..43a624d3 100644
--- a/testdata/diff/create_function/issue_354_empty_search_path/plan.txt
+++ b/testdata/diff/create_function/issue_354_empty_search_path/plan.txt
@@ -18,6 +18,6 @@ VOLATILE
 SET search_path = ''
 AS $$
 BEGIN
-  INSERT INTO test (title) VALUES (p_title);
+  INSERT INTO public.test (title) VALUES (p_title);
 END;
 $$;
diff --git a/testdata/diff/create_trigger/add_trigger/plan.json b/testdata/diff/create_trigger/add_trigger/plan.json
index 52eb6442..69cc45db 100644
--- a/testdata/diff/create_trigger/add_trigger/plan.json
+++ b/testdata/diff/create_trigger/add_trigger/plan.json
@@ -3,7 +3,7 @@
   "pgschema_version": "1.7.4",
   "created_at": "1970-01-01T00:00:00Z",
   "source_fingerprint": {
-    "hash": "f709d6c4a0ded1f2bbfaa619c74df2f415695c7caecbaab0760a81d802cae7d0"
+    "hash": "a099356e267a2e28a40672b8d007db5082d1399d9239faa7fcdc194842027381"
   },
   "groups": [
     {
diff --git a/testdata/diff/dependency/table_to_function/diff.sql b/testdata/diff/dependency/table_to_function/diff.sql
index 18a1e39c..22faba8c 100644
--- a/testdata/diff/dependency/table_to_function/diff.sql
+++ b/testdata/diff/dependency/table_to_function/diff.sql
@@ -13,6 +13,6 @@ LANGUAGE plpgsql
 VOLATILE
 AS $$
 BEGIN
-    RETURN (SELECT COUNT(*) FROM documents);
+    RETURN (SELECT COUNT(*) FROM public.documents);
 END;
 $$;
diff --git a/testdata/diff/dependency/table_to_function/plan.json b/testdata/diff/dependency/table_to_function/plan.json
index fe4db841..3ae6855a 100644
--- a/testdata/diff/dependency/table_to_function/plan.json
+++ b/testdata/diff/dependency/table_to_function/plan.json
@@ -15,7 +15,7 @@
           "path": "public.documents"
         },
         {
-          "sql": "CREATE OR REPLACE FUNCTION get_document_count()\nRETURNS integer\nLANGUAGE plpgsql\nVOLATILE\nAS $$\nBEGIN\n    RETURN (SELECT COUNT(*) FROM documents);\nEND;\n$$;",
+          "sql": "CREATE OR REPLACE FUNCTION get_document_count()\nRETURNS integer\nLANGUAGE plpgsql\nVOLATILE\nAS $$\nBEGIN\n    RETURN (SELECT COUNT(*) FROM public.documents);\nEND;\n$$;",
           "type": "function",
           "operation": "create",
           "path": "public.get_document_count"
diff --git a/testdata/diff/dependency/table_to_function/plan.sql b/testdata/diff/dependency/table_to_function/plan.sql
index 61ef271d..fa6287e2 100644
--- a/testdata/diff/dependency/table_to_function/plan.sql
+++ b/testdata/diff/dependency/table_to_function/plan.sql
@@ -12,6 +12,6 @@ LANGUAGE plpgsql
 VOLATILE
 AS $$
 BEGIN
-    RETURN (SELECT COUNT(*) FROM documents);
+    RETURN (SELECT COUNT(*) FROM public.documents);
 END;
 $$;
diff --git a/testdata/diff/dependency/table_to_function/plan.txt b/testdata/diff/dependency/table_to_function/plan.txt
index 3a2c94b2..9d03bc4c 100644
--- a/testdata/diff/dependency/table_to_function/plan.txt
+++ b/testdata/diff/dependency/table_to_function/plan.txt
@@ -27,6 +27,6 @@ LANGUAGE plpgsql
 VOLATILE
 AS $$
 BEGIN
-    RETURN (SELECT COUNT(*) FROM documents);
+    RETURN (SELECT COUNT(*) FROM public.documents);
 END;
 $$;
diff --git a/testdata/dump/issue_252_function_schema_qualifier/pgschema.sql b/testdata/dump/issue_252_function_schema_qualifier/pgschema.sql
index b83b09ec..a56654f5 100644
--- a/testdata/dump/issue_252_function_schema_qualifier/pgschema.sql
+++ b/testdata/dump/issue_252_function_schema_qualifier/pgschema.sql
@@ -43,7 +43,7 @@ LANGUAGE plpgsql
 VOLATILE
 AS $$
 BEGIN
-    RETURN QUERY SELECT u.id, u.name, u.email FROM users u WHERE u.name = p_name;
+    RETURN QUERY SELECT u.id, u.name, u.email FROM public.users u WHERE u.name = p_name;
 END;
 $$;
 
@@ -57,7 +57,7 @@ LANGUAGE plpgsql
 VOLATILE
 AS $$
 BEGIN
-    RETURN (SELECT count(*)::integer FROM users);
+    RETURN (SELECT count(*)::integer FROM public.users);
 END;
 $$;
 
@@ -72,7 +72,7 @@ CREATE OR REPLACE PROCEDURE insert_user(
 LANGUAGE plpgsql
 AS $$
 BEGIN
-    INSERT INTO users (name, email) VALUES (p_name, p_email);
+    INSERT INTO public.users (name, email) VALUES (p_name, p_email);
 END;
 $$;
 
diff --git a/testdata/dump/issue_320_plpgsql_reserved_keyword_type/pgschema.sql b/testdata/dump/issue_320_plpgsql_reserved_keyword_type/pgschema.sql
index b3488ffd..8ba777b0 100644
--- a/testdata/dump/issue_320_plpgsql_reserved_keyword_type/pgschema.sql
+++ b/testdata/dump/issue_320_plpgsql_reserved_keyword_type/pgschema.sql
@@ -29,7 +29,7 @@ AS $$
 DECLARE
     total_count integer;
 BEGIN
-    SELECT count(*)::integer INTO total_count FROM "user";
+    SELECT count(*)::integer INTO total_count FROM public."user";
     RETURN total_count;
 END;
 $$;
@@ -44,9 +44,9 @@ LANGUAGE plpgsql
 VOLATILE
 AS $$
 DECLARE
-    account "user";
+    account public.user;
 BEGIN
-    SELECT * INTO account FROM "user" LIMIT 1;
+    SELECT * INTO account FROM public.user LIMIT 1;
     RETURN account.name;
 END;
 $$;