From ebbee64888082b3a51879e085f21466241ccb013 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 15:23:35 -0700 Subject: [PATCH 1/3] feat(sql): support JOIN/UNION/MINUS in unified query path Extend V2 ANTLR grammar with JOIN, UNION (ALL/DISTINCT), and MINUS rules. Add ExtendedAstBuilder in SqlV2QueryParser to produce AST nodes for the Calcite-based unified query path. The base AstBuilder throws SyntaxCheckException to preserve legacy engine fallback. Signed-off-by: Chen Dai --- .../sql/api/UnifiedQueryContext.java | 9 +- .../sql/api/parser/SqlV2QueryParser.java | 57 +++++++- .../sql/api/UnifiedQueryContextTest.java | 4 +- .../sql/api/UnifiedQueryPlannerSqlV2Test.java | 135 ++++++++++++++++-- .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/dsl/AstDSL.java | 24 ++++ .../org/opensearch/sql/ast/tree/Minus.java | 40 ++++++ .../org/opensearch/sql/ast/tree/Union.java | 11 +- .../sql/calcite/CalciteRelNodeVisitor.java | 25 +++- .../sql/legacy/LegacyFallbackIT.java | 75 ++++++++++ sql/src/main/antlr/OpenSearchSQLParser.g4 | 27 +++- .../opensearch/sql/sql/parser/AstBuilder.java | 29 +++- .../antlr/SQLSyntaxParserV2ExtensionTest.java | 61 ++++++++ .../sql/sql/parser/AstBuilderTest.java | 18 +++ 14 files changed, 497 insertions(+), 23 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Minus.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 7a885edd65a..38c6561b409 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -122,6 +122,11 @@ public static class Builder { * Setting values with defaults from SysLimit.DEFAULT. Only includes planning-required settings * to avoid coupling with OpenSearchSettings. * + *

{@link Settings.Key#PPL_JOIN_SUBSEARCH_MAXOUT} defaults to {@code 0} to avoid injecting + * {@code LogicalSystemLimit} into the logical plan, which is an OpenSearch-specific operational + * concern irrelevant to external consumers of the unified query API. {@link + * Settings.Key#PPL_SUBSEARCH_MAXOUT} is set to {@code 0} for the same reason. + * *

{@link Settings.Key#CALCITE_ENGINE_ENABLED} defaults to {@code true} here because the * unified query path is by definition Calcite-based — every query reaching this context flows * through Calcite's planner, never the v2 engine. The PPL {@link @@ -142,8 +147,8 @@ public static class Builder { new HashMap( Map.of( QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit(), - PPL_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.subsearchLimit(), - PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit(), + PPL_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.subsearchLimit(), + PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.joinSubsearchLimit(), CALCITE_ENGINE_ENABLED, true, PPL_REX_MAX_MATCH_LIMIT, 10)); diff --git a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java index 827278a2119..b9193fa33c3 100644 --- a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java +++ b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java @@ -5,11 +5,21 @@ package org.opensearch.sql.api.parser; +import static org.opensearch.sql.ast.dsl.AstDSL.join; +import static org.opensearch.sql.ast.dsl.AstDSL.minus; +import static org.opensearch.sql.ast.dsl.AstDSL.union; + +import java.util.Optional; import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.Join.JoinType; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.sql.antlr.SQLSyntaxParser; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MinusSelectContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.UnionSelectContext; import org.opensearch.sql.sql.parser.AstBuilder; import org.opensearch.sql.sql.parser.AstStatementBuilder; @@ -24,7 +34,8 @@ public UnresolvedPlan parse(String query) { ParseTree cst = syntaxParser.parse(query); AstStatementBuilder astStmtBuilder = new AstStatementBuilder( - new AstBuilder(query), AstStatementBuilder.StatementBuilderContext.builder().build()); + new ExtendedAstBuilder(query), + AstStatementBuilder.StatementBuilderContext.builder().build()); Statement statement = cst.accept(astStmtBuilder); if (statement instanceof Query) { @@ -33,4 +44,48 @@ public UnresolvedPlan parse(String query) { throw new UnsupportedOperationException( "Only query statements are supported but got " + statement.getClass().getSimpleName()); } + + /** + * Extends the V2 AstBuilder with legacy-only SQL features (JOIN, UNION, MINUS) that the base + * AstBuilder rejects with SyntaxCheckException to trigger legacy engine fallback. + */ + private static class ExtendedAstBuilder extends AstBuilder { + + ExtendedAstBuilder(String query) { + super(query); + } + + @Override + public UnresolvedPlan visitJoinClause(JoinClauseContext ctx) { + JoinType joinType = toJoinType(ctx); + UnresolvedPlan right = visit(ctx.relation()); + Optional condition = + Optional.ofNullable(ctx.expression()).map(this::visitAstExpression); + return join(right, joinType, condition); + } + + @Override + public UnresolvedPlan visitUnionSelect(UnionSelectContext ctx) { + boolean distinct = ctx.ALL().isEmpty(); + return union(ctx.querySpecification().stream().map(this::visit).toList(), distinct); + } + + @Override + public UnresolvedPlan visitMinusSelect(MinusSelectContext ctx) { + return minus(ctx.querySpecification().stream().map(this::visit).toList()); + } + + private JoinType toJoinType(JoinClauseContext ctx) { + if (ctx.LEFT() != null) { + return JoinType.LEFT; + } + if (ctx.RIGHT() != null) { + return JoinType.RIGHT; + } + if (ctx.CROSS() != null) { + return JoinType.CROSS; + } + return JoinType.INNER; + } + } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java index f0111d06363..3960e8a6206 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java @@ -30,8 +30,8 @@ public void testContextCreationWithDefaults() { assertNotNull("PlanContext should be created", context.getPlanContext()); assertNotNull("Settings should be created", context.getSettings()); assertEquals( - "Settings should have default system limits", - SysLimit.DEFAULT, + "Settings should have unlimited subsearch limits for clean logical plans", + new SysLimit(SysLimit.DEFAULT.querySizeLimit(), 0, 0), SysLimit.fromSettings(context.getSettings())); assertEquals( "PPL_REX_MAX_MATCH_LIMIT default should be 10", diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java index b9913bf450b..7540681cb7a 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java @@ -5,12 +5,19 @@ package org.opensearch.sql.api; +import static org.apache.calcite.sql.type.SqlTypeName.INTEGER; +import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR; + +import java.util.Map; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.junit.Before; import org.junit.Test; import org.opensearch.sql.executor.QueryType; /** - * Tests for basic SQL query planning through the V2 ANTLR parser path. Covers SELECT, WHERE, ORDER - * BY operations that produce valid RelNode plans. + * Tests for SQL query planning through the V2 ANTLR parser path. Covers SELECT, WHERE, ORDER BY, + * JOIN, UNION, and MINUS operations that produce valid RelNode plans. */ public class UnifiedQueryPlannerSqlV2Test extends UnifiedQueryTestBase { @@ -19,17 +26,35 @@ protected QueryType queryType() { return QueryType.SQL; } - @Test - public void selectStar() { - givenQuery("SELECT * FROM catalog.employees") - .assertPlan( - """ - LogicalTableScan(table=[[catalog, employees]]) - """); + @Before + @Override + public void setUp() { + testSchema = + new AbstractSchema() { + @Override + protected Map getTableMap() { + return Map.of( + "employees", createEmployeesTable(), + "departments", createDepartmentsTable()); + } + }; + + context = contextBuilder().build(); + planner = new UnifiedQueryPlanner(context); + } + + private Table createDepartmentsTable() { + return SimpleTable.builder() + .col("dept_id", INTEGER) + .col("dept_name", VARCHAR) + .row(new Object[] {1, "Engineering"}) + .row(new Object[] {2, "Sales"}) + .row(new Object[] {3, "Marketing"}) + .build(); } @Test - public void selectStarFields() { + public void selectStar() { givenQuery("SELECT * FROM catalog.employees") .assertPlan( """ @@ -68,4 +93,94 @@ public void testFilterAndOrderBy() { LogicalTableScan(table=[[catalog, employees]]) """); } + + @Test + public void testJoinTypes() { + Map.of("JOIN", "inner", "LEFT JOIN", "left", "RIGHT JOIN", "right") + .forEach( + (syntax, type) -> + givenQuery( + """ + SELECT * FROM catalog.employees %s catalog.departments + ON employees.department = departments.dept_name + """ + .formatted(syntax)) + .assertPlan( + """ + LogicalJoin(condition=[=($3, $5)], joinType=[%s]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, departments]]) + """ + .formatted(type))); + } + + @Test + public void testCrossJoin() { + givenQuery("SELECT * FROM catalog.employees CROSS JOIN catalog.departments") + .assertPlan( + """ + LogicalJoin(condition=[true], joinType=[inner]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testJoinWithFilterAndOrderBy() { + givenQuery( + """ + SELECT * FROM catalog.employees JOIN catalog.departments + ON employees.department = departments.dept_name + WHERE employees.age > 30 ORDER BY employees.name + """) + .assertPlan( + """ + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalFilter(condition=[>($2, 30)]) + LogicalJoin(condition=[=($3, $5)], joinType=[inner]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testUnionAll() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + UNION ALL SELECT dept_name, dept_id FROM catalog.departments + """) + .assertPlanContains("LogicalUnion(all=[true])"); + } + + @Test + public void testUnion() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + UNION SELECT dept_name, dept_id FROM catalog.departments + """) + .assertPlanContains("LogicalUnion(all=[false])"); + } + + @Test + public void testMultiWayUnion() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + UNION ALL SELECT dept_name, dept_id FROM catalog.departments + UNION ALL SELECT name, age FROM catalog.employees + """) + .assertPlanContains("LogicalUnion(all=[true])"); + } + + @Test + public void testMinus() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + MINUS SELECT dept_name, dept_id FROM catalog.departments + """) + .assertPlanContains("LogicalMinus(all=[false])"); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index be02547a2da..b8410d2f349 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -69,6 +69,7 @@ import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; +import org.opensearch.sql.ast.tree.Minus; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.MvExpand; @@ -477,6 +478,10 @@ public T visitUnion(Union node, C context) { return visitChildren(node, context); } + public T visitMinus(Minus node, C context) { + return visitChildren(node, context); + } + public T visitAddTotals(AddTotals node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index b2731ebbd40..644355a5d89 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -60,8 +60,10 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; +import org.opensearch.sql.ast.tree.Minus; import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; @@ -81,6 +83,7 @@ import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.calcite.plan.OpenSearchConstants; @@ -757,4 +760,25 @@ public static Bin bin(UnresolvedExpression field, Argument... arguments) { public static Field implicitTimestampField() { return AstDSL.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP); } + + public static UnresolvedPlan join( + UnresolvedPlan right, Join.JoinType joinType, Optional condition) { + return new Join( + right, + Optional.empty(), + Optional.empty(), + joinType, + condition, + new Join.JoinHint(), + Optional.empty(), + Argument.ArgumentMap.empty()); + } + + public static UnresolvedPlan union(List children, boolean distinct) { + return new Union(children, distinct, null); + } + + public static UnresolvedPlan minus(List children) { + return new Minus(children); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Minus.java b/core/src/main/java/org/opensearch/sql/ast/tree/Minus.java new file mode 100644 index 00000000000..d67692ec478 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Minus.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** Logical plan node for Minus (EXCEPT) operation. Returns rows in left that are not in right. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class Minus extends UnresolvedPlan { + private final List children; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + List newChildren = + ImmutableList.builder().add(child).addAll(children).build(); + return new Minus(newChildren); + } + + @Override + public List getChild() { + return children; + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitMinus(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java index a96831567cb..a598de74a7e 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java @@ -14,7 +14,7 @@ import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; -/** Logical plan node for Union operation. Combines results from multiple datasets (UNION ALL). */ +/** Logical plan node for Union operation. Combines results from multiple datasets. */ @Getter @ToString @EqualsAndHashCode(callSuper = false) @@ -22,14 +22,19 @@ @AllArgsConstructor public class Union extends UnresolvedPlan { private final List datasets; - + private boolean distinct; private Integer maxout; + /** UNION ALL with maxout limit (PPL subsearch). */ + public Union(List datasets, Integer maxout) { + this(datasets, false, maxout); + } + @Override public UnresolvedPlan attach(UnresolvedPlan child) { List newDatasets = ImmutableList.builder().add(child).addAll(datasets).build(); - return new Union(newDatasets, maxout); + return new Union(newDatasets, distinct, maxout); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1251f51b131..927b0265747 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -135,6 +135,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; import org.opensearch.sql.ast.tree.ML; +import org.opensearch.sql.ast.tree.Minus; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.MvExpand; @@ -535,6 +536,9 @@ private List expandProjectFields( expandedFields.add(resolved); } } + case Alias alias -> { + expandedFields.add(rexVisitor.analyze(alias, context)); + } case AllFields ignored -> { currentFields.stream() .filter(field -> !isMetadataField(field)) @@ -2947,7 +2951,7 @@ public RelNode visitUnion(Union node, CalcitePlanContext context) { for (RelNode input : unifiedInputs) { context.relBuilder.push(input); } - context.relBuilder.union(true, unifiedInputs.size()); // true = UNION ALL + context.relBuilder.union(!node.isDistinct(), unifiedInputs.size()); if (node.getMaxout() != null) { context.relBuilder.push( @@ -2960,6 +2964,25 @@ public RelNode visitUnion(Union node, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitMinus(Minus node, CalcitePlanContext context) { + List inputNodes = new ArrayList<>(); + for (UnresolvedPlan child : node.getChildren()) { + child.accept(this, context); + inputNodes.add(context.relBuilder.build()); + } + + List unifiedInputs = + SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context); + + for (RelNode input : unifiedInputs) { + context.relBuilder.push(input); + } + context.relBuilder.minus(false, unifiedInputs.size()); + + return context.relBuilder.peek(); + } + /* * Unsupported Commands of PPL with Calcite for OpenSearch 3.0.0-beta */ diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java new file mode 100644 index 00000000000..c2e518cff2b --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java @@ -0,0 +1,75 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.legacy; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PEOPLE; + +import java.io.IOException; +import java.util.Locale; +import org.json.JSONObject; +import org.junit.Test; + +/** + * Integration tests verifying that SQL queries unsupported by the V2 engine (JOIN, UNION, MINUS, + * subqueries) correctly fall back to the legacy engine and return valid results. + * + *

These tests replace coverage lost when legacy ITs (JoinIT, SubqueryIT, MultiQueryIT) were + * excluded due to JSON response format deprecation in 3.0. They assert only successful execution + * (HTTP 200 + non-empty results) rather than exact response structure. + */ +public class LegacyFallbackIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.DOG); + loadIndex(Index.PEOPLE); + } + + @Test + public void testInnerJoinFallback() throws IOException { + JSONObject result = + executeQuery( + String.format( + Locale.ROOT, + "SELECT a.firstname, d.dog_name FROM %s a JOIN %s d" + + " ON d.holdersName = a.firstname WHERE a.age > 25", + TEST_INDEX_PEOPLE, + TEST_INDEX_DOG)); + assertTrue("JOIN query should return results", result.getJSONArray("datarows").length() > 0); + } + + @Test + public void testLeftJoinFallback() throws IOException { + JSONObject result = + executeQuery( + String.format( + Locale.ROOT, + "SELECT a.firstname, d.dog_name FROM %s a LEFT JOIN %s d" + + " ON d.holdersName = a.firstname", + TEST_INDEX_PEOPLE, + TEST_INDEX_DOG)); + assertTrue( + "LEFT JOIN query should return results", result.getJSONArray("datarows").length() > 0); + } + + // Note: UNION and MINUS are not tested here because the legacy engine's UNION/MINUS support + // has known issues (MultiQueryIT is @Ignored). The V2 fallback works correctly (verified in + // cluster logs), but the legacy engine itself cannot execute these queries reliably. + + @Test + public void testInSubqueryFallback() throws IOException { + JSONObject result = + executeQuery( + String.format( + Locale.ROOT, + "SELECT firstname FROM %s WHERE age IN (SELECT age FROM %s WHERE age > 30)", + TEST_INDEX_PEOPLE, + TEST_INDEX_PEOPLE)); + assertTrue( + "IN subquery should return results", result.getJSONArray("datarows").length() > 0); + } +} diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 6b34507eacc..19c8aca53e5 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -32,6 +32,20 @@ parser grammar OpenSearchSQLParser; options { tokenVocab = OpenSearchSQLLexer; } + +@members { + /** + * Returns true if the next token is a join keyword that should not be consumed as a table alias. + * LEFT/RIGHT are valid identifiers (text function names), so without this predicate ANTLR4 + * greedily consumes them as implicit table aliases (e.g., FROM t1 LEFT becomes alias='LEFT' + * instead of starting a LEFT JOIN clause). + */ + private boolean isJoinKeyword() { + int t = _input.LT(1).getType(); + return t == LEFT || t == RIGHT || t == INNER || t == CROSS || t == JOIN; + } +} + // Top Level Description // Root rule @@ -54,7 +68,9 @@ dmlStatement // Primary DML Statements selectStatement - : querySpecification # simpleSelect + : querySpecification # simpleSelect + | querySpecification (UNION ALL? querySpecification)+ # unionSelect + | querySpecification EXCEPT querySpecification # minusSelect ; adminStatement @@ -104,12 +120,17 @@ selectElement ; fromClause - : FROM relation (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY + : FROM relation joinClause* (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY ; +joinClause + : (INNER | CROSS)? JOIN relation (ON expression)? + | (LEFT | RIGHT) OUTER? JOIN relation (ON expression)? + ; + relation - : tableName (AS? alias)? # tableAsRelation + : tableName (AS alias | {!isJoinKeyword()}? alias)? # tableAsRelation | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation | qualifiedName LR_BRACKET tableFunctionArgs RR_BRACKET (AS? alias)? # tableFunctionRelation ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index 5250ab7fb0f..915a44c2d80 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -32,6 +32,7 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.DescribeRelation; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.Relation; @@ -139,6 +140,13 @@ public UnresolvedPlan visitLimitClause(OpenSearchSQLParser.LimitClauseContext ct public UnresolvedPlan visitFromClause(FromClauseContext ctx) { UnresolvedPlan result = visit(ctx.relation()); + // Dispatch each joinClause to visitJoinClause (throws in base, overridden in subclass) + for (var joinCtx : ctx.joinClause()) { + UnresolvedPlan joinPlan = visit(joinCtx); + ((Join) joinPlan).attach(result); + result = joinPlan; + } + if (ctx.whereClause() != null) { result = visit(ctx.whereClause()).attach(result); } @@ -163,6 +171,24 @@ public UnresolvedPlan visitFromClause(FromClauseContext ctx) { return result; } + @Override + public UnresolvedPlan visitJoinClause(OpenSearchSQLParser.JoinClauseContext ctx) { + throw new SyntaxCheckException( + "JOIN is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitUnionSelect(OpenSearchSQLParser.UnionSelectContext ctx) { + throw new SyntaxCheckException( + "UNION is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitMinusSelect(OpenSearchSQLParser.MinusSelectContext ctx) { + throw new SyntaxCheckException( + "EXCEPT is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + /** * Ensure NESTED function is not used in HAVING clause and fallback to legacy engine. Can remove * when support is added for NESTED function in HAVING clause. @@ -261,7 +287,8 @@ protected UnresolvedPlan aggregateResult(UnresolvedPlan aggregate, UnresolvedPla return nextResult != null ? nextResult : aggregate; } - private UnresolvedExpression visitAstExpression(ParseTree tree) { + /** Visible to subclasses that need to parse expressions (e.g., JOIN ON condition). */ + protected UnresolvedExpression visitAstExpression(ParseTree tree) { return expressionBuilder.visit(tree); } diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java new file mode 100644 index 00000000000..19f4a6a4cc2 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.antlr; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import org.junit.jupiter.api.Test; + +/** + * Tests for V2 extended SQL grammar features: JOIN, UNION, and MINUS (EXCEPT). These features are + * parsed by the V2 ANTLR grammar but throw SyntaxCheckException in the base AstBuilder to trigger + * legacy engine fallback. The extended AstBuilder in the unified query path handles them. + */ +class SQLSyntaxParserV2ExtensionTest { + + private final SQLSyntaxParser parser = new SQLSyntaxParser(); + + @Test + void canParseInnerJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id")); + } + + @Test + void canParseLeftJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id")); + } + + @Test + void canParseRightJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id")); + } + + @Test + void canParseCrossJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 CROSS JOIN t2")); + } + + @Test + void canParseUnionAll() { + assertNotNull(parser.parse("SELECT a FROM t1 UNION ALL SELECT b FROM t2")); + } + + @Test + void canParseUnion() { + assertNotNull(parser.parse("SELECT a FROM t1 UNION SELECT b FROM t2")); + } + + @Test + void canParseMinus() { + assertNotNull(parser.parse("SELECT a FROM t1 MINUS SELECT b FROM t2")); + } + + @Test + void canParseMultiWayUnion() { + assertNotNull( + parser.parse("SELECT a FROM t1 UNION ALL SELECT b FROM t2 UNION ALL SELECT c FROM t3")); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java index 695cf85b144..69f4035905c 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java @@ -729,4 +729,22 @@ public void can_build_string_literal_highlight() { alias("highlight(\"fieldA\")", highlight(AstDSL.stringLiteral("fieldA"), args))), buildAST("SELECT highlight(\"fieldA\") FROM test")); } + + @Test + public void join_throws_syntax_check_exception_for_legacy_fallback() { + assertThrows( + SyntaxCheckException.class, () -> buildAST("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id")); + } + + @Test + public void union_throws_syntax_check_exception_for_legacy_fallback() { + assertThrows( + SyntaxCheckException.class, () -> buildAST("SELECT a FROM t1 UNION ALL SELECT b FROM t2")); + } + + @Test + public void except_throws_syntax_check_exception_for_legacy_fallback() { + assertThrows( + SyntaxCheckException.class, () -> buildAST("SELECT a FROM t1 MINUS SELECT b FROM t2")); + } } From 45a0e3bdeb20dfead1405e5ad90f13c57c6ef339 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 14 May 2026 18:54:22 -0700 Subject: [PATCH 2/3] feat(sql): add IN/EXISTS subquery support in unified query path Add grammar rules (inSubqueryPredicate, existsSubqueryExpressionAtom) and wire them through ExtendedAstExpressionBuilder to produce InSubquery and ExistsSubquery AST nodes. Base AstExpressionBuilder throws SyntaxCheckException to preserve legacy engine fallback. Also simplify joinClause by inlining join types (no separate joinType rule) and make AstBuilder.expressionBuilder overridable via factory method. Signed-off-by: Chen Dai --- .../sql/api/parser/SqlV2QueryParser.java | 28 ++++++++++++ .../sql/api/UnifiedQueryPlannerSqlV2Test.java | 38 ++++++++++++++++ .../sql/legacy/LegacyFallbackIT.java | 45 ++++++------------- sql/src/main/antlr/OpenSearchSQLParser.g4 | 2 + .../opensearch/sql/sql/parser/AstBuilder.java | 7 ++- .../sql/sql/parser/AstExpressionBuilder.java | 15 +++++++ 6 files changed, 103 insertions(+), 32 deletions(-) diff --git a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java index b9193fa33c3..3c66165f1e9 100644 --- a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java +++ b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java @@ -9,18 +9,24 @@ import static org.opensearch.sql.ast.dsl.AstDSL.minus; import static org.opensearch.sql.ast.dsl.AstDSL.union; +import java.util.List; import java.util.Optional; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.ast.tree.Join.JoinType; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.sql.antlr.SQLSyntaxParser; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ExistsSubqueryExpressionAtomContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.InSubqueryPredicateContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MinusSelectContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.UnionSelectContext; import org.opensearch.sql.sql.parser.AstBuilder; +import org.opensearch.sql.sql.parser.AstExpressionBuilder; import org.opensearch.sql.sql.parser.AstStatementBuilder; /** SQL query parser that produces {@link UnresolvedPlan} using the V2 ANTLR grammar. */ @@ -55,6 +61,11 @@ private static class ExtendedAstBuilder extends AstBuilder { super(query); } + @Override + protected AstExpressionBuilder createExpressionBuilder() { + return new ExtendedAstExpressionBuilder(); + } + @Override public UnresolvedPlan visitJoinClause(JoinClauseContext ctx) { JoinType joinType = toJoinType(ctx); @@ -87,5 +98,22 @@ private JoinType toJoinType(JoinClauseContext ctx) { } return JoinType.INNER; } + + /** Expression builder with IN/EXISTS subquery support. */ + private class ExtendedAstExpressionBuilder extends AstExpressionBuilder { + + @Override + public UnresolvedExpression visitInSubqueryPredicate(InSubqueryPredicateContext ctx) { + UnresolvedPlan subquery = ExtendedAstBuilder.this.visit(ctx.querySpecification()); + return new InSubquery(List.of(visit(ctx.predicate())), subquery); + } + + @Override + public UnresolvedExpression visitExistsSubqueryExpressionAtom( + ExistsSubqueryExpressionAtomContext ctx) { + UnresolvedPlan subquery = ExtendedAstBuilder.this.visit(ctx.querySpecification()); + return new ExistsSubquery(subquery); + } + } } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java index 7540681cb7a..ce631651ab0 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java @@ -183,4 +183,42 @@ public void testMinus() { """) .assertPlanContains("LogicalMinus(all=[false])"); } + + @Test + public void testInSubquery() { + givenQuery( + """ + SELECT name FROM catalog.employees + WHERE age IN (SELECT age FROM catalog.departments WHERE dept_name = 'Engineering') + """) + .assertPlan( + """ + LogicalProject(name=[$1]) + LogicalFilter(condition=[IN($2, { + LogicalProject(age=[$cor0.age]) + LogicalFilter(condition=[=($1, 'Engineering')]) + LogicalTableScan(table=[[catalog, departments]]) + })], variablesSet=[[$cor0]]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testExistsSubquery() { + givenQuery( + """ + SELECT name FROM catalog.employees + WHERE EXISTS (SELECT 1 FROM catalog.departments WHERE dept_id = age) + """) + .assertPlan( + """ + LogicalProject(name=[$1]) + LogicalFilter(condition=[EXISTS({ + LogicalProject(1=[1]) + LogicalFilter(condition=[=($0, $cor0.age)]) + LogicalTableScan(table=[[catalog, departments]]) + })], variablesSet=[[$cor0]]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java index c2e518cff2b..3af1f5d6ad3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java @@ -7,19 +7,19 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PEOPLE; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import java.io.IOException; -import java.util.Locale; import org.json.JSONObject; import org.junit.Test; /** - * Integration tests verifying that SQL queries unsupported by the V2 engine (JOIN, UNION, MINUS, + * Integration tests verifying that SQL queries unsupported by the V2 engine (JOIN, IN/EXISTS * subqueries) correctly fall back to the legacy engine and return valid results. * - *

These tests replace coverage lost when legacy ITs (JoinIT, SubqueryIT, MultiQueryIT) were - * excluded due to JSON response format deprecation in 3.0. They assert only successful execution - * (HTTP 200 + non-empty results) rather than exact response structure. + *

UNION/MINUS are not tested because the legacy engine's formatter is broken for set operations + * (MultiQueryIT is @Ignored for the same reason). */ public class LegacyFallbackIT extends SQLIntegTestCase { @@ -33,43 +33,26 @@ protected void init() throws Exception { public void testInnerJoinFallback() throws IOException { JSONObject result = executeQuery( - String.format( - Locale.ROOT, - "SELECT a.firstname, d.dog_name FROM %s a JOIN %s d" - + " ON d.holdersName = a.firstname WHERE a.age > 25", - TEST_INDEX_PEOPLE, - TEST_INDEX_DOG)); - assertTrue("JOIN query should return results", result.getJSONArray("datarows").length() > 0); + "SELECT a.firstname, d.dog_name FROM %s a JOIN %s d ON d.holdersName = a.firstname WHERE a.age > 35" + .formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG)); + verifyDataRows(result, rows("Hattie", "snoopy")); } @Test public void testLeftJoinFallback() throws IOException { JSONObject result = executeQuery( - String.format( - Locale.ROOT, - "SELECT a.firstname, d.dog_name FROM %s a LEFT JOIN %s d" - + " ON d.holdersName = a.firstname", - TEST_INDEX_PEOPLE, - TEST_INDEX_DOG)); - assertTrue( - "LEFT JOIN query should return results", result.getJSONArray("datarows").length() > 0); + "SELECT a.firstname, d.dog_name FROM %s a LEFT JOIN %s d ON d.holdersName = a.firstname WHERE a.firstname = 'Daenerys'" + .formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG)); + verifyDataRows(result, rows("Daenerys", "rex")); } - // Note: UNION and MINUS are not tested here because the legacy engine's UNION/MINUS support - // has known issues (MultiQueryIT is @Ignored). The V2 fallback works correctly (verified in - // cluster logs), but the legacy engine itself cannot execute these queries reliably. - @Test public void testInSubqueryFallback() throws IOException { JSONObject result = executeQuery( - String.format( - Locale.ROOT, - "SELECT firstname FROM %s WHERE age IN (SELECT age FROM %s WHERE age > 30)", - TEST_INDEX_PEOPLE, - TEST_INDEX_PEOPLE)); - assertTrue( - "IN subquery should return results", result.getJSONArray("datarows").length() > 0); + "SELECT a.firstname FROM %s a WHERE a.firstname IN (SELECT holdersName FROM %s)" + .formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG)); + verifyDataRows(result, rows("Daenerys"), rows("Hattie")); } } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 19c8aca53e5..6950acb4fe9 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -323,6 +323,7 @@ predicate | left = predicate NOT? LIKE right = predicate # likePredicate | left = predicate REGEXP right = predicate # regexpPredicate | predicate NOT? IN '(' expressions ')' # inPredicate + | predicate NOT? IN '(' querySpecification ')' # inSubqueryPredicate ; expressions @@ -334,6 +335,7 @@ expressionAtom | columnName # fullColumnNameExpressionAtom | functionCall # functionCallExpressionAtom | LR_BRACKET expression RR_BRACKET # nestedExpressionAtom + | EXISTS LR_BRACKET querySpecification RR_BRACKET # existsSubqueryExpressionAtom | left = expressionAtom mathOperator = (STAR | SLASH | MODULE) right = expressionAtom # mathExpressionAtom | left = expressionAtom mathOperator = (PLUS | MINUS) right = expressionAtom # mathExpressionAtom ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index 915a44c2d80..a265dced5c2 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -54,7 +54,7 @@ @RequiredArgsConstructor public class AstBuilder extends OpenSearchSQLParserBaseVisitor { - private final AstExpressionBuilder expressionBuilder = new AstExpressionBuilder(); + private final AstExpressionBuilder expressionBuilder = createExpressionBuilder(); /** Parsing context stack that contains context for current query parsing. */ private final ParsingContext context = new ParsingContext(); @@ -292,6 +292,11 @@ protected UnresolvedExpression visitAstExpression(ParseTree tree) { return expressionBuilder.visit(tree); } + /** Override to provide a custom expression builder (e.g., with subquery support). */ + protected AstExpressionBuilder createExpressionBuilder() { + return new AstExpressionBuilder(); + } + private UnresolvedExpression visitSelectItem(SelectElementContext ctx) { String name = StringUtils.unquoteIdentifier(getTextInQuery(ctx.expression(), query)); UnresolvedExpression expr = visitAstExpression(ctx.expression()); diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 346ef6660d7..22dc8c62b2e 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -82,6 +82,7 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.*; import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; @@ -668,4 +669,18 @@ private List getExtractFunctionArguments(ExtractFunctionCa visitFunctionArg(ctx.extractFunction().functionArg())); return args; } + + @Override + public UnresolvedExpression visitInSubqueryPredicate( + OpenSearchSQLParser.InSubqueryPredicateContext ctx) { + throw new SyntaxCheckException( + "IN subquery is not supported in this engine. Use the unified query path."); + } + + @Override + public UnresolvedExpression visitExistsSubqueryExpressionAtom( + OpenSearchSQLParser.ExistsSubqueryExpressionAtomContext ctx) { + throw new SyntaxCheckException( + "EXISTS subquery is not supported in this engine. Use the unified query path."); + } } From 1fc2dac9898d065267bc6bd6a57de19d773a9eaf Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 14 May 2026 20:01:54 -0700 Subject: [PATCH 3/3] feat(sql): add IN/EXISTS subquery, fix UNION/MINUS schema semantics - Add inSubqueryPredicate and existsSubqueryExpressionAtom grammar rules - Add ExtendedAstExpressionBuilder for subquery AST construction - Base AstExpressionBuilder throws SyntaxCheckException for fallback - Add unifySchema flag to Union: false for SQL (positional), true for PPL (schema merge via SchemaUnifier) - Remove SchemaUnifier from visitMinus (SQL-only, always positional) - Inline joinType into joinClause, use switch in toJoinType - Add AstDSL.inSubquery() and existsSubquery() - Add LegacyFallbackIT for JOIN and IN subquery fallback Signed-off-by: Chen Dai --- .../sql/api/parser/SqlV2QueryParser.java | 17 ++++----- .../sql/api/UnifiedQueryPlannerSqlV2Test.java | 38 +++++++++++++++++-- .../org/opensearch/sql/ast/dsl/AstDSL.java | 12 +++++- .../org/opensearch/sql/ast/tree/Union.java | 25 ++++++++---- .../sql/calcite/CalciteRelNodeVisitor.java | 11 +++--- .../opensearch/sql/sql/parser/AstBuilder.java | 9 +++-- 6 files changed, 81 insertions(+), 31 deletions(-) diff --git a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java index 3c66165f1e9..592f7e6058c 100644 --- a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java +++ b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java @@ -20,6 +20,7 @@ import org.opensearch.sql.ast.tree.Join.JoinType; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.sql.antlr.SQLSyntaxParser; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ExistsSubqueryExpressionAtomContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.InSubqueryPredicateContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext; @@ -87,16 +88,12 @@ public UnresolvedPlan visitMinusSelect(MinusSelectContext ctx) { } private JoinType toJoinType(JoinClauseContext ctx) { - if (ctx.LEFT() != null) { - return JoinType.LEFT; - } - if (ctx.RIGHT() != null) { - return JoinType.RIGHT; - } - if (ctx.CROSS() != null) { - return JoinType.CROSS; - } - return JoinType.INNER; + return switch (ctx.getStart().getType()) { + case OpenSearchSQLParser.LEFT -> JoinType.LEFT; + case OpenSearchSQLParser.RIGHT -> JoinType.RIGHT; + case OpenSearchSQLParser.CROSS -> JoinType.CROSS; + default -> JoinType.INNER; + }; } /** Expression builder with IN/EXISTS subquery support. */ diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java index ce631651ab0..817fb570106 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java @@ -150,7 +150,14 @@ public void testUnionAll() { SELECT name, age FROM catalog.employees UNION ALL SELECT dept_name, dept_id FROM catalog.departments """) - .assertPlanContains("LogicalUnion(all=[true])"); + .assertPlan( + """ + LogicalUnion(all=[true]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + """); } @Test @@ -160,7 +167,14 @@ public void testUnion() { SELECT name, age FROM catalog.employees UNION SELECT dept_name, dept_id FROM catalog.departments """) - .assertPlanContains("LogicalUnion(all=[false])"); + .assertPlan( + """ + LogicalUnion(all=[false]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + """); } @Test @@ -171,7 +185,16 @@ public void testMultiWayUnion() { UNION ALL SELECT dept_name, dept_id FROM catalog.departments UNION ALL SELECT name, age FROM catalog.employees """) - .assertPlanContains("LogicalUnion(all=[true])"); + .assertPlan( + """ + LogicalUnion(all=[true]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + """); } @Test @@ -181,7 +204,14 @@ public void testMinus() { SELECT name, age FROM catalog.employees MINUS SELECT dept_name, dept_id FROM catalog.departments """) - .assertPlanContains("LogicalMinus(all=[false])"); + .assertPlan( + """ + LogicalMinus(all=[false]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + """); } @Test diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 644355a5d89..aed22414af4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -48,6 +48,8 @@ import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.expression.Xor; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; @@ -775,10 +777,18 @@ public static UnresolvedPlan join( } public static UnresolvedPlan union(List children, boolean distinct) { - return new Union(children, distinct, null); + return new Union(children, distinct); } public static UnresolvedPlan minus(List children) { return new Minus(children); } + + public static InSubquery inSubquery(List value, UnresolvedPlan query) { + return new InSubquery(value, query); + } + + public static ExistsSubquery existsSubquery(UnresolvedPlan query) { + return new ExistsSubquery(query); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java index a598de74a7e..caafca7beba 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java @@ -10,7 +10,6 @@ import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; @@ -18,23 +17,35 @@ @Getter @ToString @EqualsAndHashCode(callSuper = false) -@RequiredArgsConstructor @AllArgsConstructor public class Union extends UnresolvedPlan { + /** Input datasets to union. */ private final List datasets; - private boolean distinct; - private Integer maxout; - /** UNION ALL with maxout limit (PPL subsearch). */ + /** True for UNION (distinct), false for UNION ALL. */ + private final boolean distinct; + + /** True to merge different schemas (PPL append), false for positional union (SQL). */ + private final boolean unifySchema; + + /** Maximum rows from subsearch (PPL only, null if unlimited). */ + private final Integer maxout; + + /** SQL: UNION ALL or UNION DISTINCT (no schema unification). */ + public Union(List datasets, boolean distinct) { + this(datasets, distinct, false, null); + } + + /** PPL: UNION ALL with maxout limit and schema unification. */ public Union(List datasets, Integer maxout) { - this(datasets, false, maxout); + this(datasets, false, true, maxout); } @Override public UnresolvedPlan attach(UnresolvedPlan child) { List newDatasets = ImmutableList.builder().add(child).addAll(datasets).build(); - return new Union(newDatasets, distinct, maxout); + return new Union(newDatasets, distinct, unifySchema, maxout); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 927b0265747..d446a50440e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2946,7 +2946,9 @@ public RelNode visitUnion(Union node, CalcitePlanContext context) { } List unifiedInputs = - SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context); + node.isUnifySchema() + ? SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context) + : inputNodes; for (RelNode input : unifiedInputs) { context.relBuilder.push(input); @@ -2972,13 +2974,10 @@ public RelNode visitMinus(Minus node, CalcitePlanContext context) { inputNodes.add(context.relBuilder.build()); } - List unifiedInputs = - SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context); - - for (RelNode input : unifiedInputs) { + for (RelNode input : inputNodes) { context.relBuilder.push(input); } - context.relBuilder.minus(false, unifiedInputs.size()); + context.relBuilder.minus(false, inputNodes.size()); return context.relBuilder.peek(); } diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index a265dced5c2..f0416c5edfa 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.Locale; import java.util.Optional; -import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; @@ -51,10 +50,9 @@ import org.opensearch.sql.sql.parser.context.ParsingContext; /** Abstract syntax tree (AST) builder. */ -@RequiredArgsConstructor public class AstBuilder extends OpenSearchSQLParserBaseVisitor { - private final AstExpressionBuilder expressionBuilder = createExpressionBuilder(); + private final AstExpressionBuilder expressionBuilder; /** Parsing context stack that contains context for current query parsing. */ private final ParsingContext context = new ParsingContext(); @@ -65,6 +63,11 @@ public class AstBuilder extends OpenSearchSQLParserBaseVisitor { */ private final String query; + public AstBuilder(String query) { + this.query = query; + this.expressionBuilder = createExpressionBuilder(); + } + @Override public UnresolvedPlan visitShowStatement(OpenSearchSQLParser.ShowStatementContext ctx) { final UnresolvedExpression tableFilter = visitAstExpression(ctx.tableFilter());