diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 7a885edd65a..38c6561b409 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -122,6 +122,11 @@ public static class Builder { * Setting values with defaults from SysLimit.DEFAULT. Only includes planning-required settings * to avoid coupling with OpenSearchSettings. * + *

{@link Settings.Key#PPL_JOIN_SUBSEARCH_MAXOUT} defaults to {@code 0} to avoid injecting + * {@code LogicalSystemLimit} into the logical plan, which is an OpenSearch-specific operational + * concern irrelevant to external consumers of the unified query API. {@link + * Settings.Key#PPL_SUBSEARCH_MAXOUT} is set to {@code 0} for the same reason. + * *

{@link Settings.Key#CALCITE_ENGINE_ENABLED} defaults to {@code true} here because the * unified query path is by definition Calcite-based — every query reaching this context flows * through Calcite's planner, never the v2 engine. The PPL {@link @@ -142,8 +147,8 @@ public static class Builder { new HashMap( Map.of( QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit(), - PPL_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.subsearchLimit(), - PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit(), + PPL_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.subsearchLimit(), + PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.joinSubsearchLimit(), CALCITE_ENGINE_ENABLED, true, PPL_REX_MAX_MATCH_LIMIT, 10)); diff --git a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java index 827278a2119..592f7e6058c 100644 --- a/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java +++ b/api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java @@ -5,12 +5,29 @@ package org.opensearch.sql.api.parser; +import static org.opensearch.sql.ast.dsl.AstDSL.join; +import static org.opensearch.sql.ast.dsl.AstDSL.minus; +import static org.opensearch.sql.ast.dsl.AstDSL.union; + +import java.util.List; +import java.util.Optional; import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.Join.JoinType; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.sql.antlr.SQLSyntaxParser; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ExistsSubqueryExpressionAtomContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.InSubqueryPredicateContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MinusSelectContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.UnionSelectContext; import org.opensearch.sql.sql.parser.AstBuilder; +import org.opensearch.sql.sql.parser.AstExpressionBuilder; import org.opensearch.sql.sql.parser.AstStatementBuilder; /** SQL query parser that produces {@link UnresolvedPlan} using the V2 ANTLR grammar. */ @@ -24,7 +41,8 @@ public UnresolvedPlan parse(String query) { ParseTree cst = syntaxParser.parse(query); AstStatementBuilder astStmtBuilder = new AstStatementBuilder( - new AstBuilder(query), AstStatementBuilder.StatementBuilderContext.builder().build()); + new ExtendedAstBuilder(query), + AstStatementBuilder.StatementBuilderContext.builder().build()); Statement statement = cst.accept(astStmtBuilder); if (statement instanceof Query) { @@ -33,4 +51,66 @@ public UnresolvedPlan parse(String query) { throw new UnsupportedOperationException( "Only query statements are supported but got " + statement.getClass().getSimpleName()); } + + /** + * Extends the V2 AstBuilder with legacy-only SQL features (JOIN, UNION, MINUS) that the base + * AstBuilder rejects with SyntaxCheckException to trigger legacy engine fallback. + */ + private static class ExtendedAstBuilder extends AstBuilder { + + ExtendedAstBuilder(String query) { + super(query); + } + + @Override + protected AstExpressionBuilder createExpressionBuilder() { + return new ExtendedAstExpressionBuilder(); + } + + @Override + public UnresolvedPlan visitJoinClause(JoinClauseContext ctx) { + JoinType joinType = toJoinType(ctx); + UnresolvedPlan right = visit(ctx.relation()); + Optional condition = + Optional.ofNullable(ctx.expression()).map(this::visitAstExpression); + return join(right, joinType, condition); + } + + @Override + public UnresolvedPlan visitUnionSelect(UnionSelectContext ctx) { + boolean distinct = ctx.ALL().isEmpty(); + return union(ctx.querySpecification().stream().map(this::visit).toList(), distinct); + } + + @Override + public UnresolvedPlan visitMinusSelect(MinusSelectContext ctx) { + return minus(ctx.querySpecification().stream().map(this::visit).toList()); + } + + private JoinType toJoinType(JoinClauseContext ctx) { + return switch (ctx.getStart().getType()) { + case OpenSearchSQLParser.LEFT -> JoinType.LEFT; + case OpenSearchSQLParser.RIGHT -> JoinType.RIGHT; + case OpenSearchSQLParser.CROSS -> JoinType.CROSS; + default -> JoinType.INNER; + }; + } + + /** Expression builder with IN/EXISTS subquery support. */ + private class ExtendedAstExpressionBuilder extends AstExpressionBuilder { + + @Override + public UnresolvedExpression visitInSubqueryPredicate(InSubqueryPredicateContext ctx) { + UnresolvedPlan subquery = ExtendedAstBuilder.this.visit(ctx.querySpecification()); + return new InSubquery(List.of(visit(ctx.predicate())), subquery); + } + + @Override + public UnresolvedExpression visitExistsSubqueryExpressionAtom( + ExistsSubqueryExpressionAtomContext ctx) { + UnresolvedPlan subquery = ExtendedAstBuilder.this.visit(ctx.querySpecification()); + return new ExistsSubquery(subquery); + } + } + } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java index f0111d06363..3960e8a6206 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java @@ -30,8 +30,8 @@ public void testContextCreationWithDefaults() { assertNotNull("PlanContext should be created", context.getPlanContext()); assertNotNull("Settings should be created", context.getSettings()); assertEquals( - "Settings should have default system limits", - SysLimit.DEFAULT, + "Settings should have unlimited subsearch limits for clean logical plans", + new SysLimit(SysLimit.DEFAULT.querySizeLimit(), 0, 0), SysLimit.fromSettings(context.getSettings())); assertEquals( "PPL_REX_MAX_MATCH_LIMIT default should be 10", diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java index b9913bf450b..817fb570106 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java @@ -5,12 +5,19 @@ package org.opensearch.sql.api; +import static org.apache.calcite.sql.type.SqlTypeName.INTEGER; +import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR; + +import java.util.Map; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.junit.Before; import org.junit.Test; import org.opensearch.sql.executor.QueryType; /** - * Tests for basic SQL query planning through the V2 ANTLR parser path. Covers SELECT, WHERE, ORDER - * BY operations that produce valid RelNode plans. + * Tests for SQL query planning through the V2 ANTLR parser path. Covers SELECT, WHERE, ORDER BY, + * JOIN, UNION, and MINUS operations that produce valid RelNode plans. */ public class UnifiedQueryPlannerSqlV2Test extends UnifiedQueryTestBase { @@ -19,17 +26,35 @@ protected QueryType queryType() { return QueryType.SQL; } - @Test - public void selectStar() { - givenQuery("SELECT * FROM catalog.employees") - .assertPlan( - """ - LogicalTableScan(table=[[catalog, employees]]) - """); + @Before + @Override + public void setUp() { + testSchema = + new AbstractSchema() { + @Override + protected Map getTableMap() { + return Map.of( + "employees", createEmployeesTable(), + "departments", createDepartmentsTable()); + } + }; + + context = contextBuilder().build(); + planner = new UnifiedQueryPlanner(context); + } + + private Table createDepartmentsTable() { + return SimpleTable.builder() + .col("dept_id", INTEGER) + .col("dept_name", VARCHAR) + .row(new Object[] {1, "Engineering"}) + .row(new Object[] {2, "Sales"}) + .row(new Object[] {3, "Marketing"}) + .build(); } @Test - public void selectStarFields() { + public void selectStar() { givenQuery("SELECT * FROM catalog.employees") .assertPlan( """ @@ -68,4 +93,162 @@ public void testFilterAndOrderBy() { LogicalTableScan(table=[[catalog, employees]]) """); } + + @Test + public void testJoinTypes() { + Map.of("JOIN", "inner", "LEFT JOIN", "left", "RIGHT JOIN", "right") + .forEach( + (syntax, type) -> + givenQuery( + """ + SELECT * FROM catalog.employees %s catalog.departments + ON employees.department = departments.dept_name + """ + .formatted(syntax)) + .assertPlan( + """ + LogicalJoin(condition=[=($3, $5)], joinType=[%s]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, departments]]) + """ + .formatted(type))); + } + + @Test + public void testCrossJoin() { + givenQuery("SELECT * FROM catalog.employees CROSS JOIN catalog.departments") + .assertPlan( + """ + LogicalJoin(condition=[true], joinType=[inner]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testJoinWithFilterAndOrderBy() { + givenQuery( + """ + SELECT * FROM catalog.employees JOIN catalog.departments + ON employees.department = departments.dept_name + WHERE employees.age > 30 ORDER BY employees.name + """) + .assertPlan( + """ + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalFilter(condition=[>($2, 30)]) + LogicalJoin(condition=[=($3, $5)], joinType=[inner]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testUnionAll() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + UNION ALL SELECT dept_name, dept_id FROM catalog.departments + """) + .assertPlan( + """ + LogicalUnion(all=[true]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testUnion() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + UNION SELECT dept_name, dept_id FROM catalog.departments + """) + .assertPlan( + """ + LogicalUnion(all=[false]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testMultiWayUnion() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + UNION ALL SELECT dept_name, dept_id FROM catalog.departments + UNION ALL SELECT name, age FROM catalog.employees + """) + .assertPlan( + """ + LogicalUnion(all=[true]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMinus() { + givenQuery( + """ + SELECT name, age FROM catalog.employees + MINUS SELECT dept_name, dept_id FROM catalog.departments + """) + .assertPlan( + """ + LogicalMinus(all=[false]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalProject(dept_name=[$1], dept_id=[$0]) + LogicalTableScan(table=[[catalog, departments]]) + """); + } + + @Test + public void testInSubquery() { + givenQuery( + """ + SELECT name FROM catalog.employees + WHERE age IN (SELECT age FROM catalog.departments WHERE dept_name = 'Engineering') + """) + .assertPlan( + """ + LogicalProject(name=[$1]) + LogicalFilter(condition=[IN($2, { + LogicalProject(age=[$cor0.age]) + LogicalFilter(condition=[=($1, 'Engineering')]) + LogicalTableScan(table=[[catalog, departments]]) + })], variablesSet=[[$cor0]]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testExistsSubquery() { + givenQuery( + """ + SELECT name FROM catalog.employees + WHERE EXISTS (SELECT 1 FROM catalog.departments WHERE dept_id = age) + """) + .assertPlan( + """ + LogicalProject(name=[$1]) + LogicalFilter(condition=[EXISTS({ + LogicalProject(1=[1]) + LogicalFilter(condition=[=($0, $cor0.age)]) + LogicalTableScan(table=[[catalog, departments]]) + })], variablesSet=[[$cor0]]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index be02547a2da..b8410d2f349 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -69,6 +69,7 @@ import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; +import org.opensearch.sql.ast.tree.Minus; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.MvExpand; @@ -477,6 +478,10 @@ public T visitUnion(Union node, C context) { return visitChildren(node, context); } + public T visitMinus(Minus node, C context) { + return visitChildren(node, context); + } + public T visitAddTotals(AddTotals node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index b2731ebbd40..aed22414af4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -48,6 +48,8 @@ import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.expression.Xor; +import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; +import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; @@ -60,8 +62,10 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; +import org.opensearch.sql.ast.tree.Minus; import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; @@ -81,6 +85,7 @@ import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.calcite.plan.OpenSearchConstants; @@ -757,4 +762,33 @@ public static Bin bin(UnresolvedExpression field, Argument... arguments) { public static Field implicitTimestampField() { return AstDSL.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP); } + + public static UnresolvedPlan join( + UnresolvedPlan right, Join.JoinType joinType, Optional condition) { + return new Join( + right, + Optional.empty(), + Optional.empty(), + joinType, + condition, + new Join.JoinHint(), + Optional.empty(), + Argument.ArgumentMap.empty()); + } + + public static UnresolvedPlan union(List children, boolean distinct) { + return new Union(children, distinct); + } + + public static UnresolvedPlan minus(List children) { + return new Minus(children); + } + + public static InSubquery inSubquery(List value, UnresolvedPlan query) { + return new InSubquery(value, query); + } + + public static ExistsSubquery existsSubquery(UnresolvedPlan query) { + return new ExistsSubquery(query); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Minus.java b/core/src/main/java/org/opensearch/sql/ast/tree/Minus.java new file mode 100644 index 00000000000..d67692ec478 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Minus.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** Logical plan node for Minus (EXCEPT) operation. Returns rows in left that are not in right. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class Minus extends UnresolvedPlan { + private final List children; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + List newChildren = + ImmutableList.builder().add(child).addAll(children).build(); + return new Minus(newChildren); + } + + @Override + public List getChild() { + return children; + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitMinus(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java index a96831567cb..caafca7beba 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java @@ -10,26 +10,42 @@ import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; -/** Logical plan node for Union operation. Combines results from multiple datasets (UNION ALL). */ +/** Logical plan node for Union operation. Combines results from multiple datasets. */ @Getter @ToString @EqualsAndHashCode(callSuper = false) -@RequiredArgsConstructor @AllArgsConstructor public class Union extends UnresolvedPlan { + /** Input datasets to union. */ private final List datasets; - private Integer maxout; + /** True for UNION (distinct), false for UNION ALL. */ + private final boolean distinct; + + /** True to merge different schemas (PPL append), false for positional union (SQL). */ + private final boolean unifySchema; + + /** Maximum rows from subsearch (PPL only, null if unlimited). */ + private final Integer maxout; + + /** SQL: UNION ALL or UNION DISTINCT (no schema unification). */ + public Union(List datasets, boolean distinct) { + this(datasets, distinct, false, null); + } + + /** PPL: UNION ALL with maxout limit and schema unification. */ + public Union(List datasets, Integer maxout) { + this(datasets, false, true, maxout); + } @Override public UnresolvedPlan attach(UnresolvedPlan child) { List newDatasets = ImmutableList.builder().add(child).addAll(datasets).build(); - return new Union(newDatasets, maxout); + return new Union(newDatasets, distinct, unifySchema, maxout); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1251f51b131..d446a50440e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -135,6 +135,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; import org.opensearch.sql.ast.tree.ML; +import org.opensearch.sql.ast.tree.Minus; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.MvExpand; @@ -535,6 +536,9 @@ private List expandProjectFields( expandedFields.add(resolved); } } + case Alias alias -> { + expandedFields.add(rexVisitor.analyze(alias, context)); + } case AllFields ignored -> { currentFields.stream() .filter(field -> !isMetadataField(field)) @@ -2942,12 +2946,14 @@ public RelNode visitUnion(Union node, CalcitePlanContext context) { } List unifiedInputs = - SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context); + node.isUnifySchema() + ? SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context) + : inputNodes; for (RelNode input : unifiedInputs) { context.relBuilder.push(input); } - context.relBuilder.union(true, unifiedInputs.size()); // true = UNION ALL + context.relBuilder.union(!node.isDistinct(), unifiedInputs.size()); if (node.getMaxout() != null) { context.relBuilder.push( @@ -2960,6 +2966,22 @@ public RelNode visitUnion(Union node, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitMinus(Minus node, CalcitePlanContext context) { + List inputNodes = new ArrayList<>(); + for (UnresolvedPlan child : node.getChildren()) { + child.accept(this, context); + inputNodes.add(context.relBuilder.build()); + } + + for (RelNode input : inputNodes) { + context.relBuilder.push(input); + } + context.relBuilder.minus(false, inputNodes.size()); + + return context.relBuilder.peek(); + } + /* * Unsupported Commands of PPL with Calcite for OpenSearch 3.0.0-beta */ diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java new file mode 100644 index 00000000000..3af1f5d6ad3 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/LegacyFallbackIT.java @@ -0,0 +1,58 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.legacy; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PEOPLE; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; + +/** + * Integration tests verifying that SQL queries unsupported by the V2 engine (JOIN, IN/EXISTS + * subqueries) correctly fall back to the legacy engine and return valid results. + * + *

UNION/MINUS are not tested because the legacy engine's formatter is broken for set operations + * (MultiQueryIT is @Ignored for the same reason). + */ +public class LegacyFallbackIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.DOG); + loadIndex(Index.PEOPLE); + } + + @Test + public void testInnerJoinFallback() throws IOException { + JSONObject result = + executeQuery( + "SELECT a.firstname, d.dog_name FROM %s a JOIN %s d ON d.holdersName = a.firstname WHERE a.age > 35" + .formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG)); + verifyDataRows(result, rows("Hattie", "snoopy")); + } + + @Test + public void testLeftJoinFallback() throws IOException { + JSONObject result = + executeQuery( + "SELECT a.firstname, d.dog_name FROM %s a LEFT JOIN %s d ON d.holdersName = a.firstname WHERE a.firstname = 'Daenerys'" + .formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG)); + verifyDataRows(result, rows("Daenerys", "rex")); + } + + @Test + public void testInSubqueryFallback() throws IOException { + JSONObject result = + executeQuery( + "SELECT a.firstname FROM %s a WHERE a.firstname IN (SELECT holdersName FROM %s)" + .formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG)); + verifyDataRows(result, rows("Daenerys"), rows("Hattie")); + } +} diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 6b34507eacc..6950acb4fe9 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -32,6 +32,20 @@ parser grammar OpenSearchSQLParser; options { tokenVocab = OpenSearchSQLLexer; } + +@members { + /** + * Returns true if the next token is a join keyword that should not be consumed as a table alias. + * LEFT/RIGHT are valid identifiers (text function names), so without this predicate ANTLR4 + * greedily consumes them as implicit table aliases (e.g., FROM t1 LEFT becomes alias='LEFT' + * instead of starting a LEFT JOIN clause). + */ + private boolean isJoinKeyword() { + int t = _input.LT(1).getType(); + return t == LEFT || t == RIGHT || t == INNER || t == CROSS || t == JOIN; + } +} + // Top Level Description // Root rule @@ -54,7 +68,9 @@ dmlStatement // Primary DML Statements selectStatement - : querySpecification # simpleSelect + : querySpecification # simpleSelect + | querySpecification (UNION ALL? querySpecification)+ # unionSelect + | querySpecification EXCEPT querySpecification # minusSelect ; adminStatement @@ -104,12 +120,17 @@ selectElement ; fromClause - : FROM relation (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY + : FROM relation joinClause* (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY ; +joinClause + : (INNER | CROSS)? JOIN relation (ON expression)? + | (LEFT | RIGHT) OUTER? JOIN relation (ON expression)? + ; + relation - : tableName (AS? alias)? # tableAsRelation + : tableName (AS alias | {!isJoinKeyword()}? alias)? # tableAsRelation | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation | qualifiedName LR_BRACKET tableFunctionArgs RR_BRACKET (AS? alias)? # tableFunctionRelation ; @@ -302,6 +323,7 @@ predicate | left = predicate NOT? LIKE right = predicate # likePredicate | left = predicate REGEXP right = predicate # regexpPredicate | predicate NOT? IN '(' expressions ')' # inPredicate + | predicate NOT? IN '(' querySpecification ')' # inSubqueryPredicate ; expressions @@ -313,6 +335,7 @@ expressionAtom | columnName # fullColumnNameExpressionAtom | functionCall # functionCallExpressionAtom | LR_BRACKET expression RR_BRACKET # nestedExpressionAtom + | EXISTS LR_BRACKET querySpecification RR_BRACKET # existsSubqueryExpressionAtom | left = expressionAtom mathOperator = (STAR | SLASH | MODULE) right = expressionAtom # mathExpressionAtom | left = expressionAtom mathOperator = (PLUS | MINUS) right = expressionAtom # mathExpressionAtom ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index 5250ab7fb0f..f0416c5edfa 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.Locale; import java.util.Optional; -import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; @@ -32,6 +31,7 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.DescribeRelation; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.Relation; @@ -50,10 +50,9 @@ import org.opensearch.sql.sql.parser.context.ParsingContext; /** Abstract syntax tree (AST) builder. */ -@RequiredArgsConstructor public class AstBuilder extends OpenSearchSQLParserBaseVisitor { - private final AstExpressionBuilder expressionBuilder = new AstExpressionBuilder(); + private final AstExpressionBuilder expressionBuilder; /** Parsing context stack that contains context for current query parsing. */ private final ParsingContext context = new ParsingContext(); @@ -64,6 +63,11 @@ public class AstBuilder extends OpenSearchSQLParserBaseVisitor { */ private final String query; + public AstBuilder(String query) { + this.query = query; + this.expressionBuilder = createExpressionBuilder(); + } + @Override public UnresolvedPlan visitShowStatement(OpenSearchSQLParser.ShowStatementContext ctx) { final UnresolvedExpression tableFilter = visitAstExpression(ctx.tableFilter()); @@ -139,6 +143,13 @@ public UnresolvedPlan visitLimitClause(OpenSearchSQLParser.LimitClauseContext ct public UnresolvedPlan visitFromClause(FromClauseContext ctx) { UnresolvedPlan result = visit(ctx.relation()); + // Dispatch each joinClause to visitJoinClause (throws in base, overridden in subclass) + for (var joinCtx : ctx.joinClause()) { + UnresolvedPlan joinPlan = visit(joinCtx); + ((Join) joinPlan).attach(result); + result = joinPlan; + } + if (ctx.whereClause() != null) { result = visit(ctx.whereClause()).attach(result); } @@ -163,6 +174,24 @@ public UnresolvedPlan visitFromClause(FromClauseContext ctx) { return result; } + @Override + public UnresolvedPlan visitJoinClause(OpenSearchSQLParser.JoinClauseContext ctx) { + throw new SyntaxCheckException( + "JOIN is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitUnionSelect(OpenSearchSQLParser.UnionSelectContext ctx) { + throw new SyntaxCheckException( + "UNION is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitMinusSelect(OpenSearchSQLParser.MinusSelectContext ctx) { + throw new SyntaxCheckException( + "EXCEPT is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + /** * Ensure NESTED function is not used in HAVING clause and fallback to legacy engine. Can remove * when support is added for NESTED function in HAVING clause. @@ -261,10 +290,16 @@ protected UnresolvedPlan aggregateResult(UnresolvedPlan aggregate, UnresolvedPla return nextResult != null ? nextResult : aggregate; } - private UnresolvedExpression visitAstExpression(ParseTree tree) { + /** Visible to subclasses that need to parse expressions (e.g., JOIN ON condition). */ + protected UnresolvedExpression visitAstExpression(ParseTree tree) { return expressionBuilder.visit(tree); } + /** Override to provide a custom expression builder (e.g., with subquery support). */ + protected AstExpressionBuilder createExpressionBuilder() { + return new AstExpressionBuilder(); + } + private UnresolvedExpression visitSelectItem(SelectElementContext ctx) { String name = StringUtils.unquoteIdentifier(getTextInQuery(ctx.expression(), query)); UnresolvedExpression expr = visitAstExpression(ctx.expression()); diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 346ef6660d7..22dc8c62b2e 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -82,6 +82,7 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.*; import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; @@ -668,4 +669,18 @@ private List getExtractFunctionArguments(ExtractFunctionCa visitFunctionArg(ctx.extractFunction().functionArg())); return args; } + + @Override + public UnresolvedExpression visitInSubqueryPredicate( + OpenSearchSQLParser.InSubqueryPredicateContext ctx) { + throw new SyntaxCheckException( + "IN subquery is not supported in this engine. Use the unified query path."); + } + + @Override + public UnresolvedExpression visitExistsSubqueryExpressionAtom( + OpenSearchSQLParser.ExistsSubqueryExpressionAtomContext ctx) { + throw new SyntaxCheckException( + "EXISTS subquery is not supported in this engine. Use the unified query path."); + } } diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java new file mode 100644 index 00000000000..19f4a6a4cc2 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserV2ExtensionTest.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.antlr; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import org.junit.jupiter.api.Test; + +/** + * Tests for V2 extended SQL grammar features: JOIN, UNION, and MINUS (EXCEPT). These features are + * parsed by the V2 ANTLR grammar but throw SyntaxCheckException in the base AstBuilder to trigger + * legacy engine fallback. The extended AstBuilder in the unified query path handles them. + */ +class SQLSyntaxParserV2ExtensionTest { + + private final SQLSyntaxParser parser = new SQLSyntaxParser(); + + @Test + void canParseInnerJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id")); + } + + @Test + void canParseLeftJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id")); + } + + @Test + void canParseRightJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 RIGHT JOIN t2 ON t1.id = t2.id")); + } + + @Test + void canParseCrossJoin() { + assertNotNull(parser.parse("SELECT * FROM t1 CROSS JOIN t2")); + } + + @Test + void canParseUnionAll() { + assertNotNull(parser.parse("SELECT a FROM t1 UNION ALL SELECT b FROM t2")); + } + + @Test + void canParseUnion() { + assertNotNull(parser.parse("SELECT a FROM t1 UNION SELECT b FROM t2")); + } + + @Test + void canParseMinus() { + assertNotNull(parser.parse("SELECT a FROM t1 MINUS SELECT b FROM t2")); + } + + @Test + void canParseMultiWayUnion() { + assertNotNull( + parser.parse("SELECT a FROM t1 UNION ALL SELECT b FROM t2 UNION ALL SELECT c FROM t3")); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java index 695cf85b144..69f4035905c 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java @@ -729,4 +729,22 @@ public void can_build_string_literal_highlight() { alias("highlight(\"fieldA\")", highlight(AstDSL.stringLiteral("fieldA"), args))), buildAST("SELECT highlight(\"fieldA\") FROM test")); } + + @Test + public void join_throws_syntax_check_exception_for_legacy_fallback() { + assertThrows( + SyntaxCheckException.class, () -> buildAST("SELECT * FROM t1 JOIN t2 ON t1.id = t2.id")); + } + + @Test + public void union_throws_syntax_check_exception_for_legacy_fallback() { + assertThrows( + SyntaxCheckException.class, () -> buildAST("SELECT a FROM t1 UNION ALL SELECT b FROM t2")); + } + + @Test + public void except_throws_syntax_check_exception_for_legacy_fallback() { + assertThrows( + SyntaxCheckException.class, () -> buildAST("SELECT a FROM t1 MINUS SELECT b FROM t2")); + } }