From 5eb2dd4317a4d0deaf6e12489d581e1721f45a9c Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 14 May 2026 11:56:44 -0700 Subject: [PATCH] spath: parquet-backed test indices for analytics-engine route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `CalcitePPLSpathCommandIT.init()` was creating its four test indices by raw `PUT /<index>/_doc/N` requests, which auto-create the index via the default Lucene path. The analytics-engine compatibility run (`-Dtests.analytics.parquet_indices=true`) injects the parquet/composite settings *inside* `TestUtils.createIndexByRestClient`, so the raw-PUT indices were Lucene-only and DataFusion failed with `UnsupportedOperationException: acquireReader is not supported in EngineBackedIndexer` for every test on the analytics-engine route. Fix: create the empty index up-front via `createIndexByRestClient(..., null)` so the toggle has a chance to inject parquet settings, then let the subsequent doc PUTs populate it via dynamic mapping. No mapping is declared — DataFusion is fine with dynamic mapping on a parquet-backed composite index. Same pattern as `CalciteEvalCommandIT` and `CalciteFieldFormatCommandIT`. No change for the v2 / Calcite path (the helper is a no-op when the parquet toggle isn't set). ## Pass rate Pairs with opensearch-project/OpenSearch#21664. Both PRs are required to move the analytics-engine route off 0 / 16. 
| IT | Route | Before | After | |---|---|---|---| | `CalcitePPLSpathCommandIT` | analytics-engine (`-Dtests.analytics.force_routing=true -Dtests.analytics.parquet_indices=true`) | 0 / 16 | **16 / 16** | | `CalcitePPLSpathCommandIT` | default v2 / Calcite (no flags) | 16 / 16 | 16 / 16 (no regression) | Signed-off-by: Kai Huang --- .../remote/CalcitePPLSpathCommandIT.java | 88 ++++++++++++------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index 0bd7ac803f9..365c87d7250 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -15,9 +15,17 @@ import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; +import org.opensearch.sql.legacy.TestUtils; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalcitePPLSpathCommandIT extends PPLIntegTestCase { + // Pre-create each test index through TestUtils.createIndexByRestClient so the + // analytics-engine compatibility run (tests.analytics.parquet_indices=true) + // provisions them as parquet-backed composite stores. Raw `PUT /<index>/_doc/N` + // bypasses the helper, yielding a Lucene-only index that DataFusion cannot + // acquireReader on (`UnsupportedOperationException: acquireReader is not + // supported in EngineBackedIndexer`). Mapping passed as null — dynamic + // mapping infers the doc fields from the subsequent PUTs. 
@Override public void init() throws Exception { super.init(); @@ -26,48 +34,64 @@ public void init() throws Exception { loadIndex(Index.BANK); // Simple JSON docs for path-based extraction - Request request1 = new Request("PUT", "/test_spath/_doc/1?refresh=true"); - request1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); - client().performRequest(request1); + if (!TestUtils.isIndexExist(client(), "test_spath")) { + TestUtils.createIndexByRestClient(client(), "test_spath", null); - Request request2 = new Request("PUT", "/test_spath/_doc/2?refresh=true"); - request2.setJsonEntity("{\"doc\": \"{\\\"n\\\": 2}\"}"); - client().performRequest(request2); + Request request1 = new Request("PUT", "/test_spath/_doc/1?refresh=true"); + request1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); + client().performRequest(request1); - Request request3 = new Request("PUT", "/test_spath/_doc/3?refresh=true"); - request3.setJsonEntity("{\"doc\": \"{\\\"n\\\": 3}\"}"); - client().performRequest(request3); + Request request2 = new Request("PUT", "/test_spath/_doc/2?refresh=true"); + request2.setJsonEntity("{\"doc\": \"{\\\"n\\\": 2}\"}"); + client().performRequest(request2); + + Request request3 = new Request("PUT", "/test_spath/_doc/3?refresh=true"); + request3.setJsonEntity("{\"doc\": \"{\\\"n\\\": 3}\"}"); + client().performRequest(request3); + } // Auto-extract mode: flatten rules and edge cases (empty, malformed) - Request autoExtractDoc = new Request("PUT", "/test_spath_auto/_doc/1?refresh=true"); - autoExtractDoc.setJsonEntity( - "{\"nested_doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\"}}\"," - + " \"array_doc\": \"{\\\"tags\\\":[\\\"java\\\",\\\"sql\\\"]}\"," - + " \"merge_doc\": \"{\\\"a\\\":{\\\"b\\\":1},\\\"a.b\\\":2}\"," - + " \"stringify_doc\": \"{\\\"n\\\":30,\\\"b\\\":true,\\\"x\\\":null}\"," - + " \"empty_doc\": \"{}\"," - + " \"malformed_doc\": \"{\\\"user\\\":{\\\"name\\\":\"}"); - client().performRequest(autoExtractDoc); + if (!TestUtils.isIndexExist(client(), 
"test_spath_auto")) { + TestUtils.createIndexByRestClient(client(), "test_spath_auto", null); + + Request autoExtractDoc = new Request("PUT", "/test_spath_auto/_doc/1?refresh=true"); + autoExtractDoc.setJsonEntity( + "{\"nested_doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\"}}\"," + + " \"array_doc\": \"{\\\"tags\\\":[\\\"java\\\",\\\"sql\\\"]}\"," + + " \"merge_doc\": \"{\\\"a\\\":{\\\"b\\\":1},\\\"a.b\\\":2}\"," + + " \"stringify_doc\": \"{\\\"n\\\":30,\\\"b\\\":true,\\\"x\\\":null}\"," + + " \"empty_doc\": \"{}\"," + + " \"malformed_doc\": \"{\\\"user\\\":{\\\"name\\\":\"}"); + client().performRequest(autoExtractDoc); + } // Auto-extract mode: 2-doc index for spath + command (eval/where/stats/sort) tests - Request cmdDoc1 = new Request("PUT", "/test_spath_cmd/_doc/1?refresh=true"); - cmdDoc1.setJsonEntity( - "{\"doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\",\\\"age\\\":30}}\"}"); - client().performRequest(cmdDoc1); + if (!TestUtils.isIndexExist(client(), "test_spath_cmd")) { + TestUtils.createIndexByRestClient(client(), "test_spath_cmd", null); + + Request cmdDoc1 = new Request("PUT", "/test_spath_cmd/_doc/1?refresh=true"); + cmdDoc1.setJsonEntity( + "{\"doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\",\\\"age\\\":30}}\"}"); + client().performRequest(cmdDoc1); - Request cmdDoc2 = new Request("PUT", "/test_spath_cmd/_doc/2?refresh=true"); - cmdDoc2.setJsonEntity( - "{\"doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"Alice\\\",\\\"age\\\":25}}\"}"); - client().performRequest(cmdDoc2); + Request cmdDoc2 = new Request("PUT", "/test_spath_cmd/_doc/2?refresh=true"); + cmdDoc2.setJsonEntity( + "{\"doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"Alice\\\",\\\"age\\\":25}}\"}"); + client().performRequest(cmdDoc2); + } // Auto-extract mode: null input handling (doc 1 establishes mapping, doc 2 has null) - Request nullDoc1 = new Request("PUT", "/test_spath_null/_doc/1?refresh=true"); - nullDoc1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); - client().performRequest(nullDoc1); + 
if (!TestUtils.isIndexExist(client(), "test_spath_null")) { + TestUtils.createIndexByRestClient(client(), "test_spath_null", null); + + Request nullDoc1 = new Request("PUT", "/test_spath_null/_doc/1?refresh=true"); + nullDoc1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); + client().performRequest(nullDoc1); - Request nullDoc2 = new Request("PUT", "/test_spath_null/_doc/2?refresh=true"); - nullDoc2.setJsonEntity("{\"doc\": null}"); - client().performRequest(nullDoc2); + Request nullDoc2 = new Request("PUT", "/test_spath_null/_doc/2?refresh=true"); + nullDoc2.setJsonEntity("{\"doc\": null}"); + client().performRequest(nullDoc2); + } } @Test