From db2d39c4cd6fa70914058376eb525f5467cd78e7 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Wed, 11 Feb 2026 22:26:42 -0600 Subject: [PATCH 1/9] Add Docker integration tests. --- .bin/pre-release-docker | 5 + .github/workflows/pre-release.yaml | 25 ++- ice-rest-catalog/pom.xml | 1 - .../rest/catalog/DockerScenarioBasedIT.java | 183 ++++++++++++++++++ .../ice/rest/catalog/RESTCatalogTestBase.java | 97 ++++++++++ .../ice/rest/catalog/ScenarioBasedIT.java | 111 +---------- .../ice/rest/catalog/ScenarioConfig.java | 13 +- .../ice/rest/catalog/ScenarioTestRunner.java | 4 +- .../src/test/resources/scenarios/README.md | 20 -- .../scenarios/basic-operations/input.parquet | Bin 0 -> 2446 bytes .../scenarios/basic-operations/run.sh.tmpl | 8 - .../scenarios/basic-operations/scenario.yaml | 19 -- .../insert-partitioned/scenario.yaml | 19 -- .../scenarios/insert-scan/scenario.yaml | 17 -- 14 files changed, 315 insertions(+), 207 deletions(-) create mode 100644 ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/DockerScenarioBasedIT.java create mode 100644 ice-rest-catalog/src/test/resources/scenarios/basic-operations/input.parquet diff --git a/.bin/pre-release-docker b/.bin/pre-release-docker index 2671de32..990a5597 100755 --- a/.bin/pre-release-docker +++ b/.bin/pre-release-docker @@ -8,6 +8,11 @@ export SKIP_VERIFY=1 export PATH="$(pwd)/.bin:$PATH" +echo >&2 'Building ice Docker image' +docker-build-ice +echo >&2 'Building ice-rest-catalog Docker image' +docker-build-ice-rest-catalog + echo >&2 'Pushing ice Docker image' docker-build-ice --push echo >&2 'Pushing ice-rest-catalog Docker image' diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml index b1ec2086..fb893650 100644 --- a/.github/workflows/pre-release.yaml +++ b/.github/workflows/pre-release.yaml @@ -49,4 +49,27 @@ jobs: - uses: actions/checkout@v4 - - run: .bin/pre-release-docker + - uses: actions/setup-java@v4 + with: + java-version: '21' + distribution: 'graalvm' + cache: maven + + - name: Build Docker images + run: | + export VERSION=0.0.0-latest-master+$(git rev-parse --short HEAD) + export IMAGE_TAG="latest-master" + export SKIP_VERIFY=1 + export PATH="$(pwd)/.bin:$PATH" + docker-build-ice + docker-build-ice-rest-catalog + + - name: Run Docker integration tests + run: > + ./mvnw -pl ice-rest-catalog install -Dmaven.test.skip=true -Pno-check && + ./mvnw -pl ice-rest-catalog failsafe:integration-test failsafe:verify + -Dit.test=DockerScenarioBasedIT + -Ddocker.image=altinity/ice-rest-catalog:debug-with-ice-latest-master-amd64 + + - name: Push Docker images + run: .bin/pre-release-docker diff --git a/ice-rest-catalog/pom.xml b/ice-rest-catalog/pom.xml index 43a8121e..fda0550b 100644 --- a/ice-rest-catalog/pom.xml +++ b/ice-rest-catalog/pom.xml @@ -393,7 +393,6 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml ${jackson.version} - test io.etcd diff --git a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/DockerScenarioBasedIT.java b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/DockerScenarioBasedIT.java new file mode 100644 index 00000000..2916589a --- /dev/null +++ b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/DockerScenarioBasedIT.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2025 Altinity Inc and/or its affiliates. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +package com.altinity.ice.rest.catalog; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.utility.MountableFile; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +/** + * Docker-based integration tests for ICE REST Catalog. + * + *
Runs the ice-rest-catalog Docker image (specified via system property {@code docker.image}) + * alongside a MinIO container, then executes scenario-based tests against it. + */ +public class DockerScenarioBasedIT extends RESTCatalogTestBase { + + private Network network; + + private GenericContainer minio; + + private GenericContainer catalog; + + @Override + @BeforeClass + public void setUp() throws Exception { + String dockerImage = + System.getProperty("docker.image", "altinity/ice-rest-catalog:debug-with-ice-0.12.0"); + logger.info("Using Docker image: {}", dockerImage); + + network = Network.newNetwork(); + + // Start MinIO + minio = + new GenericContainer<>("minio/minio:latest") + .withNetwork(network) + .withNetworkAliases("minio") + .withExposedPorts(9000) + .withEnv("MINIO_ACCESS_KEY", "minioadmin") + .withEnv("MINIO_SECRET_KEY", "minioadmin") + .withCommand("server", "/data") + .waitingFor(Wait.forHttp("/minio/health/live").forPort(9000)); + minio.start(); + + // Create test bucket via MinIO's host-mapped port + String minioHostEndpoint = "http://" + minio.getHost() + ":" + minio.getMappedPort(9000); + try (var s3Client = + software.amazon.awssdk.services.s3.S3Client.builder() + .endpointOverride(java.net.URI.create(minioHostEndpoint)) + .region(software.amazon.awssdk.regions.Region.US_EAST_1) + .credentialsProvider( + software.amazon.awssdk.auth.credentials.StaticCredentialsProvider.create( + software.amazon.awssdk.auth.credentials.AwsBasicCredentials.create( + "minioadmin", "minioadmin"))) + .forcePathStyle(true) + .build()) { + s3Client.createBucket( + software.amazon.awssdk.services.s3.model.CreateBucketRequest.builder() + .bucket("test-bucket") + .build()); + logger.info("Created test-bucket in MinIO"); + } + + // Build YAML config for the catalog container (using the Docker network alias for MinIO) + String catalogConfig = + String.join( + "\n", + "uri: \"jdbc:sqlite::memory:\"", + "warehouse: \"s3://test-bucket/warehouse\"", + "s3:", + " endpoint: \"http://minio:9000\"", + " pathStyleAccess: true", + " accessKeyID: \"minioadmin\"", + " secretAccessKey: \"minioadmin\"", + " region: \"us-east-1\"", + "anonymousAccess:", + " enabled: true", + " accessConfig:", + " readOnly: false", + ""); + + Path scenariosDir = getScenariosDirectory().toAbsolutePath(); + if (!Files.exists(scenariosDir) || !Files.isDirectory(scenariosDir)) { + throw new IllegalStateException( + "Scenarios directory must exist at " + + scenariosDir + + ". Run 'mvn test-compile' or run the test from Maven (e.g. mvn failsafe:integration-test)."); + } + Path insertScanInput = scenariosDir.resolve("insert-scan").resolve("input.parquet"); + if (!Files.exists(insertScanInput)) { + throw new IllegalStateException( + "Scenario input not found at " + + insertScanInput + + ". 
Ensure test resources are on the classpath and scenarios/insert-scan/input.parquet exists."); + } + + // Start the ice-rest-catalog container (debug-with-ice has ice CLI at /usr/local/bin/ice) + GenericContainer catalogContainer = + new GenericContainer<>(dockerImage) + .withNetwork(network) + .withExposedPorts(5000) + .withEnv("ICE_REST_CATALOG_CONFIG", "") + .withEnv("ICE_REST_CATALOG_CONFIG_YAML", catalogConfig) + .withFileSystemBind(scenariosDir.toString(), "/scenarios") + .waitingFor(Wait.forHttp("/v1/config").forPort(5000).forStatusCode(200)); + + catalog = catalogContainer; + try { + catalog.start(); + } catch (Exception e) { + if (catalog != null) { + logger.error("Catalog container logs (stdout): {}", catalog.getLogs()); + } + throw e; + } + + // Copy CLI config into container so ice CLI can talk to co-located REST server + File cliConfigHost = File.createTempFile("ice-docker-cli-", ".yaml"); + try { + Files.write(cliConfigHost.toPath(), "uri: http://localhost:5000\n".getBytes()); + catalog.copyFileToContainer( + MountableFile.forHostPath(cliConfigHost.toPath()), "/tmp/ice-cli.yaml"); + } finally { + cliConfigHost.delete(); + } + + logger.info( + "Catalog container started at {}:{}", catalog.getHost(), catalog.getMappedPort(5000)); + } + + @Override + @AfterClass + public void tearDown() { + if (catalog != null && catalog.isRunning()) { + catalog.stop(); + } + if (minio != null && minio.isRunning()) { + minio.stop(); + } + if (network != null) { + network.close(); + } + } + + @Override + protected ScenarioTestRunner createScenarioRunner(String scenarioName) throws Exception { + Path scenariosDir = getScenariosDirectory(); + + String containerId = catalog.getContainerId(); + + // Wrapper script on host: docker exec ice "$@" (CLI runs inside container) + File wrapperScript = File.createTempFile("ice-docker-exec-", ".sh"); + wrapperScript.deleteOnExit(); + String wrapperContent = "#!/bin/sh\n" + "exec docker exec " + containerId + " ice \"$@\"\n"; + Files.write(wrapperScript.toPath(), wrapperContent.getBytes()); + if (!wrapperScript.setExecutable(true)) { + throw new IllegalStateException("Could not set wrapper script executable: " + wrapperScript); + } + + Map templateVars = new HashMap<>(); + templateVars.put("ICE_CLI", wrapperScript.getAbsolutePath()); + templateVars.put("CLI_CONFIG", "/tmp/ice-cli.yaml"); + templateVars.put("SCENARIO_DIR", "/scenarios/" + scenarioName); + templateVars.put("MINIO_ENDPOINT", ""); + templateVars.put("CATALOG_URI", "http://localhost:5000"); + + return new ScenarioTestRunner(scenariosDir, templateVars); + } +} diff --git a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/RESTCatalogTestBase.java b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/RESTCatalogTestBase.java index d7ffcf85..54c94d40 100644 --- a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/RESTCatalogTestBase.java +++ b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/RESTCatalogTestBase.java @@ -14,7 +14,12 @@ import com.altinity.ice.rest.catalog.internal.config.Config; import java.io.File; import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; import java.util.Map; import org.apache.iceberg.catalog.Catalog; import org.eclipse.jetty.server.Server; @@ -23,6 +28,8 @@ import org.testcontainers.containers.GenericContainer; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; 
+import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.regions.Region; @@ -152,4 +159,94 @@ protected String getMinioEndpoint() { protected String getCatalogUri() { return "http://localhost:8080"; } + + /** + * Get the path to the scenarios directory. + * + * @return Path to scenarios directory + * @throws URISyntaxException If the resource URL cannot be converted to a path + */ + protected Path getScenariosDirectory() throws URISyntaxException { + URL scenariosUrl = getClass().getClassLoader().getResource("scenarios"); + if (scenariosUrl == null) { + return Paths.get("src/test/resources/scenarios"); + } + return Paths.get(scenariosUrl.toURI()); + } + + /** + * Create a ScenarioTestRunner for the given scenario. Subclasses provide host or container-based + * CLI and config. + * + * @param scenarioName Name of the scenario (e.g. for container path resolution) + * @return Configured ScenarioTestRunner + * @throws Exception If there's an error creating the runner + */ + protected abstract ScenarioTestRunner createScenarioRunner(String scenarioName) throws Exception; + + /** Data provider that discovers all test scenarios. */ + @DataProvider(name = "scenarios") + public Object[][] scenarioProvider() throws Exception { + Path scenariosDir = getScenariosDirectory(); + ScenarioTestRunner runner = new ScenarioTestRunner(scenariosDir, Map.of()); + List scenarios = runner.discoverScenarios(); + + if (scenarios.isEmpty()) { + logger.warn("No test scenarios found in: {}", scenariosDir); + return new Object[0][0]; + } + + logger.info("Discovered {} test scenario(s): {}", scenarios.size(), scenarios); + + Object[][] data = new Object[scenarios.size()][1]; + for (int i = 0; i < scenarios.size(); i++) { + data[i][0] = scenarios.get(i); + } + return data; + } + + /** Parameterized test that executes a single scenario. */ + @Test(dataProvider = "scenarios") + public void testScenario(String scenarioName) throws Exception { + logger.info("====== Starting scenario test: {} ======", scenarioName); + + ScenarioTestRunner runner = createScenarioRunner(scenarioName); + ScenarioTestRunner.ScenarioResult result = runner.executeScenario(scenarioName); + + if (result.runScriptResult() != null) { + logger.info("Run script exit code: {}", result.runScriptResult().exitCode()); + } + if (result.verifyScriptResult() != null) { + logger.info("Verify script exit code: {}", result.verifyScriptResult().exitCode()); + } + + assertScenarioSuccess(scenarioName, result); + logger.info("====== Scenario test passed: {} ======", scenarioName); + } + + /** Assert that the scenario result indicates success; otherwise throw AssertionError. 
*/ + protected void assertScenarioSuccess( + String scenarioName, ScenarioTestRunner.ScenarioResult result) { + if (result.isSuccess()) { + return; + } + StringBuilder errorMessage = new StringBuilder(); + errorMessage.append("Scenario '").append(scenarioName).append("' failed:\n"); + + if (result.runScriptResult() != null && result.runScriptResult().exitCode() != 0) { + errorMessage.append("\nRun script failed with exit code: "); + errorMessage.append(result.runScriptResult().exitCode()); + errorMessage.append("\nStdout:\n").append(result.runScriptResult().stdout()); + errorMessage.append("\nStderr:\n").append(result.runScriptResult().stderr()); + } + + if (result.verifyScriptResult() != null && result.verifyScriptResult().exitCode() != 0) { + errorMessage.append("\nVerify script failed with exit code: "); + errorMessage.append(result.verifyScriptResult().exitCode()); + errorMessage.append("\nStdout:\n").append(result.verifyScriptResult().stdout()); + errorMessage.append("\nStderr:\n").append(result.verifyScriptResult().stderr()); + } + + throw new AssertionError(errorMessage.toString()); + } } diff --git a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioBasedIT.java b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioBasedIT.java index b4351344..24b098e7 100644 --- a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioBasedIT.java +++ b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioBasedIT.java @@ -10,15 +10,10 @@ package com.altinity.ice.rest.catalog; import java.io.File; -import java.net.URISyntaxException; -import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; -import java.util.List; import java.util.Map; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; /** * Scenario-based integration tests for ICE REST Catalog. @@ -28,92 +23,8 @@ */ public class ScenarioBasedIT extends RESTCatalogTestBase { - /** - * Data provider that discovers all test scenarios. - * - * @return Array of scenario names to be used as test parameters - * @throws Exception If there's an error discovering scenarios - */ - @DataProvider(name = "scenarios") - public Object[][] scenarioProvider() throws Exception { - Path scenariosDir = getScenariosDirectory(); - ScenarioTestRunner runner = createScenarioRunner(); - - List scenarios = runner.discoverScenarios(); - - if (scenarios.isEmpty()) { - logger.warn("No test scenarios found in: {}", scenariosDir); - return new Object[0][0]; - } - - logger.info("Discovered {} test scenario(s): {}", scenarios.size(), scenarios); - - // Convert to Object[][] for TestNG data provider - Object[][] data = new Object[scenarios.size()][1]; - for (int i = 0; i < scenarios.size(); i++) { - data[i][0] = scenarios.get(i); - } - return data; - } - - /** - * Parameterized test that executes a single scenario. 
- * - * @param scenarioName Name of the scenario to execute - * @throws Exception If the scenario execution fails - */ - @Test(dataProvider = "scenarios") - public void testScenario(String scenarioName) throws Exception { - logger.info("====== Starting scenario test: {} ======", scenarioName); - - ScenarioTestRunner runner = createScenarioRunner(); - ScenarioTestRunner.ScenarioResult result = runner.executeScenario(scenarioName); - - // Log results - if (result.runScriptResult() != null) { - logger.info("Run script exit code: {}", result.runScriptResult().exitCode()); - } - - if (result.verifyScriptResult() != null) { - logger.info("Verify script exit code: {}", result.verifyScriptResult().exitCode()); - } - - // Assert success - if (!result.isSuccess()) { - StringBuilder errorMessage = new StringBuilder(); - errorMessage.append("Scenario '").append(scenarioName).append("' failed:\n"); - - if (result.runScriptResult() != null && result.runScriptResult().exitCode() != 0) { - errorMessage.append("\nRun script failed with exit code: "); - errorMessage.append(result.runScriptResult().exitCode()); - errorMessage.append("\nStdout:\n"); - errorMessage.append(result.runScriptResult().stdout()); - errorMessage.append("\nStderr:\n"); - errorMessage.append(result.runScriptResult().stderr()); - } - - if (result.verifyScriptResult() != null && result.verifyScriptResult().exitCode() != 0) { - errorMessage.append("\nVerify script failed with exit code: "); - errorMessage.append(result.verifyScriptResult().exitCode()); - errorMessage.append("\nStdout:\n"); - errorMessage.append(result.verifyScriptResult().stdout()); - errorMessage.append("\nStderr:\n"); - errorMessage.append(result.verifyScriptResult().stderr()); - } - - throw new AssertionError(errorMessage.toString()); - } - - logger.info("====== Scenario test passed: {} ======", scenarioName); - } - - /** - * Create a ScenarioTestRunner with the appropriate template variables. - * - * @return Configured ScenarioTestRunner - * @throws Exception If there's an error creating the runner - */ - private ScenarioTestRunner createScenarioRunner() throws Exception { + @Override + protected ScenarioTestRunner createScenarioRunner(String scenarioName) throws Exception { Path scenariosDir = getScenariosDirectory(); // Create CLI config file @@ -143,22 +54,4 @@ private ScenarioTestRunner createScenarioRunner() throws Exception { return new ScenarioTestRunner(scenariosDir, templateVars); } - - /** - * Get the path to the scenarios directory. 
- * - * @return Path to scenarios directory - * @throws URISyntaxException If the resource URL cannot be converted to a path - */ - private Path getScenariosDirectory() throws URISyntaxException { - // Get the scenarios directory from test resources - URL scenariosUrl = getClass().getClassLoader().getResource("scenarios"); - - if (scenariosUrl == null) { - // If not found in resources, try relative to project - return Paths.get("src/test/resources/scenarios"); - } - - return Paths.get(scenariosUrl.toURI()); - } } diff --git a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java index 17d5b169..3d6ae098 100644 --- a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java +++ b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java @@ -9,7 +9,6 @@ */ package com.altinity.ice.rest.catalog; -import java.util.List; import java.util.Map; /** @@ -21,17 +20,7 @@ public record ScenarioConfig( String name, String description, CatalogConfig catalogConfig, - Map env, - CloudResources cloudResources, - List phases) { + Map env) { public record CatalogConfig(String warehouse, String name, String uri) {} - - public record CloudResources(S3Resources s3, SqsResources sqs) {} - - public record S3Resources(List buckets) {} - - public record SqsResources(List queues) {} - - public record Phase(String name, String description) {} } diff --git a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioTestRunner.java b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioTestRunner.java index 83a6b19d..c74d22b0 100644 --- a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioTestRunner.java +++ b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioTestRunner.java @@ -105,7 +105,9 @@ public ScenarioResult executeScenario(String scenarioName) throws Exception { // Build template variables map Map templateVars = new HashMap<>(globalTemplateVars); - templateVars.put("SCENARIO_DIR", scenarioDir.toAbsolutePath().toString()); + if (!templateVars.containsKey("SCENARIO_DIR")) { + templateVars.put("SCENARIO_DIR", scenarioDir.toAbsolutePath().toString()); + } // Add environment variables from scenario config if (config.env() != null) { diff --git a/ice-rest-catalog/src/test/resources/scenarios/README.md b/ice-rest-catalog/src/test/resources/scenarios/README.md index 0b0ba237..feb6de3e 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/README.md +++ b/ice-rest-catalog/src/test/resources/scenarios/README.md @@ -32,26 +32,6 @@ env: NAMESPACE_NAME: "test_ns" TABLE_NAME: "test_ns.table1" INPUT_FILE: "input.parquet" - -# Optional: Cloud resources needed (for future provisioning) -cloudResources: - s3: - buckets: - - "test-bucket" - sqs: - queues: - - "test-queue" - -# Optional: Test execution phases -phases: - - name: "setup" - description: "Initialize resources" - - name: "run" - description: "Execute main test logic" - - name: "verify" - description: "Verify results" - - name: "cleanup" - description: "Clean up resources" ``` ## Script Templates diff --git a/ice-rest-catalog/src/test/resources/scenarios/basic-operations/input.parquet b/ice-rest-catalog/src/test/resources/scenarios/basic-operations/input.parquet new file mode 100644 index 0000000000000000000000000000000000000000..028c64cf9f7d9ea46bfeeaf940482aaee90a1b54 GIT binary patch literal 2446 zcmZ`*L1-Ii7JgDj^8efVlXj 
z+h~)tReRFWhaP+iY7a$+Qgkq^*-PEa;-d=<2zpU(g6+Yl#l^)qB!vBb$#tBx9gt_{ z{qO(gz4v|Z{Vu=u=TMXW->5#yzNoj-c+To9(buwYj)=C>skKZhHy(ZdSt^wqi|&7y zN?D^(=3;JbG@?w3J<{(uM64qj3mr{oQZ#l|z@TipsH&9LfvnNEcG%`J+fs0g##Ncd zbUX7exWy$nkLrLEd4Po?Zml3GL2Z)^amx`evS2yK8Lb3r>3ybuE+Fm-Q=hc3= zNd<)4#fLqv(M9da4o6rocpN%8X>h^1rgkZHk(+Rj{l-b2p`%XBV1fC_E;2_-tzKG(RB zxnWH|)LsdNuF!!t4fhz?K0wudcULwRJSzEwmaEg@e2aLSeYTC_Yuo}$M_1AGM>@{zU?#KLo&SQ`ZEz>>IwEsSoA>Df=};m6B7Ba zYPh`*Hwha|&mO5BWFjo18v1$OCRwHj$n`+`nLnXJ50r-ichGNo)XJZ@VbL)d_zA^& zgo(2W(^`t(p^AoQy!rplOZJpnJOxz=gF0Hp^ypT|-Ly+QQoK(kQ1y)Z&I9Jjbf985Lr!D=ae)}z)e`vso zO&o&sm({zs?ko@E>94D|@2p?Hb@SG3Y2Mav-&lWt{rWOyZ9#kegVP=yoag^|!e@Gg zR#Gczopx`&49To~aQ*Kq?_Ildy}J63Wj#!Kcp~lO?yT09Z@zSM_5B-ns{_->!1U4j z$}g5&ZT0T6P6N}4Qw~T&A1>csU%mVB1W|q~?H1g>P2%Vob~O&ur@o@oSkK=Z%YQL6 zDPDEN;MBMrIbynhag-}9|3F6rxPXRw2n*3_$6p?|5J?1)tc-D%QCx}lac?$W} z(B!|*pF&9H>%5SA6e=JPiD-;}gstG4MkWHTQ+21e&5i48iNkM-0b*SR?d74NQmGPh0{{e!n B Date: Tue, 24 Feb 2026 10:28:13 -0600 Subject: [PATCH 2/9] Added logic to pass compression and option to use the source parquet compression(as-source) --- .../internal/maintenance/DataCompaction.java | 1 + .../main/java/com/altinity/ice/cli/Main.java | 6 + .../altinity/ice/cli/internal/cmd/Insert.java | 178 ++++++++++++------ 3 files changed, 128 insertions(+), 57 deletions(-) diff --git a/ice-rest-catalog/src/main/java/com/altinity/ice/rest/catalog/internal/maintenance/DataCompaction.java b/ice-rest-catalog/src/main/java/com/altinity/ice/rest/catalog/internal/maintenance/DataCompaction.java index 55e23fe4..a1b3028a 100644 --- a/ice-rest-catalog/src/main/java/com/altinity/ice/rest/catalog/internal/maintenance/DataCompaction.java +++ b/ice-rest-catalog/src/main/java/com/altinity/ice/rest/catalog/internal/maintenance/DataCompaction.java @@ -170,6 +170,7 @@ private void merge( Parquet.WriteBuilder writeBuilder = Parquet.write(outputFile) + .setAll(table.properties()) .overwrite(false) .createWriterFunc(GenericParquetWriter::buildWriter) .metricsConfig(metricsConfig) diff --git a/ice/src/main/java/com/altinity/ice/cli/Main.java b/ice/src/main/java/com/altinity/ice/cli/Main.java index a10e1da0..76564df7 100644 --- a/ice/src/main/java/com/altinity/ice/cli/Main.java +++ b/ice/src/main/java/com/altinity/ice/cli/Main.java @@ -414,6 +414,11 @@ void insert( description = "Number of threads to use for inserting data", defaultValue = "-1") int threadCount, + @CommandLine.Option( + names = {"--compression"}, + description = + "Parquet compression codec: gzip (default), zstd, snappy, lz4, brotli, uncompressed, or as-source") + String compression, @CommandLine.Option( names = {"--watch"}, description = "Event queue. Supported: AWS SQS") @@ -506,6 +511,7 @@ void insert( .sortOrderList(sortOrders) .threadCount( threadCount < 1 ? 
Runtime.getRuntime().availableProcessors() : threadCount) + .compression(compression) .build(); if (!watchMode) { diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java index 1e510680..b37d36f8 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java @@ -28,8 +28,10 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -76,6 +78,7 @@ import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetUtil; import org.apache.iceberg.rest.RESTCatalog; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.schema.MessageType; import org.slf4j.Logger; @@ -108,6 +111,22 @@ public static Result run( return new Result(0, 0); } + if (options.compression() != null) { + Set valid = + Arrays.stream(CompressionCodecName.values()) + .map(c -> c.name().toLowerCase(Locale.ENGLISH)) + .collect(Collectors.toCollection(HashSet::new)); + valid.add("as-source"); + if (!valid.contains(options.compression().toLowerCase(Locale.ENGLISH))) { + String accepted = String.join(", ", new TreeSet<>(valid)); + throw new IllegalArgumentException( + "Unknown --compression value: " + + options.compression() + + ". Accepted: " + + accepted); + } + } + Table table = catalog.loadTable(nsTable); // Create transaction and pass it to updatePartitionAndSortOrderMetadata @@ -501,66 +520,91 @@ private static List processFile( .build()); dataFileSizeInBytes = inputFile.getLength(); dataFile = dstDataFile; - } else if (partitionSpec.isPartitioned() && partitionKey == null) { - return copyPartitionedAndSorted( - file, - tableSchema, - partitionSpec, - sortOrder, - metricsConfig, - tableIO, - inputFile, - dstDataFileSource); - } else if (sortOrder.isSorted() && !sorted) { - return Collections.singletonList( - copySorted( - file, - dstDataFileSource.get(file), - tableSchema, - partitionSpec, - sortOrder, - metricsConfig, - tableIO, - inputFile, - dataFileNamingStrategy, - partitionKey)); } else { - // Table isn't partitioned or sorted. Copy as is. - String dstDataFile = dstDataFileSource.get(file); - if (checkNotExists.apply(dstDataFile)) { - return Collections.emptyList(); + // Copy path: compute compression override from CLI or as-source + String compressionCodecOverride = null; + if (options.compression() != null) { + if ("as-source".equalsIgnoreCase(options.compression())) { + var blocks = metadata.getBlocks(); + if (!blocks.isEmpty()) { + compressionCodecOverride = + blocks.get(0).getColumns().get(0).getCodec().name().toLowerCase(); + } + } else { + compressionCodecOverride = options.compression().toLowerCase(); + } } - OutputFile outputFile = - tableIO.newOutputFile(Strings.replacePrefix(dstDataFile, "s3://", "s3a://")); - // TODO: support transferTo below (note that compression, etc. 
might be different) - // try (var d = outputFile.create()) { - // try (var s = inputFile.newStream()) { s.transferTo(d); } - // } - Parquet.ReadBuilder readBuilder = - Parquet.read(inputFile) - .createReaderFunc(s -> GenericParquetReaders.buildReader(tableSchema, s)) - .project(tableSchema) - .reuseContainers(); - Parquet.WriteBuilder writeBuilder = - Parquet.write(outputFile) - .overwrite(dataFileNamingStrategy == DataFileNamingStrategy.Name.PRESERVE_ORIGINAL) - .createWriterFunc(GenericParquetWriter::buildWriter) - .metricsConfig(metricsConfig) - .schema(tableSchema); + if (partitionSpec.isPartitioned() && partitionKey == null) { + return copyPartitionedAndSorted( + file, + tableSchema, + partitionSpec, + sortOrder, + metricsConfig, + tableIO, + inputFile, + dstDataFileSource, + table.properties(), + compressionCodecOverride); + } else if (sortOrder.isSorted() && !sorted) { + return Collections.singletonList( + copySorted( + file, + dstDataFileSource.get(file), + tableSchema, + partitionSpec, + sortOrder, + metricsConfig, + tableIO, + inputFile, + dataFileNamingStrategy, + partitionKey, + table.properties(), + compressionCodecOverride)); + } else { + // Table isn't partitioned or sorted. Copy as is. + String dstDataFile = dstDataFileSource.get(file); + if (checkNotExists.apply(dstDataFile)) { + return Collections.emptyList(); + } + OutputFile outputFile = + tableIO.newOutputFile(Strings.replacePrefix(dstDataFile, "s3://", "s3a://")); + // TODO: support transferTo below (note that compression, etc. might be different) + // try (var d = outputFile.create()) { + // try (var s = inputFile.newStream()) { s.transferTo(d); } + // } + Parquet.ReadBuilder readBuilder = + Parquet.read(inputFile) + .createReaderFunc(s -> GenericParquetReaders.buildReader(tableSchema, s)) + .project(tableSchema) + .reuseContainers(); + + Parquet.WriteBuilder writeBuilder = + Parquet.write(outputFile) + .setAll(table.properties()) + .overwrite(dataFileNamingStrategy == DataFileNamingStrategy.Name.PRESERVE_ORIGINAL) + .createWriterFunc(GenericParquetWriter::buildWriter) + .metricsConfig(metricsConfig) + .schema(tableSchema); + if (compressionCodecOverride != null) { + + writeBuilder = writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); + } - logger.info("{}: copying to {}", file, dstDataFile); + logger.info("{}: copying to {}", file, dstDataFile); - try (CloseableIterable parquetReader = readBuilder.build()) { - try (FileAppender writer = writeBuilder.build()) { - writer.addAll(parquetReader); - writer.close(); // for write.length() - dataFileSizeInBytes = writer.length(); - metrics = writer.metrics(); + try (CloseableIterable parquetReader = readBuilder.build()) { + try (FileAppender writer = writeBuilder.build()) { + writer.addAll(parquetReader); + writer.close(); // for write.length() + dataFileSizeInBytes = writer.length(); + metrics = writer.metrics(); + } } - } - dataFile = dstDataFile; + dataFile = dstDataFile; + } } logger.info( "{}: adding data file (copy took {}s)", file, (System.currentTimeMillis() - start) / 1000); @@ -588,7 +632,9 @@ private static List copyPartitionedAndSorted( MetricsConfig metricsConfig, FileIO tableIO, InputFile inputFile, - DataFileNamingStrategy dstDataFileSource) + DataFileNamingStrategy dstDataFileSource, + Map tableProperties, + @Nullable String compressionCodecOverride) throws IOException { logger.info("{}: partitioning{}", file, sortOrder.isSorted() ? 
"+sorting" : ""); @@ -622,10 +668,14 @@ private static List copyPartitionedAndSorted( Parquet.WriteBuilder writeBuilder = Parquet.write(outputFile) + .setAll(tableProperties) .overwrite(true) // FIXME .createWriterFunc(GenericParquetWriter::buildWriter) .metricsConfig(metricsConfig) .schema(tableSchema); + if (compressionCodecOverride != null) { + writeBuilder = writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); + } try (FileAppender writer = writeBuilder.build()) { for (Record record : records) { @@ -668,7 +718,9 @@ private static DataFile copySorted( FileIO tableIO, InputFile inputFile, DataFileNamingStrategy.Name dataFileNamingStrategy, - PartitionKey partitionKey) + PartitionKey partitionKey, + Map tableProperties, + @Nullable String compressionCodecOverride) throws IOException { logger.info("{}: copying (sorted) to {}", file, dstDataFile); @@ -698,11 +750,15 @@ private static DataFile copySorted( // Write sorted records to outputFile Parquet.WriteBuilder writeBuilder = Parquet.write(outputFile) + .setAll(tableProperties) .overwrite( dataFileNamingStrategy == DataFileNamingStrategy.Name.PRESERVE_ORIGINAL) // FIXME .createWriterFunc(GenericParquetWriter::buildWriter) .metricsConfig(metricsConfig) .schema(tableSchema); + if (compressionCodecOverride != null) { + writeBuilder = writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); + } long fileSizeInBytes; Metrics metrics; @@ -793,7 +849,8 @@ public record Options( @Nullable String retryListFile, @Nullable List partitionList, @Nullable List sortOrderList, - int threadCount) { + int threadCount, + @Nullable String compression) { public static Builder builder() { return new Builder(); @@ -816,6 +873,7 @@ public static final class Builder { private List partitionList = List.of(); private List sortOrderList = List.of(); private int threadCount = Runtime.getRuntime().availableProcessors(); + private String compression; private Builder() {} @@ -899,6 +957,11 @@ public Builder threadCount(int threadCount) { return this; } + public Builder compression(String compression) { + this.compression = compression; + return this; + } + public Options build() { return new Options( dataFileNamingStrategy, @@ -916,7 +979,8 @@ public Options build() { retryListFile, partitionList, sortOrderList, - threadCount); + threadCount, + compression); } } } From 6b5fecc931f1fb3deda75ccf69c40c923d9840e5 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Thu, 26 Feb 2026 07:53:25 -0600 Subject: [PATCH 3/9] removed changes from master. 
--- .bin/pre-release-docker | 5 ----- .../java/com/altinity/ice/rest/catalog/ScenarioConfig.java | 1 - 2 files changed, 6 deletions(-) diff --git a/.bin/pre-release-docker b/.bin/pre-release-docker index 990a5597..2671de32 100755 --- a/.bin/pre-release-docker +++ b/.bin/pre-release-docker @@ -8,11 +8,6 @@ export SKIP_VERIFY=1 export PATH="$(pwd)/.bin:$PATH" -echo >&2 'Building ice Docker image' -docker-build-ice -echo >&2 'Building ice-rest-catalog Docker image' -docker-build-ice-rest-catalog - echo >&2 'Pushing ice Docker image' docker-build-ice --push echo >&2 'Pushing ice-rest-catalog Docker image' diff --git a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java index dbf3b97f..e7163456 100644 --- a/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java +++ b/ice-rest-catalog/src/test/java/com/altinity/ice/rest/catalog/ScenarioConfig.java @@ -9,7 +9,6 @@ */ package com.altinity.ice.rest.catalog; -import java.util.List; import java.util.Map; /** From 15c8338522e7abba494caa0ed4391ddae08ff676 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Thu, 26 Feb 2026 09:24:08 -0600 Subject: [PATCH 4/9] fix formatting error. --- .../com/altinity/ice/cli/internal/cmd/Insert.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java index b37d36f8..31b1a7aa 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java @@ -120,10 +120,7 @@ public static Result run( if (!valid.contains(options.compression().toLowerCase(Locale.ENGLISH))) { String accepted = String.join(", ", new TreeSet<>(valid)); throw new IllegalArgumentException( - "Unknown --compression value: " - + options.compression() - + ". Accepted: " - + accepted); + "Unknown --compression value: " + options.compression() + ". Accepted: " + accepted); } } @@ -589,7 +586,8 @@ private static List processFile( .schema(tableSchema); if (compressionCodecOverride != null) { - writeBuilder = writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); + writeBuilder = + writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); } logger.info("{}: copying to {}", file, dstDataFile); @@ -674,7 +672,8 @@ private static List copyPartitionedAndSorted( .metricsConfig(metricsConfig) .schema(tableSchema); if (compressionCodecOverride != null) { - writeBuilder = writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); + writeBuilder = + writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); } try (FileAppender writer = writeBuilder.build()) { @@ -757,7 +756,8 @@ private static DataFile copySorted( .metricsConfig(metricsConfig) .schema(tableSchema); if (compressionCodecOverride != null) { - writeBuilder = writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); + writeBuilder = + writeBuilder.set(TableProperties.PARQUET_COMPRESSION, compressionCodecOverride); } long fileSizeInBytes; From 85c6b0aac83e4ee8df9462824b3692e64d6efcc7 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Thu, 5 Mar 2026 10:12:26 -0600 Subject: [PATCH 5/9] Added test for schema evolution. 
---
 .../scenarios/schema-evolution/run.sh.tmpl    | 53 +++++++++++++++++++
 .../scenarios/schema-evolution/scenario.yaml  |  9 ++++
 2 files changed, 62 insertions(+)
 create mode 100644 ice-rest-catalog/src/test/resources/scenarios/schema-evolution/run.sh.tmpl
 create mode 100644 ice-rest-catalog/src/test/resources/scenarios/schema-evolution/scenario.yaml

diff --git a/ice-rest-catalog/src/test/resources/scenarios/schema-evolution/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/schema-evolution/run.sh.tmpl
new file mode 100644
index 00000000..4a076deb
--- /dev/null
+++ b/ice-rest-catalog/src/test/resources/scenarios/schema-evolution/run.sh.tmpl
@@ -0,0 +1,53 @@
+#!/bin/bash
+set -e
+
+echo "Running schema evolution test..."
+
+SCENARIO_DIR="{{SCENARIO_DIR}}"
+INPUT_PATH="${SCENARIO_DIR}/../insert-scan/input.parquet"
+
+# Create namespace
+{{ICE_CLI}} --config {{CLI_CONFIG}} create-namespace ${NAMESPACE_NAME}
+echo "OK Created namespace: ${NAMESPACE_NAME}"
+
+# Insert table from iris parquet
+{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file://${INPUT_PATH}"
+echo "OK Inserted data into ${TABLE_NAME}"
+
+# Alter table: add column only (so the same parquet remains valid for the second insert)
+{{ICE_CLI}} --config {{CLI_CONFIG}} alter-table ${TABLE_NAME} $'[{"op":"add_column","name":"extra","type":"string"}]'
+echo "OK Altered table schema"
+
+# Verify schema: expect the new 'extra' column
+{{ICE_CLI}} --config {{CLI_CONFIG}} describe -s ${TABLE_NAME} > /tmp/schema_ev_describe.txt
+if ! grep -q "extra" /tmp/schema_ev_describe.txt; then
+  echo "FAIL describe -s missing expected column 'extra'"
+  cat /tmp/schema_ev_describe.txt
+  exit 1
+fi
+echo "OK Schema verified"
+
+# First scan to get baseline row count (scan output: header + data lines)
+{{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} --limit 500 > /tmp/schema_ev_scan1.txt
+FIRST_LINES=$(wc -l < /tmp/schema_ev_scan1.txt)
+echo "OK First scan: ${FIRST_LINES} lines"
+
+# Second insert from the same parquet (file schema is a subset of the table schema after add_column only)
+{{ICE_CLI}} --config {{CLI_CONFIG}} insert ${TABLE_NAME} "file://${INPUT_PATH}"
+echo "OK Second insert completed"
+
+# Second scan: expect more rows (more lines)
+{{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} --limit 500 > /tmp/schema_ev_scan2.txt
+SECOND_LINES=$(wc -l < /tmp/schema_ev_scan2.txt)
+if [ "${SECOND_LINES}" -le "${FIRST_LINES}" ]; then
+  echo "FAIL second scan should have more lines (got ${SECOND_LINES}, first had ${FIRST_LINES})"
+  exit 1
+fi
+echo "OK Second scan: ${SECOND_LINES} lines (rows inserted)"
+
+# Cleanup
+{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME}
+{{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME}
+echo "OK Cleanup done"
+
+echo "Schema evolution test completed successfully"
diff --git a/ice-rest-catalog/src/test/resources/scenarios/schema-evolution/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/schema-evolution/scenario.yaml
new file mode 100644
index 00000000..8a753b63
--- /dev/null
+++ b/ice-rest-catalog/src/test/resources/scenarios/schema-evolution/scenario.yaml
@@ -0,0 +1,9 @@
+name: "Schema evolution (add column)"
+description: "Tests alter-table: add a column, verify the schema, then re-insert and scan"
+
+catalogConfig:
+  warehouse: "s3://test-bucket/warehouse"
+
+env:
+  NAMESPACE_NAME: "test_schema_ev"
+  TABLE_NAME: "test_schema_ev.t1"

From 28253fda7be462afc5d5913a31d4ced9c587cb2c Mon Sep 17 00:00:00 2001
From: kanthi subramanian
Date: Fri, 6 Mar 2026 14:49:22 -0600
Subject: [PATCH 6/9] Added test for custom compression

---
 .../scenarios/insert-compression/run.sh.tmpl  | 44 +++++++++++++++++++
 .../insert-compression/scenario.yaml          | 10 +++++
 2 files changed, 54 insertions(+)
 create mode 100644 ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl
 create mode 100644 ice-rest-catalog/src/test/resources/scenarios/insert-compression/scenario.yaml

diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl
new file mode 100644
index 00000000..ff58c475
--- /dev/null
+++ b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl
@@ -0,0 +1,44 @@
+#!/bin/bash
+set -e
+
+echo "Running insert with compression test..."
+
+SCENARIO_DIR="{{SCENARIO_DIR}}"
+INPUT_PATH="${SCENARIO_DIR}/../insert-scan/input.parquet"
+
+# Create namespace
+{{ICE_CLI}} --config {{CLI_CONFIG}} create-namespace ${NAMESPACE_NAME}
+echo "OK Created namespace"
+
+# Insert with zstd compression
+{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file://${INPUT_PATH}" --compression=zstd
+echo "OK Inserted data with zstd compression"
+
+# Scan to verify data is readable
+{{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} > /tmp/compression_scan.txt
+if ! grep -q "sepal.length" /tmp/compression_scan.txt; then
+  echo "FAIL Scan output missing expected column 'sepal.length'"
+  cat /tmp/compression_scan.txt
+  exit 1
+fi
+echo "OK Scan verified data is readable after zstd insert"
+
+# Insert again with snappy to verify a second codec works
+{{ICE_CLI}} --config {{CLI_CONFIG}} insert ${TABLE_NAME} "file://${INPUT_PATH}" --compression=snappy
+echo "OK Inserted data with snappy compression"
+
+# Scan again to verify both inserts are readable
+{{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} --limit 500 > /tmp/compression_scan2.txt
+LINES=$(wc -l < /tmp/compression_scan2.txt)
+if [ "${LINES}" -le 1 ]; then
+  echo "FAIL Scan after second insert returned no data"
+  exit 1
+fi
+echo "OK Scan after second insert: ${LINES} lines"
+
+# Cleanup
+{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME}
+{{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME}
+echo "OK Cleanup done"
+
+echo "Insert with compression test completed successfully"
diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-compression/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/scenario.yaml
new file mode 100644
index 00000000..7be5d0d9
--- /dev/null
+++ b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/scenario.yaml
@@ -0,0 +1,10 @@
+name: "Insert with compression codec"
+description: "Tests inserting data with the --compression flag (zstd, snappy) and verifying data is readable"
+
+catalogConfig:
+  warehouse: "s3://test-bucket/warehouse"
+
+env:
+  NAMESPACE_NAME: "test_compression"
+  TABLE_NAME: "test_compression.iris_zstd"
+  INPUT_FILE: "input.parquet"

From aec99468e1a4f4e43eafe9904b4a69d7078fe2eb Mon Sep 17 00:00:00 2001
From: kanthi subramanian
Date: Fri, 6 Mar 2026 15:05:53 -0600
Subject: [PATCH 7/9] Removed logic that used describe to check compression

---
 .../scenarios/insert-compression/run.sh.tmpl  | 32 ++++++++++++-------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl
index ff58c475..0c1a6312 100644
--- 
a/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl @@ -6,6 +6,13 @@ echo "Running insert with compression test..." SCENARIO_DIR="{{SCENARIO_DIR}}" INPUT_PATH="${SCENARIO_DIR}/../insert-scan/input.parquet" +# Verify invalid compression codec is rejected +if {{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file://${INPUT_PATH}" --compression=invalid 2>/dev/null; then + echo "FAIL insert should have rejected invalid compression codec" + exit 1 +fi +echo "OK Invalid compression codec rejected" + # Create namespace {{ICE_CLI}} --config {{CLI_CONFIG}} create-namespace ${NAMESPACE_NAME} echo "OK Created namespace" @@ -14,27 +21,28 @@ echo "OK Created namespace" {{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file://${INPUT_PATH}" --compression=zstd echo "OK Inserted data with zstd compression" -# Scan to verify data is readable -{{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} > /tmp/compression_scan.txt -if ! grep -q "sepal.length" /tmp/compression_scan.txt; then - echo "FAIL Scan output missing expected column 'sepal.length'" - cat /tmp/compression_scan.txt +# Scan to verify data written with zstd is readable +{{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} > /tmp/compression_scan1.txt +if ! grep -q "sepal.length" /tmp/compression_scan1.txt; then + echo "FAIL Scan output missing expected column after zstd insert" + cat /tmp/compression_scan1.txt exit 1 fi -echo "OK Scan verified data is readable after zstd insert" +FIRST_LINES=$(wc -l < /tmp/compression_scan1.txt) +echo "OK Scan after zstd insert: ${FIRST_LINES} lines" -# Insert again with snappy to verify a second codec works +# Insert again with snappy compression {{ICE_CLI}} --config {{CLI_CONFIG}} insert ${TABLE_NAME} "file://${INPUT_PATH}" --compression=snappy echo "OK Inserted data with snappy compression" -# Scan again to verify both inserts are readable +# Scan to verify both zstd and snappy data files are readable together {{ICE_CLI}} --config {{CLI_CONFIG}} scan ${TABLE_NAME} --limit 500 > /tmp/compression_scan2.txt -LINES=$(wc -l < /tmp/compression_scan2.txt) -if [ "${LINES}" -le 1 ]; then - echo "FAIL Scan after second insert returned no data" +SECOND_LINES=$(wc -l < /tmp/compression_scan2.txt) +if [ "${SECOND_LINES}" -le "${FIRST_LINES}" ]; then + echo "FAIL Second scan should have more rows (got ${SECOND_LINES}, first had ${FIRST_LINES})" exit 1 fi -echo "OK Scan after second insert: ${LINES} lines" +echo "OK Scan after snappy insert: ${SECOND_LINES} lines" # Cleanup {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME} From 36a4702ca66564b3515e5d4f7c5fa17b0818a9e1 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Fri, 6 Mar 2026 15:26:24 -0600 Subject: [PATCH 8/9] Moved test after create-namespace --- .../resources/scenarios/insert-compression/run.sh.tmpl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl index 0c1a6312..f293a788 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-compression/run.sh.tmpl @@ -6,6 +6,10 @@ echo "Running insert with compression test..." 
 SCENARIO_DIR="{{SCENARIO_DIR}}"
 INPUT_PATH="${SCENARIO_DIR}/../insert-scan/input.parquet"
 
+# Create namespace
+{{ICE_CLI}} --config {{CLI_CONFIG}} create-namespace ${NAMESPACE_NAME}
+echo "OK Created namespace"
+
 # Verify invalid compression codec is rejected
 if {{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file://${INPUT_PATH}" --compression=invalid 2>/dev/null; then
   echo "FAIL insert should have rejected invalid compression codec"
@@ -13,10 +17,6 @@ if {{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file
 fi
 echo "OK Invalid compression codec rejected"
 
-# Create namespace
-{{ICE_CLI}} --config {{CLI_CONFIG}} create-namespace ${NAMESPACE_NAME}
-echo "OK Created namespace"
-
 # Insert with zstd compression
 {{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} "file://${INPUT_PATH}" --compression=zstd
 echo "OK Inserted data with zstd compression"

From 92bf58af1e80d254dd3c3ba2d94c3728cf2cd381 Mon Sep 17 00:00:00 2001
From: kanthi subramanian
Date: Fri, 6 Mar 2026 17:50:41 -0600
Subject: [PATCH 9/9] Renamed variable to validCompressionCodecs so it's understandable

---
 .../java/com/altinity/ice/cli/internal/cmd/Insert.java | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java
index 31b1a7aa..d8139aee 100644
--- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java
+++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java
@@ -112,13 +112,14 @@ public static Result run(
     }
 
     if (options.compression() != null) {
-      Set<String> valid =
+      // Validate against the list of parquet compression codecs supported by Iceberg.
+      Set<String> validCompressionCodecs =
           Arrays.stream(CompressionCodecName.values())
              .map(c -> c.name().toLowerCase(Locale.ENGLISH))
              .collect(Collectors.toCollection(HashSet::new));
-      valid.add("as-source");
-      if (!valid.contains(options.compression().toLowerCase(Locale.ENGLISH))) {
-        String accepted = String.join(", ", new TreeSet<>(valid));
+      validCompressionCodecs.add("as-source");
+      if (!validCompressionCodecs.contains(options.compression().toLowerCase(Locale.ENGLISH))) {
+        String accepted = String.join(", ", new TreeSet<>(validCompressionCodecs));
         throw new IllegalArgumentException(
             "Unknown --compression value: " + options.compression() + ". Accepted: " + accepted);
       }
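
Note: a minimal end-to-end sketch of the new --compression flag under the test setup above. The commands and flags are the ones introduced in these patches; the "demo" namespace/table names, the input parquet path, and the /tmp/ice-cli.yaml config path are illustrative stand-ins, not part of the patches.

    # Illustrative sketch; "demo"/"demo.iris" and the input path are hypothetical.
    ice --config /tmp/ice-cli.yaml create-namespace demo
    ice --config /tmp/ice-cli.yaml insert --create-table demo.iris "file://$PWD/input.parquet" --compression=zstd
    # "as-source" reuses the codec recorded in the source parquet (first column chunk of the first row group).
    ice --config /tmp/ice-cli.yaml insert demo.iris "file://$PWD/input.parquet" --compression=as-source
    # Any value outside CompressionCodecName plus "as-source" fails fast with IllegalArgumentException.
    ice --config /tmp/ice-cli.yaml scan demo.iris --limit 10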