Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,22 @@ jobs:
timeout-minutes: 30
strategy:
fail-fast: false
env:
ICEBERG_TEST_S3_URI: s3://iceberg-test
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123
AWS_DEFAULT_REGION: us-east-1
AWS_ENDPOINT_URL: http://127.0.0.1:9000
AWS_EC2_METADATA_DISABLED: "TRUE"
steps:
- name: Checkout iceberg-cpp
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install dependencies
shell: bash
run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev
- name: Start MinIO
shell: bash
run: bash ci/scripts/start_minio.sh
- name: Build Iceberg
shell: bash
env:
Expand All @@ -67,9 +77,19 @@ jobs:
timeout-minutes: 30
strategy:
fail-fast: false
env:
ICEBERG_TEST_S3_URI: s3://iceberg-test
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123
AWS_DEFAULT_REGION: us-east-1
AWS_ENDPOINT_URL: http://127.0.0.1:9000
AWS_EC2_METADATA_DISABLED: "TRUE"
steps:
- name: Checkout iceberg-cpp
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Start MinIO
shell: bash
run: bash ci/scripts/start_minio.sh
- name: Build Iceberg
shell: bash
run: ci/scripts/build_iceberg.sh $(pwd)
Expand All @@ -82,6 +102,13 @@ jobs:
timeout-minutes: 60
strategy:
fail-fast: false
env:
ICEBERG_TEST_S3_URI: s3://iceberg-test
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123
AWS_DEFAULT_REGION: us-east-1
AWS_ENDPOINT_URL: http://127.0.0.1:9000
AWS_EC2_METADATA_DISABLED: "TRUE"
steps:
- name: Checkout iceberg-cpp
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
Expand All @@ -91,6 +118,9 @@ jobs:
vcpkg install zlib:x64-windows nlohmann-json:x64-windows nanoarrow:x64-windows roaring:x64-windows cpr:x64-windows
- name: Setup sccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9
- name: Start MinIO
shell: bash
run: bash ci/scripts/start_minio.sh
- name: Build Iceberg
shell: cmd
env:
Expand Down
127 changes: 127 additions & 0 deletions ci/scripts/start_minio.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -eux

# Runtime configuration. Every value may be overridden from the environment;
# otherwise the defaults below are used.
MINIO_ROOT_USER="${MINIO_ROOT_USER:-minio}"
MINIO_ROOT_PASSWORD="${MINIO_ROOT_PASSWORD:-minio123}"
# Pin a concrete MinIO release so CI runs are reproducible. MinIO release tags
# embed the actual build timestamp, so the tag must match a published release
# exactly or `docker pull` fails.
MINIO_IMAGE="${MINIO_IMAGE:-minio/minio:RELEASE.2024-12-18T13-15-44Z}"
MINIO_CONTAINER_NAME="${MINIO_CONTAINER_NAME:-iceberg-minio}"
# Host ports for the S3 API and the web console.
MINIO_PORT="${MINIO_PORT:-9000}"
MINIO_CONSOLE_PORT="${MINIO_CONSOLE_PORT:-9001}"
# Bucket created by create_bucket once the server is reachable.
MINIO_BUCKET="${MINIO_BUCKET:-iceberg-test}"
# Must be derived after MINIO_PORT so the default endpoint follows the port.
MINIO_ENDPOINT="${MINIO_ENDPOINT:-http://127.0.0.1:${MINIO_PORT}}"

# Poll the MinIO readiness endpoint once per second, for at most 30 attempts.
#
# Returns 0 as soon as "${MINIO_ENDPOINT}/minio/health/ready" answers with a
# successful HTTP status, or 1 if every attempt fails.
wait_for_minio() {
  local remaining=30
  until [ "${remaining}" -eq 0 ]; do
    # curl -f turns HTTP errors into a non-zero exit; being on the left of
    # `&&` keeps `set -e` from aborting the script on a failed probe.
    curl -fsS "${MINIO_ENDPOINT}/minio/health/ready" >/dev/null && return 0
    sleep 1
    remaining=$((remaining - 1))
  done
  return 1
}

# Start MinIO in a Docker container and wait until it reports ready.
#
# Any existing container named ${MINIO_CONTAINER_NAME} is removed first so the
# script can be re-run. Returns 1 when docker is not installed, otherwise the
# result of wait_for_minio.
start_minio_docker() {
  if ! command -v docker >/dev/null 2>&1; then
    echo "docker not found; cannot start the MinIO container" >&2
    return 1
  fi

  # -F/-x: match the container name literally and as a whole line, so names
  # containing regex metacharacters cannot match unrelated containers.
  if docker ps -a --format '{{.Names}}' | grep -Fxq -- "${MINIO_CONTAINER_NAME}"; then
    docker rm -f "${MINIO_CONTAINER_NAME}"
  fi

  # Inside the container MinIO always listens on 9000 (API) and 9001
  # (console); only the host side of each mapping is configurable. The console
  # address must therefore be the fixed container port, not the host port,
  # otherwise a non-default MINIO_CONSOLE_PORT publishes a port nothing
  # listens on.
  docker run -d --name "${MINIO_CONTAINER_NAME}" \
    -p "${MINIO_PORT}:9000" -p "${MINIO_CONSOLE_PORT}:9001" \
    -e "MINIO_ROOT_USER=${MINIO_ROOT_USER}" \
    -e "MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}" \
    "${MINIO_IMAGE}" \
    server /data --console-address ":9001"

  wait_for_minio
}

# Start a native MinIO server on macOS (installed through Homebrew) in the
# background and wait until it reports ready.
#
# Returns 1 when brew is unavailable, otherwise the result of wait_for_minio.
start_minio_macos() {
  if ! command -v brew >/dev/null 2>&1; then
    echo "brew is required to start MinIO on macOS without Docker" >&2
    return 1
  fi

  brew install minio
  # The credentials are plain shell variables in this script, so they must be
  # passed into the server's environment explicitly; otherwise MinIO starts
  # with its built-in default credentials and the later `mc alias set` fails.
  # --address makes the API port follow MINIO_PORT instead of always 9000.
  MINIO_ROOT_USER="${MINIO_ROOT_USER}" \
  MINIO_ROOT_PASSWORD="${MINIO_ROOT_PASSWORD}" \
    minio server /tmp/minio \
    --address ":${MINIO_PORT}" \
    --console-address ":${MINIO_CONSOLE_PORT}" &
  wait_for_minio
}

# Download the MinIO client (mc) for the current platform.
#
# The binary is stored in ${RUNNER_TEMP} (falling back to /tmp) and its path is
# published in the global MC_BIN, which create_bucket reads. Returns 1 for an
# unsupported operating system; a failed download aborts via curl's exit code.
download_mc() {
  local uname_out
  uname_out="$(uname -s)"

  local mc_dir
  mc_dir="${RUNNER_TEMP:-/tmp}"
  mkdir -p "${mc_dir}"

  # Pick the build matching the host CPU (e.g. Apple Silicon runners report
  # arm64). Unknown machines keep the previous behaviour of using amd64.
  local mc_arch
  case "$(uname -m)" in
    arm64|aarch64)
      mc_arch="arm64"
      ;;
    *)
      mc_arch="amd64"
      ;;
  esac

  local mc_url
  case "${uname_out}" in
    Linux*)
      MC_BIN="${mc_dir}/mc"
      mc_url="https://dl.min.io/client/mc/release/linux-${mc_arch}/mc"
      ;;
    Darwin*)
      MC_BIN="${mc_dir}/mc"
      mc_url="https://dl.min.io/client/mc/release/darwin-${mc_arch}/mc"
      ;;
    MINGW*|MSYS*|CYGWIN*)
      MC_BIN="${mc_dir}/mc.exe"
      mc_url="https://dl.min.io/client/mc/release/windows-amd64/mc.exe"
      ;;
    *)
      echo "Unsupported OS for mc: ${uname_out}" >&2
      return 1
      ;;
  esac

  # -f: fail on HTTP errors instead of saving the error page as the binary.
  # --retry: tolerate transient network failures on CI runners.
  curl -fsSL --retry 3 "${mc_url}" -o "${MC_BIN}"
  # Harmless on Windows, required on Linux and macOS.
  chmod +x "${MC_BIN}"
}

# Create ${MINIO_BUCKET} on the running MinIO server.
#
# Downloads mc, registers the server under the mc alias "local" (retrying once
# per second for up to 30 attempts while the server finishes starting), then
# creates the bucket if it does not exist yet. Returns 1 if the alias could
# never be registered.
create_bucket() {
  download_mc

  local alias_ready=0
  local attempt
  for attempt in {1..30}; do
    if "${MC_BIN}" alias set local "${MINIO_ENDPOINT}" "${MINIO_ROOT_USER}" "${MINIO_ROOT_PASSWORD}"; then
      alias_ready=1
      break
    fi
    sleep 1
  done

  # Fail here with a clear message rather than letting `mc mb` report a
  # confusing error about a missing alias.
  if [ "${alias_ready}" -ne 1 ]; then
    echo "Failed to register mc alias for ${MINIO_ENDPOINT} after 30 attempts" >&2
    return 1
  fi

  "${MC_BIN}" mb --ignore-existing "local/${MINIO_BUCKET}"
}

# Entry point: start MinIO using the mechanism available on this OS, then
# create the test bucket.
#
# macOS prefers Docker and falls back to a Homebrew-installed server; Linux
# and the Windows POSIX layers (MINGW/MSYS/Cygwin) use Docker only.
# NOTE(review): on hosted Windows runners Docker may be limited to Windows
# containers, in which case the Linux MinIO image cannot start — confirm.
host_os="$(uname -s)"

if [[ "${host_os}" == Darwin* ]]; then
  start_minio_docker || start_minio_macos
elif [[ "${host_os}" == Linux* || "${host_os}" == MINGW* || "${host_os}" == MSYS* || "${host_os}" == CYGWIN* ]]; then
  start_minio_docker
else
  echo "Unsupported OS: ${host_os}" >&2
  exit 1
fi

create_bucket
1 change: 1 addition & 0 deletions cmake_modules/IcebergThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ function(resolve_arrow_dependency)
# Work around undefined symbol: arrow::ipc::ReadSchema(arrow::io::InputStream*, arrow::ipc::DictionaryMemo*)
set(ARROW_IPC ON)
set(ARROW_FILESYSTEM ON)
set(ARROW_S3 ON)
set(ARROW_JSON ON)
set(ARROW_PARQUET ON)
set(ARROW_SIMD_LEVEL "NONE")
Expand Down
3 changes: 3 additions & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ set(ICEBERG_SOURCES
expression/rewrite_not.cc
expression/strict_metrics_evaluator.cc
expression/term.cc
file_io_registry.cc
file_reader.cc
file_writer.cc
inheritable_metadata.cc
Expand Down Expand Up @@ -171,6 +172,8 @@ add_subdirectory(util)
if(ICEBERG_BUILD_BUNDLE)
set(ICEBERG_BUNDLE_SOURCES
arrow/arrow_fs_file_io.cc
arrow/arrow_s3_file_io.cc
arrow/file_io_register.cc
arrow/metadata_column_util.cc
avro/avro_data_util.cc
avro/avro_direct_decoder.cc
Expand Down
17 changes: 17 additions & 0 deletions src/iceberg/arrow/arrow_file_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,31 @@
#pragma once

#include <memory>
#include <string>
#include <unordered_map>

#include "iceberg/file_io.h"
#include "iceberg/iceberg_bundle_export.h"
#include "iceberg/result.h"

namespace iceberg::arrow {

/// \brief Create a mock FileIO.
///
/// NOTE(review): the backing storage is not visible from this header;
/// presumably intended for tests — confirm against the implementation.
ICEBERG_BUNDLE_EXPORT std::unique_ptr<FileIO> MakeMockFileIO();

/// \brief Create a FileIO for the local filesystem.
///
/// NOTE(review): presumably backed by Arrow's local filesystem — confirm
/// against the implementation.
ICEBERG_BUNDLE_EXPORT std::unique_ptr<FileIO> MakeLocalFileIO();

/// \brief Create an S3 FileIO backed by Arrow's S3FileSystem.
///
/// This function initializes the S3 subsystem if not already initialized (thread-safe).
/// The S3 initialization is done once per process using std::call_once.
///
/// \param uri An S3 URI (must start with "s3://") used to validate the scheme.
/// \param properties Optional configuration properties for S3 access. See S3Properties
/// for available keys (credentials, region, endpoint, timeouts, etc.).
/// Defaults to an empty map when omitted.
/// \return A FileIO instance for S3 operations, or an error if S3 is not supported
/// or the URI is invalid.
ICEBERG_BUNDLE_EXPORT Result<std::unique_ptr<FileIO>> MakeS3FileIO(
const std::string& uri,
const std::unordered_map<std::string, std::string>& properties = {});

} // namespace iceberg::arrow
Loading
Loading