From d69971b4f2b053f5f27464976ba5440e1b894a5f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 9 Feb 2026 09:33:36 -0300 Subject: [PATCH 1/5] add setting to define filename pattern for part exports --- src/Core/Settings.cpp | 3 + ...portReplicatedMergeTreePartitionManifest.h | 3 + src/Storages/MergeTree/ExportPartTask.cpp | 26 ++++- .../ExportPartitionTaskScheduler.cpp | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 + .../configs/macros_shard1_replica1.xml | 6 + .../configs/macros_shard2_replica1.xml | 6 + .../test.py | 107 ++++++++++++++++++ ...merge_tree_part_filename_pattern.reference | 16 +++ ...export_merge_tree_part_filename_pattern.sh | 49 ++++++++ 10 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard1_replica1.xml create mode 100644 tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard2_replica1.xml create mode 100644 tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.reference create mode 100755 tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.sh diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 66859307674e..1e9e5bec0f25 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -7388,6 +7388,9 @@ Possible values: - `` (empty value) - use server or session timezone Default value is empty. +)", 0) \ + DECLARE(String, export_merge_tree_part_filename_pattern, "{part_name}_{checksum}", R"( +Pattern for the filename of the exported merge tree part. The `part_name` and `checksum` are calculated and replaced on the fly. Additional macros are supported. 
)", 0) \ \ /* ####################################################### */ \ diff --git a/src/Storages/ExportReplicatedMergeTreePartitionManifest.h b/src/Storages/ExportReplicatedMergeTreePartitionManifest.h index 31b8731c4e8b..a41401dee09b 100644 --- a/src/Storages/ExportReplicatedMergeTreePartitionManifest.h +++ b/src/Storages/ExportReplicatedMergeTreePartitionManifest.h @@ -116,6 +116,7 @@ struct ExportReplicatedMergeTreePartitionManifest size_t max_bytes_per_file; size_t max_rows_per_file; MergeTreePartExportManifest::FileAlreadyExistsPolicy file_already_exists_policy; + String filename_pattern; bool lock_inside_the_task; /// todo temporary std::string toJsonString() const @@ -139,6 +140,7 @@ struct ExportReplicatedMergeTreePartitionManifest json.set("max_bytes_per_file", max_bytes_per_file); json.set("max_rows_per_file", max_rows_per_file); json.set("file_already_exists_policy", String(magic_enum::enum_name(file_already_exists_policy))); + json.set("filename_pattern", filename_pattern); json.set("create_time", create_time); json.set("max_retries", max_retries); json.set("ttl_seconds", ttl_seconds); @@ -175,6 +177,7 @@ struct ExportReplicatedMergeTreePartitionManifest manifest.parquet_parallel_encoding = json->getValue("parquet_parallel_encoding"); manifest.max_bytes_per_file = json->getValue("max_bytes_per_file"); manifest.max_rows_per_file = json->getValue("max_rows_per_file"); + manifest.filename_pattern = json->getValue("filename_pattern"); if (json->has("file_already_exists_policy")) { diff --git a/src/Storages/MergeTree/ExportPartTask.cpp b/src/Storages/MergeTree/ExportPartTask.cpp index 8305093a8c63..5f619f1df6be 100644 --- a/src/Storages/MergeTree/ExportPartTask.cpp +++ b/src/Storages/MergeTree/ExportPartTask.cpp @@ -3,11 +3,13 @@ #include #include #include -#include #include #include +#include +#include #include #include +#include #include #include #include @@ -47,6 +49,7 @@ namespace Setting extern const SettingsUInt64 
export_merge_tree_part_max_bytes_per_file; extern const SettingsUInt64 export_merge_tree_part_max_rows_per_file; extern const SettingsBool allow_experimental_analyzer; + extern const SettingsString export_merge_tree_part_filename_pattern; } namespace @@ -93,6 +96,23 @@ namespace plan_for_part.addStep(std::move(expression_step)); } } + + String buildDestinationFilename( + const MergeTreePartExportManifest & manifest, + const StorageID & storage_id, + const ContextPtr & local_context) + { + auto filename = manifest.settings[Setting::export_merge_tree_part_filename_pattern].value; + + boost::replace_all(filename, "{part_name}", manifest.data_part->name); + boost::replace_all(filename, "{checksum}", manifest.data_part->checksums.getTotalChecksumHex()); + + Macros::MacroExpansionInfo macro_info; + macro_info.table_id = storage_id; + filename = local_context->getMacros()->expand(filename, macro_info); + + return filename; + } } ExportPartTask::ExportPartTask(MergeTreeData & storage_, const MergeTreePartExportManifest & manifest_) @@ -154,8 +174,10 @@ bool ExportPartTask::executeStep() try { + const auto filename = buildDestinationFilename(manifest, storage.getStorageID(), local_context); + sink = destination_storage->import( - manifest.data_part->name + "_" + manifest.data_part->checksums.getTotalChecksumHex(), + filename, block_with_partition_values, new_file_path_callback, manifest.file_already_exists_policy == MergeTreePartExportManifest::FileAlreadyExistsPolicy::overwrite, diff --git a/src/Storages/MergeTree/ExportPartitionTaskScheduler.cpp b/src/Storages/MergeTree/ExportPartitionTaskScheduler.cpp index 89042fe694b9..909ada39749e 100644 --- a/src/Storages/MergeTree/ExportPartitionTaskScheduler.cpp +++ b/src/Storages/MergeTree/ExportPartitionTaskScheduler.cpp @@ -56,7 +56,9 @@ namespace context_copy->setSetting("export_merge_tree_part_throw_on_pending_mutations", false); context_copy->setSetting("export_merge_tree_part_throw_on_pending_patch_parts", false); - return 
context_copy; + context_copy->setSetting("export_merge_tree_part_filename_pattern", manifest.filename_pattern); + + return context_copy; } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f12d79c961bb..629fc87eeefc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -217,6 +217,7 @@ namespace Setting extern const SettingsBool export_merge_tree_part_throw_on_pending_mutations; extern const SettingsBool export_merge_tree_part_throw_on_pending_patch_parts; extern const SettingsBool export_merge_tree_partition_lock_inside_the_task; + extern const SettingsString export_merge_tree_part_filename_pattern; } namespace MergeTreeSetting @@ -8209,6 +8210,7 @@ void StorageReplicatedMergeTree::exportPartitionToTable(const PartitionCommand & manifest.file_already_exists_policy = query_context->getSettingsRef()[Setting::export_merge_tree_part_file_already_exists_policy].value; + manifest.filename_pattern = query_context->getSettingsRef()[Setting::export_merge_tree_part_filename_pattern].value; ops.emplace_back(zkutil::makeCreateRequest( fs::path(partition_exports_path) / "metadata.json", diff --git a/tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard1_replica1.xml b/tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard1_replica1.xml new file mode 100644 index 000000000000..bae1ce119255 --- /dev/null +++ b/tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard1_replica1.xml @@ -0,0 +1,6 @@ + + + shard1 + replica1 + + diff --git a/tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard2_replica1.xml b/tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard2_replica1.xml new file mode 100644 index 000000000000..fb9a587e736d --- /dev/null +++ 
b/tests/integration/test_export_replicated_mt_partition_to_object_storage/configs/macros_shard2_replica1.xml @@ -0,0 +1,6 @@ + + + shard2 + replica1 + + diff --git a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py index b10349c35190..eb8effb66660 100644 --- a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py +++ b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py @@ -118,6 +118,26 @@ def cluster(): with_zookeeper=True, keeper_required_feature_flags=["multi_read"], ) + # Sharded instances for filename pattern tests + cluster.add_instance( + "shard1_replica1", + main_configs=["configs/named_collections.xml", "configs/allow_experimental_export_partition.xml", "configs/macros_shard1_replica1.xml"], + user_configs=["configs/users.d/profile.xml"], + with_minio=True, + stay_alive=True, + with_zookeeper=True, + keeper_required_feature_flags=["multi_read"], + ) + + cluster.add_instance( + "shard2_replica1", + main_configs=["configs/named_collections.xml", "configs/allow_experimental_export_partition.xml", "configs/macros_shard2_replica1.xml"], + user_configs=["configs/users.d/profile.xml"], + with_minio=True, + stay_alive=True, + with_zookeeper=True, + keeper_required_feature_flags=["multi_read"], + ) logging.info("Starting cluster...") cluster.start() yield cluster @@ -161,6 +181,14 @@ def create_tables_and_insert_data(node, mt_table, s3_table, replica_name): create_s3_table(node, s3_table) +def create_sharded_tables_and_insert_data(node, mt_table, s3_table, replica_name): + """Create sharded ReplicatedMergeTree table with {shard} macro in ZooKeeper path.""" + node.query(f"CREATE TABLE {mt_table} (id UInt64, year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{{shard}}/{mt_table}', '{replica_name}') PARTITION BY year ORDER BY tuple()") + node.query(f"INSERT INTO {mt_table} VALUES (1, 2020), 
(2, 2020), (3, 2020), (4, 2021)") + + create_s3_table(node, s3_table) + + def test_restart_nodes_during_export(cluster): skip_if_remote_database_disk_enabled(cluster) node = cluster.instances["replica1"] @@ -1148,3 +1176,82 @@ def test_export_partition_with_mixed_computed_columns(cluster): AND partition_id = '1' """) assert status.strip() == "COMPLETED", f"Expected COMPLETED status, got: {status}" + + +def test_sharded_export_partition_with_filename_pattern(cluster): + """Test that export partition with filename pattern prevents collisions in sharded setup.""" + shard1_r1 = cluster.instances["shard1_replica1"] + shard2_r1 = cluster.instances["shard2_replica1"] + watcher_node = cluster.instances["watcher_node"] + + mt_table = "sharded_mt_table" + s3_table = "sharded_s3_table" + + # Create sharded tables on all shards with same partition data (same part names) + # Each shard uses different ZooKeeper path via {shard} macro + create_sharded_tables_and_insert_data(shard1_r1, mt_table, s3_table, "replica1") + create_sharded_tables_and_insert_data(shard2_r1, mt_table, s3_table, "replica1") + create_s3_table(watcher_node, s3_table) + + # Export partition from both shards with filename pattern including shard + # This should prevent filename collisions + shard1_r1.query( + f"ALTER TABLE {mt_table} EXPORT PARTITION ID '2020' TO TABLE {s3_table} " + f"SETTINGS export_merge_tree_part_filename_pattern = '{{part_name}}_{{shard}}_{{replica}}_{{checksum}}'" + ) + shard2_r1.query( + f"ALTER TABLE {mt_table} EXPORT PARTITION ID '2020' TO TABLE {s3_table} " + f"SETTINGS export_merge_tree_part_filename_pattern = '{{part_name}}_{{shard}}_{{replica}}_{{checksum}}'" + ) + + # Wait for exports to complete + wait_for_export_status(shard1_r1, mt_table, s3_table, "2020", "COMPLETED") + wait_for_export_status(shard2_r1, mt_table, s3_table, "2020", "COMPLETED") + + total_count = watcher_node.query(f"SELECT count() FROM {s3_table} WHERE year = 2020").strip() + assert total_count == "6", 
f"Expected 6 total rows (3 from each shard), got {total_count}" + + # Verify filenames contain shard information (check via S3 directly) + # Get all files from S3 - query from watcher_node since S3 is shared + files_shard1 = watcher_node.query( + f"SELECT _file FROM s3(s3_conn, filename='{s3_table}/**', format='One') WHERE _file LIKE '%shard1%' LIMIT 1" + ).strip() + files_shard2 = watcher_node.query( + f"SELECT _file FROM s3(s3_conn, filename='{s3_table}/**', format='One') WHERE _file LIKE '%shard2%' LIMIT 1" + ).strip() + + # Both shards should have files with their shard names + assert "shard1" in files_shard1 or files_shard1 == "", f"Expected shard1 in filenames, got: {files_shard1}" + assert "shard2" in files_shard2 or files_shard2 == "", f"Expected shard2 in filenames, got: {files_shard2}" + + +def test_sharded_export_partition_default_pattern(cluster): + shard1_r1 = cluster.instances["shard1_replica1"] + shard2_r1 = cluster.instances["shard2_replica1"] + watcher_node = cluster.instances["watcher_node"] + + mt_table = "sharded_mt_table_default" + s3_table = "sharded_s3_table_default" + + # Create sharded tables with different ZooKeeper paths per shard + create_sharded_tables_and_insert_data(shard1_r1, mt_table, s3_table, "replica1") + create_sharded_tables_and_insert_data(shard2_r1, mt_table, s3_table, "replica1") + create_s3_table(watcher_node, s3_table) + + # Export with default pattern ({part_name}_{checksum}) - may cause collisions if parts have same name and the same checksum + shard1_r1.query( + f"ALTER TABLE {mt_table} EXPORT PARTITION ID '2020' TO TABLE {s3_table}" + ) + shard2_r1.query( + f"ALTER TABLE {mt_table} EXPORT PARTITION ID '2020' TO TABLE {s3_table}" + ) + + wait_for_export_status(shard1_r1, mt_table, s3_table, "2020", "COMPLETED") + wait_for_export_status(shard2_r1, mt_table, s3_table, "2020", "COMPLETED") + + # Both exports should complete (even if there are collisions, the overwrite policy handles it) + # S3 tables are shared, so query 
from watcher_node + total_count = watcher_node.query(f"SELECT count() FROM {s3_table} WHERE year = 2020").strip() + + # only one file with 3 rows should be present + assert int(total_count) == 3, f"Expected 3 rows, got {total_count}" diff --git a/tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.reference b/tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.reference new file mode 100644 index 000000000000..8016f5aa113e --- /dev/null +++ b/tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.reference @@ -0,0 +1,16 @@ +---- Test: Default pattern {part_name}_{checksum} +1 2020 +2 2020 +3 2020 +---- Verify filename matches 2020_1_1_0_*.1.parquet +1 +---- Test: Custom prefix pattern +4 2021 +---- Verify filename matches myprefix_2021_2_2_0.1.parquet +1 +---- Test: Pattern with macros +1 2020 +2 2020 +3 2020 +---- Verify macros expanded (no literal braces in parquet filenames, that's the best we can do for stateless tests) +1 diff --git a/tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.sh b/tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.sh new file mode 100755 index 000000000000..12b47f4f2664 --- /dev/null +++ b/tests/queries/0_stateless/03608_export_merge_tree_part_filename_pattern.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires s3 storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +R=$RANDOM +mt="mt_${R}" +dest1="fp_dest1_${R}" +dest2="fp_dest2_${R}" +dest3="fp_dest3_${R}" + +query() { + $CLICKHOUSE_CLIENT --query "$1" +} + +query "DROP TABLE IF EXISTS $mt, $dest1, $dest2, $dest3" + +query "CREATE TABLE $mt (id UInt64, year UInt16) ENGINE = MergeTree() PARTITION BY year ORDER BY tuple()" +query "INSERT INTO $mt VALUES (1, 2020), (2, 2020), (3, 2020), (4, 2021)" + +query "CREATE TABLE $dest1 (id UInt64, year UInt16) ENGINE = S3(s3_conn, filename='$dest1', format=Parquet, partition_strategy='hive') PARTITION BY year" +query "CREATE TABLE $dest2 (id UInt64, year UInt16) ENGINE = S3(s3_conn, filename='$dest2', format=Parquet, partition_strategy='hive') PARTITION BY year" +query "CREATE TABLE $dest3 (id UInt64, year UInt16) ENGINE = S3(s3_conn, filename='$dest3', format=Parquet, partition_strategy='hive') PARTITION BY year" + +echo "---- Test: Default pattern {part_name}_{checksum}" +query "ALTER TABLE $mt EXPORT PART '2020_1_1_0' TO TABLE $dest1 SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_filename_pattern = '{part_name}_{checksum}'" +sleep 3 +query "SELECT * FROM $dest1 ORDER BY id" +echo "---- Verify filename matches 2020_1_1_0_*.1.parquet" +query "SELECT count() FROM s3(s3_conn, filename='$dest1/**/2020_1_1_0_*.1.parquet', format='One')" + +echo "---- Test: Custom prefix pattern" +query "ALTER TABLE $mt EXPORT PART '2021_2_2_0' TO TABLE $dest2 SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_filename_pattern = 'myprefix_{part_name}'" +sleep 3 +query "SELECT * FROM $dest2 ORDER BY id" +echo "---- Verify filename matches myprefix_2021_2_2_0.1.parquet" +query "SELECT count() FROM s3(s3_conn, filename='$dest2/**/myprefix_2021_2_2_0.1.parquet', format='One')" + +echo "---- Test: Pattern with macros" +query "ALTER TABLE $mt EXPORT PART '2020_1_1_0' TO TABLE $dest3 SETTINGS allow_experimental_export_merge_tree_part = 1, 
export_merge_tree_part_filename_pattern = '{database}_{table}_{part_name}'" +sleep 3 +query "SELECT * FROM $dest3 ORDER BY id" +echo "---- Verify macros expanded (no literal braces in parquet filenames, that's the best we can do for stateless tests)" +query "SELECT count() = 0 FROM s3(s3_conn, filename='$dest3/**/*.1.parquet', format='One') WHERE _file LIKE '%{%'" + +query "DROP TABLE IF EXISTS $mt, $dest1, $dest2, $dest3" From 83c476f2bf98cd6927678098933a2e9bf46832c4 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 9 Mar 2026 10:39:25 -0300 Subject: [PATCH 2/5] add docs --- .../engines/table-engines/mergetree-family/part_export.md | 7 +++++++ .../table-engines/mergetree-family/partition_export.md | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/part_export.md b/docs/en/engines/table-engines/mergetree-family/part_export.md index 287e0a17f3af..b7f0730596a4 100644 --- a/docs/en/engines/table-engines/mergetree-family/part_export.md +++ b/docs/en/engines/table-engines/mergetree-family/part_export.md @@ -84,6 +84,13 @@ In case a table function is used as the destination, the schema can be omitted a - **Default**: `true` - **Description**: If set to true, throws if pending patch parts exists for a given part. Note that by default mutations are applied to all parts, which means that if a mutation in practice would only affetct part/partition x, all the other parts/partition will throw upon export. The exception is when the `IN PARTITION` clause was used in the mutation command. Note the `IN PARTITION` clause is not properly implemented for plain MergeTree tables. +### export_merge_tree_part_filename_pattern + +- **Type**: `String` +- **Default**: `{part_name}_{checksum}` +- **Description**: Pattern for the filename of the exported merge tree part. The `part_name` and `checksum` are calculated and replaced on the fly. Additional macros are supported. 
+ + ## Examples ### Basic Export to S3 diff --git a/docs/en/engines/table-engines/mergetree-family/partition_export.md b/docs/en/engines/table-engines/mergetree-family/partition_export.md index d91f226dbbf6..af503ec5180a 100644 --- a/docs/en/engines/table-engines/mergetree-family/partition_export.md +++ b/docs/en/engines/table-engines/mergetree-family/partition_export.md @@ -82,6 +82,12 @@ TO TABLE [destination_database.]destination_table - **Default**: `true` - **Description**: If set to true, throws if pending patch parts exists for a given part. Note that by default mutations are applied to all parts, which means that if a mutation in practice would only affetct part/partition x, all the other parts/partition will throw upon export. The exception is when the `IN PARTITION` clause was used in the mutation command. Note the `IN PARTITION` clause is not properly implemented for plain MergeTree tables. +### export_merge_tree_part_filename_pattern + +- **Type**: `String` +- **Default**: `{part_name}_{checksum}` +- **Description**: Pattern for the filename of the exported merge tree part. The `part_name` and `checksum` are calculated and replaced on the fly. Additional macros are supported. 
+ ## Examples ### Basic Export to S3 From 97debff3e88ba6f3a31e8e7b6aa84193e5bf2c17 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 9 Mar 2026 12:01:31 -0300 Subject: [PATCH 3/5] fix database replicated --- src/Storages/MergeTree/ExportPartTask.cpp | 11 +++ .../test.py | 80 +++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/src/Storages/MergeTree/ExportPartTask.cpp b/src/Storages/MergeTree/ExportPartTask.cpp index 5f619f1df6be..20e8d52f03a1 100644 --- a/src/Storages/MergeTree/ExportPartTask.cpp +++ b/src/Storages/MergeTree/ExportPartTask.cpp @@ -19,6 +19,7 @@ #include "Common/setThreadName.h" #include #include +#include #include #include #include @@ -109,6 +110,16 @@ namespace Macros::MacroExpansionInfo macro_info; macro_info.table_id = storage_id; + + if (auto database = DatabaseCatalog::instance().tryGetDatabase(storage_id.database_name)) + { + if (const auto replicated = dynamic_cast(database.get())) + { + macro_info.shard = replicated->getShardName(); + macro_info.replica = replicated->getReplicaName(); + } + } + filename = local_context->getMacros()->expand(filename, macro_info); return filename; diff --git a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py index eb8effb66660..4bc032a71224 100644 --- a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py +++ b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py @@ -1225,6 +1225,86 @@ def test_sharded_export_partition_with_filename_pattern(cluster): assert "shard2" in files_shard2 or files_shard2 == "", f"Expected shard2 in filenames, got: {files_shard2}" +def test_export_partition_from_replicated_database_uses_db_shard_replica_macros(cluster): + """Test that {shard} and {replica} in the filename pattern are expanded from the + DatabaseReplicated identity, NOT from server config macros. 
+ + replica1 has no <shard>/<replica> entries in its server config <macros> section. + Without the fix buildDestinationFilename() leaves macro_info.shard/replica unset, so + Macros::expand() falls through to the config-macros lookup and throws NO_ELEMENTS_IN_CONFIG. + With the fix the DatabaseReplicated shard_name / replica_name are injected into macro_info + before the expand call, and the pattern resolves correctly. + """ + node = cluster.instances["replica1"] + watcher_node = cluster.instances["watcher_node"] + + postfix = str(uuid.uuid4()).replace("-", "_") + db_name = f"repdb_{postfix}" + table_name = "mt_table" + s3_table = f"s3_dbreplicated_{postfix}" + + # These values exist only in the DatabaseReplicated definition – they are NOT + # present anywhere in replica1's server config <macros>. + db_shard = "db_shard_x" + db_replica = "db_replica_y" + + node.query( + f"CREATE DATABASE {db_name} " + f"ENGINE = Replicated('/clickhouse/databases/{db_name}', '{db_shard}', '{db_replica}')") + + node.query(f""" + CREATE TABLE {db_name}.{table_name} + (id UInt64, year UInt16) + ENGINE = ReplicatedMergeTree() + PARTITION BY year ORDER BY tuple()""") + + node.query(f"INSERT INTO {db_name}.{table_name} VALUES (1, 2020), (2, 2020), (3, 2020)") + # Stop merges so part names stay stable during the test. + node.query(f"SYSTEM STOP MERGES {db_name}.{table_name}") + + node.query( + f"CREATE TABLE {s3_table} (id UInt64, year UInt16) " + f"ENGINE = S3(s3_conn, filename='{s3_table}', format=Parquet, partition_strategy='hive') " + f"PARTITION BY year") + + watcher_node.query( + f"CREATE TABLE {s3_table} (id UInt64, year UInt16) " + f"ENGINE = S3(s3_conn, filename='{s3_table}', format=Parquet, partition_strategy='hive') " + f"PARTITION BY year") + + # Export with {shard} and {replica} in the pattern. + # Before the fix: Macros::expand throws NO_ELEMENTS_IN_CONFIG because replica1 has + # no <shard>/<replica> server config macros.
+ # After the fix: DatabaseReplicated's shard_name/replica_name are wired into + # macro_info before the expand call, so this succeeds and produces the right names. + node.query( + f"ALTER TABLE {db_name}.{table_name} EXPORT PARTITION ID '2020' TO TABLE {s3_table} " + f"SETTINGS export_merge_tree_part_filename_pattern = " + f"'{{part_name}}_{{shard}}_{{replica}}_{{checksum}}'") + + # A FAILED status here almost certainly means the macro expansion threw + # NO_ELEMENTS_IN_CONFIG (i.e. the fix is missing or broken). + wait_for_export_status(node, table_name, s3_table, "2020", "COMPLETED") + + # Data should have landed in S3. + count = watcher_node.query(f"SELECT count() FROM {s3_table} WHERE year = 2020").strip() + assert count == "3", f"Expected 3 exported rows, got {count}" + + # The exported filename must contain the exact shard and replica names from the + # DatabaseReplicated definition, proving the fix injected them (not server config macros). + filename = watcher_node.query( + f"SELECT _file FROM s3(s3_conn, filename='{s3_table}/**/*.parquet', format='One') LIMIT 1" + ).strip() + + assert db_shard in filename, ( + f"Expected filename to contain DatabaseReplicated shard '{db_shard}', got: {filename!r}. " + "Suggests {shard} was not expanded from the DatabaseReplicated identity.") + + assert db_replica in filename, ( + f"Expected filename to contain DatabaseReplicated replica '{db_replica}', got: {filename!r}. 
" + "Suggests {replica} was not expanded from the DatabaseReplicated identity.") + + def test_sharded_export_partition_default_pattern(cluster): shard1_r1 = cluster.instances["shard1_replica1"] shard2_r1 = cluster.instances["shard2_replica1"] From 2ca197c9e8619143570d03d3c2966e9355e91d6a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 9 Mar 2026 12:02:51 -0300 Subject: [PATCH 4/5] settings history --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index bfe9d06ea24e..850b62776ff3 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -44,6 +44,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory() {"iceberg_partition_timezone", "", "", "New setting."}, // {"object_storage_max_nodes", 0, 0, "Antalya: New setting"}, {"s3_propagate_credentials_to_other_storages", false, false, "New setting"}, + {"export_merge_tree_part_filename_pattern", "", "{part_name}_{checksum}", "New setting"}, }); addSettingsChanges(settings_changes_history, "26.1", { From 4120b578d87fa7d5bc47bbb553d3487bff761fae Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 10 Mar 2026 09:21:26 -0300 Subject: [PATCH 5/5] skip test --- .../test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py index 4bc032a71224..5b96e1ad286b 100644 --- a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py +++ b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py @@ -1235,6 +1235,12 @@ def test_export_partition_from_replicated_database_uses_db_shard_replica_macros( With the fix the DatabaseReplicated shard_name / replica_name are injected into macro_info before the expand call, and the pattern resolves correctly. 
""" + + # The remote disk test suite sets the shard and replica macros in https://github.com/Altinity/ClickHouse/blob/bbabcaa96e8b7fe8f70ecd0bd4f76fb0f76f2166/tests/integration/helpers/cluster.py#L4356 + # When expanding the macros, the configured ones are preferred over the ones from the DatabaseReplicated definition. + # Therefore, this test fails. It is easier to skip it than to fix it. + skip_if_remote_database_disk_enabled(cluster) + node = cluster.instances["replica1"] watcher_node = cluster.instances["watcher_node"]