From d29ea9b946e4d3ea9b03690b589e9a57cdeeb701 Mon Sep 17 00:00:00 2001 From: Zehua Zou Date: Sat, 7 Feb 2026 19:40:20 +0800 Subject: [PATCH] refactor: use from_chars instead of stoi/stoll/stod --- src/iceberg/avro/avro_schema_util.cc | 9 ++++----- src/iceberg/avro/avro_writer.cc | 2 +- src/iceberg/json_serde.cc | 12 +++++++++--- src/iceberg/metrics_config.cc | 11 ++++++----- src/iceberg/parquet/parquet_schema_util.cc | 13 +++++-------- src/iceberg/parquet/parquet_writer.cc | 2 +- src/iceberg/result.h | 4 ++-- src/iceberg/snapshot.cc | 4 ++-- src/iceberg/test/update_properties_test.cc | 2 +- src/iceberg/transform.cc | 4 +++- src/iceberg/update/snapshot_update.cc | 12 ++++++------ src/iceberg/update/update_properties.cc | 2 +- src/iceberg/util/config.h | 10 ++++++---- src/iceberg/util/decimal.cc | 9 ++++----- src/iceberg/util/string_util.h | 18 ++++++++++++------ 15 files changed, 63 insertions(+), 51 deletions(-) diff --git a/src/iceberg/avro/avro_schema_util.cc b/src/iceberg/avro/avro_schema_util.cc index b19ffce7d..75db6d8d9 100644 --- a/src/iceberg/avro/avro_schema_util.cc +++ b/src/iceberg/avro/avro_schema_util.cc @@ -18,7 +18,6 @@ */ #include -#include #include #include @@ -31,13 +30,12 @@ #include #include "iceberg/avro/avro_constants.h" -#include "iceberg/avro/avro_register.h" #include "iceberg/avro/avro_schema_util_internal.h" #include "iceberg/metadata_columns.h" #include "iceberg/name_mapping.h" #include "iceberg/schema.h" #include "iceberg/schema_util_internal.h" -#include "iceberg/util/formatter.h" +#include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/macros.h" #include "iceberg/util/string_util.h" #include "iceberg/util/visit_type.h" @@ -471,7 +469,7 @@ Result GetId(const ::avro::NodePtr& node, const std::string& attr_name, return InvalidSchema("Missing avro attribute: {}", attr_name); } - return StringUtils::ParseInt(id_str.value()); + return StringUtils::ParseNumber(id_str.value()); } Result GetElementId(const ::avro::NodePtr& node) { @@ -729,7 +727,8 @@ Result ProjectMap(const MapType& map_type, const auto& expected_value_field = map_type.value(); FieldProjection result; - int32_t avro_key_id, avro_value_id; + int32_t avro_key_id; + int32_t avro_value_id; ::avro::NodePtr map_node; if (avro_node->type() == ::avro::AVRO_MAP) { diff --git a/src/iceberg/avro/avro_writer.cc b/src/iceberg/avro/avro_writer.cc index 91b2a93f6..350530b3d 100644 --- a/src/iceberg/avro/avro_writer.cc +++ b/src/iceberg/avro/avro_writer.cc @@ -76,7 +76,7 @@ Result> ParseCodecLevel(const WriterProperties& propertie if (level_str.empty()) { return std::nullopt; } - ICEBERG_ASSIGN_OR_RAISE(auto level, StringUtils::ParseInt(level_str)); + ICEBERG_ASSIGN_OR_RAISE(auto level, StringUtils::ParseNumber(level_str)); return level; } diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 004e8b3f5..7d6c9ee25 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -47,6 +47,7 @@ #include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/json_util_internal.h" #include "iceberg/util/macros.h" +#include "iceberg/util/string_util.h" #include "iceberg/util/timepoint.h" namespace iceberg { @@ -497,15 +498,20 @@ Result> TypeFromJson(const nlohmann::json& json) { std::regex fixed_regex(R"(fixed\[\s*(\d+)\s*\])"); std::smatch match; if (std::regex_match(type_str, match, fixed_regex)) { - return std::make_unique(std::stoi(match[1].str())); + ICEBERG_ASSIGN_OR_RAISE(auto length, + StringUtils::ParseNumber(match[1].str())); + return std::make_unique(length); } return JsonParseError("Invalid fixed type: {}", type_str); } else if (type_str.starts_with("decimal")) { std::regex decimal_regex(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))"); std::smatch match; if (std::regex_match(type_str, match, decimal_regex)) { - return std::make_unique(std::stoi(match[1].str()), - std::stoi(match[2].str())); + ICEBERG_ASSIGN_OR_RAISE(auto precision, + StringUtils::ParseNumber(match[1].str())); + ICEBERG_ASSIGN_OR_RAISE(auto scale, + StringUtils::ParseNumber(match[2].str())); + return std::make_unique(precision, scale); } return JsonParseError("Invalid decimal type: {}", type_str); } else { diff --git a/src/iceberg/metrics_config.cc b/src/iceberg/metrics_config.cc index 8e2c7262d..e378640e0 100644 --- a/src/iceberg/metrics_config.cc +++ b/src/iceberg/metrics_config.cc @@ -19,7 +19,6 @@ #include "iceberg/metrics_config.h" -#include #include #include @@ -29,6 +28,8 @@ #include "iceberg/table.h" #include "iceberg/table_properties.h" #include "iceberg/util/checked_cast.h" +#include "iceberg/util/macros.h" +#include "iceberg/util/string_util.h" #include "iceberg/util/type_util.h" namespace iceberg { @@ -84,12 +85,12 @@ Result MetricsMode::FromString(std::string_view mode) { } if (StringUtils::StartsWithIgnoreCase(mode, kTruncatePrefix) && mode.ends_with(")")) { - int32_t length; - auto [ptr, ec] = std::from_chars(mode.data() + 9 /* "truncate(" length */, - mode.data() + mode.size() - 1, length); - if (ec != std::errc{}) { + auto res = StringUtils::ParseNumber( + mode.substr(9 /* "truncate(" length */, mode.size() - 10)); + if (!res.has_value()) { return InvalidArgument("Invalid truncate mode: {}", mode); } + int32_t length = res.value(); if (length == kDefaultTruncateLength) { return kDefaultMetricsMode; } diff --git a/src/iceberg/parquet/parquet_schema_util.cc b/src/iceberg/parquet/parquet_schema_util.cc index 088c15c04..e9574a48c 100644 --- a/src/iceberg/parquet/parquet_schema_util.cc +++ b/src/iceberg/parquet/parquet_schema_util.cc @@ -17,8 +17,6 @@ * under the License. */ -#include - #include #include #include @@ -32,8 +30,9 @@ #include "iceberg/result.h" #include "iceberg/schema_util_internal.h" #include "iceberg/util/checked_cast.h" -#include "iceberg/util/formatter.h" +#include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/macros.h" +#include "iceberg/util/string_util.h" namespace iceberg::parquet { @@ -49,13 +48,11 @@ std::optional FieldIdFromMetadata( return std::nullopt; } std::string field_id_str = metadata->value(key); - int32_t field_id = -1; - auto [_, ec] = std::from_chars(field_id_str.data(), - field_id_str.data() + field_id_str.size(), field_id); - if (ec != std::errc() || field_id < 0) { + auto res = StringUtils::ParseNumber(field_id_str); + if (!res.has_value() || res.value() < 0) { return std::nullopt; } - return field_id; + return res.value(); } std::optional GetFieldId(const ::parquet::arrow::SchemaField& parquet_field) { diff --git a/src/iceberg/parquet/parquet_writer.cc b/src/iceberg/parquet/parquet_writer.cc index bac25c9ef..c874665a6 100644 --- a/src/iceberg/parquet/parquet_writer.cc +++ b/src/iceberg/parquet/parquet_writer.cc @@ -70,7 +70,7 @@ Result> ParseCodecLevel(const WriterProperties& propertie if (level_str.empty()) { return std::nullopt; } - ICEBERG_ASSIGN_OR_RAISE(auto level, StringUtils::ParseInt(level_str)); + ICEBERG_ASSIGN_OR_RAISE(auto level, StringUtils::ParseNumber(level_str)); return level; } diff --git a/src/iceberg/result.h b/src/iceberg/result.h index 223352f61..765508705 100644 --- a/src/iceberg/result.h +++ b/src/iceberg/result.h @@ -88,8 +88,8 @@ using Status = Result; return std::unexpected( \ {ErrorKind::k##name, std::format(fmt, std::forward(args)...)}); \ } \ - inline auto name(const std::string& message) -> std::unexpected { \ - return std::unexpected({ErrorKind::k##name, message}); \ + inline auto name(std::string message) -> std::unexpected { \ + return std::unexpected({ErrorKind::k##name, std::move(message)}); \ } DEFINE_ERROR_FUNCTION(AlreadyExists) diff --git a/src/iceberg/snapshot.cc b/src/iceberg/snapshot.cc index cdbdd4529..1b3182fd9 100644 --- a/src/iceberg/snapshot.cc +++ b/src/iceberg/snapshot.cc @@ -167,7 +167,7 @@ Result> Snapshot::FirstRowId() const { return std::nullopt; } - return StringUtils::ParseInt(it->second); + return StringUtils::ParseNumber(it->second); } Result> Snapshot::AddedRows() const { @@ -176,7 +176,7 @@ Result> Snapshot::AddedRows() const { return std::nullopt; } - return StringUtils::ParseInt(it->second); + return StringUtils::ParseNumber(it->second); } bool Snapshot::Equals(const Snapshot& other) const { diff --git a/src/iceberg/test/update_properties_test.cc b/src/iceberg/test/update_properties_test.cc index 59fa1d8d6..886558153 100644 --- a/src/iceberg/test/update_properties_test.cc +++ b/src/iceberg/test/update_properties_test.cc @@ -107,7 +107,7 @@ TEST_F(UpdatePropertiesTest, UpgradeFormatVersionInvalidString) { auto result = update->Apply(); EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(result, HasErrorMessage("Failed to parse integer from string")); + EXPECT_THAT(result, HasErrorMessage("invalid argument")); } TEST_F(UpdatePropertiesTest, UpgradeFormatVersionOutOfRange) { diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index 004111c3f..c210f9ed2 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -31,6 +31,7 @@ #include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" #include "iceberg/util/projection_util_internal.h" +#include "iceberg/util/string_util.h" #include "iceberg/util/transform_util.h" namespace iceberg { @@ -514,7 +515,8 @@ Result> TransformFromString(std::string_view transfor std::smatch match; if (std::regex_match(str, match, param_regex)) { const std::string type_str = match[1]; - const int32_t param = std::stoi(match[2]); + ICEBERG_ASSIGN_OR_RAISE(const auto param, + StringUtils::ParseNumber(match[2].str())); if (type_str == kBucketName) { return Transform::Bucket(param); diff --git a/src/iceberg/update/snapshot_update.cc b/src/iceberg/update/snapshot_update.cc index 38c5129f4..4865278f0 100644 --- a/src/iceberg/update/snapshot_update.cc +++ b/src/iceberg/update/snapshot_update.cc @@ -49,19 +49,19 @@ Status UpdateTotal(std::unordered_map& summary, auto total_it = previous_summary.find(total_property); if (total_it != previous_summary.end()) { ICEBERG_ASSIGN_OR_RAISE(auto new_total, - StringUtils::ParseInt(total_it->second)); + StringUtils::ParseNumber(total_it->second)); auto added_it = summary.find(added_property); if (new_total >= 0 && added_it != summary.end()) { ICEBERG_ASSIGN_OR_RAISE(auto added_value, - StringUtils::ParseInt(added_it->second)); + StringUtils::ParseNumber(added_it->second)); new_total += added_value; } auto deleted_it = summary.find(deleted_property); if (new_total >= 0 && deleted_it != summary.end()) { ICEBERG_ASSIGN_OR_RAISE(auto deleted_value, - StringUtils::ParseInt(deleted_it->second)); + StringUtils::ParseNumber(deleted_it->second)); new_total -= deleted_value; } @@ -276,9 +276,9 @@ Result SnapshotUpdate::Apply() { auto added_records_it = summary.find(SnapshotSummaryFields::kAddedRecords); auto replaced_records_it = summary.find(SnapshotSummaryFields::kDeletedRecords); if (added_records_it != summary.cend() && replaced_records_it != summary.cend()) { - ICEBERG_ASSIGN_OR_RAISE(auto added_records, - StringUtils::ParseInt(added_records_it->second)); - ICEBERG_ASSIGN_OR_RAISE(auto replaced_records, StringUtils::ParseInt( + ICEBERG_ASSIGN_OR_RAISE(auto added_records, StringUtils::ParseNumber( + added_records_it->second)); + ICEBERG_ASSIGN_OR_RAISE(auto replaced_records, StringUtils::ParseNumber( replaced_records_it->second)); ICEBERG_PRECHECK( added_records <= replaced_records, diff --git a/src/iceberg/update/update_properties.cc b/src/iceberg/update/update_properties.cc index fe49df81c..e6fe7a603 100644 --- a/src/iceberg/update/update_properties.cc +++ b/src/iceberg/update/update_properties.cc @@ -85,7 +85,7 @@ Result UpdateProperties::Apply() { auto iter = new_properties.find(TableProperties::kFormatVersion.key()); if (iter != new_properties.end()) { ICEBERG_ASSIGN_OR_RAISE(auto parsed_version, - StringUtils::ParseInt(iter->second)); + StringUtils::ParseNumber(iter->second)); if (parsed_version > TableMetadata::kSupportedTableFormatVersion) { return InvalidArgument( diff --git a/src/iceberg/util/config.h b/src/iceberg/util/config.h index 5adbc96d9..36d566fb9 100644 --- a/src/iceberg/util/config.h +++ b/src/iceberg/util/config.h @@ -24,6 +24,8 @@ #include #include "iceberg/exception.h" +#include "iceberg/util/macros.h" +#include "iceberg/util/string_util.h" namespace iceberg { namespace internal { @@ -50,10 +52,10 @@ U DefaultFromString(const std::string& val) { return val; } else if constexpr (std::is_same_v) { return val == "true"; - } else if constexpr (std::is_signed_v && std::is_integral_v) { - return static_cast(std::stoll(val)); - } else if constexpr (std::is_floating_point_v) { - return static_cast(std::stod(val)); + } else if constexpr ((std::is_signed_v && std::is_integral_v) || + std::is_floating_point_v) { + ICEBERG_ASSIGN_OR_THROW(auto res, StringUtils::ParseNumber(val)); + return res; } else { throw IcebergError( std::format("Explicit from_str() is required for {}", typeid(U).name())); diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc index 433d35869..aeb7c6147 100644 --- a/src/iceberg/util/decimal.cc +++ b/src/iceberg/util/decimal.cc @@ -39,6 +39,7 @@ #include "iceberg/result.h" #include "iceberg/util/int128.h" #include "iceberg/util/macros.h" +#include "iceberg/util/string_util.h" namespace iceberg { @@ -201,13 +202,11 @@ inline void ShiftAndAdd(std::string_view input, uint128_t& out) { for (size_t pos = 0; pos < input.size();) { const size_t group_size = std::min(kInt64DecimalDigits, input.size() - pos); const uint64_t multiple = kUInt64PowersOfTen[group_size]; - uint64_t value = 0; - auto [_, ec] = - std::from_chars(input.data() + pos, input.data() + pos + group_size, value); - ICEBERG_DCHECK(ec == std::errc(), "Failed to parse digits in ShiftAndAdd"); + auto res = StringUtils::ParseNumber(input.substr(pos, group_size)); + ICEBERG_DCHECK(res.has_value(), "Failed to parse digits in ShiftAndAdd"); - out = out * multiple + value; + out = out * multiple + res.value(); pos += group_size; } } diff --git a/src/iceberg/util/string_util.h b/src/iceberg/util/string_util.h index dfedb4a72..3528204ee 100644 --- a/src/iceberg/util/string_util.h +++ b/src/iceberg/util/string_util.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "iceberg/iceberg_export.h" #include "iceberg/result.h" @@ -67,17 +68,22 @@ class ICEBERG_EXPORT StringUtils { } template - static Result ParseInt(std::string_view str) { + requires std::is_arithmetic_v && (!std::same_as) + static Result ParseNumber(std::string_view str) { T value = 0; auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(), value); - if (ec == std::errc::invalid_argument) [[unlikely]] { - return InvalidArgument("Failed to parse integer from string '{}': invalid argument", - str); - } else if (ec == std::errc::result_out_of_range) [[unlikely]] { + if (ec == std::errc()) [[likely]] { + return value; + } + if (ec == std::errc::invalid_argument) { + return InvalidArgument("Failed to parse {} from string '{}': invalid argument", + typeid(T).name(), str); + } + if (ec == std::errc::result_out_of_range) { return InvalidArgument("Failed to parse {} from string '{}': value out of range", typeid(T).name(), str); } - return value; + std::unreachable(); } };