From f791d8ff849a63592bed8850ed9f64abba0ab35c Mon Sep 17 00:00:00 2001
From: Claude
Date: Mon, 2 Mar 2026 01:14:00 +0000
Subject: [PATCH] fix(builtins): use char-based precision in printf for UTF-8 safety

printf %.*s was using byte slicing which panics when the truncation
point falls inside a multi-byte UTF-8 character. Now uses
chars().take(prec) for safe char-based truncation.

Closes #435

https://claude.ai/code/session_01WZjYqxm5xMPAEe7FSHJkDy
---
 crates/bashkit/src/builtins/printf.rs | 31 ++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/crates/bashkit/src/builtins/printf.rs b/crates/bashkit/src/builtins/printf.rs
index ed7afa61..cd396049 100644
--- a/crates/bashkit/src/builtins/printf.rs
+++ b/crates/bashkit/src/builtins/printf.rs
@@ -161,8 +161,12 @@ impl FormatSpec {
 
     /// Format a string with the parsed spec
     fn format_str(&self, s: &str) -> String {
+        // TM-UNI-016: Use char-based truncation, not byte-based, to avoid
+        // panics when precision falls inside a multi-byte UTF-8 character.
+        let truncated;
         let s = if let Some(prec) = self.precision {
-            &s[..s.len().min(prec)]
+            truncated = s.chars().take(prec).collect::<String>();
+            truncated.as_str()
         } else {
             s
         };
@@ -620,4 +624,29 @@ mod tests {
         assert_eq!(expand_escapes("\\u03bc"), "\u{03bc}");
         assert_eq!(expand_escapes("\\U000003bc"), "\u{03bc}");
     }
+
+    // Issue #435: precision should use char count, not byte count
+    #[test]
+    fn test_precision_multibyte_utf8() {
+        // "café" = 4 chars, 5 bytes. %.3s should give "caf", not panic.
+        let args = vec!["café".to_string()];
+        let mut idx = 0;
+        assert_eq!(
+            format_string("%.3s", &args, &mut idx),
+            "caf",
+            "precision should truncate by chars"
+        );
+    }
+
+    #[test]
+    fn test_precision_cjk() {
+        // "日本語" = 3 chars, 9 bytes. %.2s should give "日本"
+        let args = vec!["日本語".to_string()];
+        let mut idx = 0;
+        assert_eq!(
+            format_string("%.2s", &args, &mut idx),
+            "日本",
+            "should handle CJK chars"
+        );
+    }
 }