Skip to content

Commit

Permalink
fix truncate in middle of unicode char
Browse files Browse the repository at this point in the history
  • Loading branch information
kindly committed Jan 25, 2024
1 parent 6e6b4cb commit 7d41da9
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2587,7 +2587,11 @@ impl FlatFiles {

if cell.len() > 32767 {
    log::warn!("WARNING: Cell larger than 32767 chararcters which is too large for XLSX format. The cell will be truncated, so some data will be missing.");
    // Truncating at a fixed byte offset panics when that offset falls inside a
    // multi-byte UTF-8 sequence (assuming `cell` is a `String`), so walk back from
    // the limit to the nearest character boundary before cutting.
    // Index 0 is always a character boundary, so the loop is guaranteed to stop.
    // NOTE(review): `len()` counts UTF-8 bytes while the warning speaks of
    // characters; non-ASCII cells may be cut earlier than the format strictly
    // requires — confirm this is acceptable.
    let mut index: usize = 32767;
    while !cell.is_char_boundary(index) {
        index -= 1;
    }
    cell.truncate(index);
}

if metadata.describers[order].guess_type().0 == "number" {
Expand Down Expand Up @@ -4377,6 +4381,15 @@ mod tests {
)
}

// #[test]
// fn test_is_char_boundry() {
// test_output(
// "fixtures/is_char_boundry.txt",
// vec![],
// json!({"ndjson": true, "xlsx": true}),
// )
// }

#[test]
fn test_s3_input() {
if std::env::var("AWS_DEFAULT_REGION").is_ok() {
Expand Down

0 comments on commit 7d41da9

Please sign in to comment.