Skip to content

Commit

Permalink
Properly convert `ISO_8859_1
Browse files Browse the repository at this point in the history
  • Loading branch information
toots committed Jun 26, 2023
1 parent a9419df commit 5eabc2c
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 15 deletions.
44 changes: 33 additions & 11 deletions src/metadataCharEncoding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ module Naive : T = struct
| `UTF_16, `UTF_16
| `UTF_16LE, `UTF_16LE
| `UTF_16BE, `UTF_16BE
- | `ISO_8859_1, _ ->
- s
| _ ->
let buf = Buffer.create 10 in
let len = String.length s in
Expand All @@ -26,32 +24,56 @@ module Naive : T = struct
in
let unicode_byte_length =
match source with
- | `ISO_8859_1 -> assert false
+ | `ISO_8859_1 -> fun _ -> 1
| `UTF_8 -> Uchar.utf_8_byte_length
| _ -> Uchar.utf_16_byte_length
in
let s, get_unicode_char =
match source with
- | `ISO_8859_1 -> assert false
- | `UTF_8 -> (s, String.get_utf_8_uchar)
- | `UTF_16BE -> (s, String.get_utf_16be_uchar)
- | `UTF_16LE -> (s, String.get_utf_16le_uchar)
+ | `ISO_8859_1 -> (s, fun s pos -> Uchar.of_char s.[pos])
+ | `UTF_8 ->
+ ( s,
+ fun s pos ->
+ Uchar.utf_decode_uchar (String.get_utf_8_uchar s pos) )
+ | `UTF_16BE ->
+ ( s,
+ fun s pos ->
+ Uchar.utf_decode_uchar (String.get_utf_16be_uchar s pos)
+ )
+ | `UTF_16LE ->
+ ( s,
+ fun s pos ->
+ Uchar.utf_decode_uchar (String.get_utf_16le_uchar s pos)
+ )
| `UTF_16 ->
- let default = ("", String.get_utf_16be_uchar) in
+ let default =
+ ( "",
+ fun s pos ->
+ Uchar.utf_decode_uchar (String.get_utf_16be_uchar s pos)
+ )
+ in
if len < 2 then default
else (
let rem = String.sub s 2 (len - 2) in
match (s.[0], s.[1]) with
- | '\xfe', '\xff' -> (rem, String.get_utf_16be_uchar)
- | '\xff', '\xfe' -> (rem, String.get_utf_16le_uchar)
+ | '\xfe', '\xff' ->
+ ( rem,
+ fun s pos ->
+ Uchar.utf_decode_uchar
+ (String.get_utf_16be_uchar s pos) )
+ | '\xff', '\xfe' ->
+ ( rem,
+ fun s pos ->
+ Uchar.utf_decode_uchar
+ (String.get_utf_16le_uchar s pos) )
| _ -> default)
in
if target = `UTF_16 then add_unicode_char buf Uchar.bom;
let len = String.length s in
let rec f pos =
if pos = len then Buffer.contents buf
else (
- let c = Uchar.utf_decode_uchar (get_unicode_char s pos) in
+ let c = get_unicode_char s pos in
add_unicode_char buf c;
f (pos + unicode_byte_length c))
in
Expand Down
8 changes: 4 additions & 4 deletions src/metadataID3v1.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ let parse ?(recode = MetadataCharEncoding.Naive.convert) f : metadata =
let size = match R.size f with Some n -> n | None -> raise Invalid in
R.drop f (size - 128);
if R.read f 3 <> "TAG" then raise Invalid;
- let title = R.read f 30 |> trim |> recode in
- let artist = R.read f 30 |> trim |> recode in
- let album = R.read f 30 |> trim |> recode in
- let year = R.read f 4 |> trim |> recode in
+ let title = R.read f 30 |> trim |> recode ~source:`ISO_8859_1 in
+ let artist = R.read f 30 |> trim |> recode ~source:`ISO_8859_1 in
+ let album = R.read f 30 |> trim |> recode ~source:`ISO_8859_1 in
+ let year = R.read f 4 |> trim |> recode ~source:`ISO_8859_1 in
let comment = R.read f 30 in
let comment, track, genre =
if comment.[27] = '\000' then
Expand Down

0 comments on commit 5eabc2c

Please sign in to comment.