From 31a475ab8595de84d75a94d1c43d08494103d017 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 9 Sep 2024 08:50:08 -0400 Subject: [PATCH] LibCompress: Speed up CanonicalCode::read_symbol() slow path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbols that need <= 8 bits hit a fast path as of #18075, but the slow path has done a full binary search over all symbols ever since this code was added in #2963. (#3405 even added a FIXME for doing this, but #18075 removed it.) Instead of doing a binary search over all codes for every single bit read, this implements the Moffat-Turpin approach described at https://www.hanshq.net/zip.html#huffdec, which only requires a table read per bit. hyperfine 'Build/lagom/bin/unzip ~/Downloads/enwik8.zip' 1.008 s ± 0.016 s => 957.7 ms ± 3.9 ms, 5% faster Due to issue #25005, we can't peek the full 15 bits at once but have to read them one-by-one. This makes the code look a bit different than in the linked article. I also tried not changing CanonicalCode::from_bytes() too much. It does 16 passes over all symbols. I think it could do it in a single pass instead. But that's for a future change. No behavior change (other than slightly faster perf). --- Userland/Libraries/LibCompress/Deflate.cpp | 35 +++++++++++++++------- Userland/Libraries/LibCompress/Deflate.h | 4 ++- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/Userland/Libraries/LibCompress/Deflate.cpp b/Userland/Libraries/LibCompress/Deflate.cpp index a952361443a2564..0554f4dc7a48860 100644 --- a/Userland/Libraries/LibCompress/Deflate.cpp +++ b/Userland/Libraries/LibCompress/Deflate.cpp @@ -86,11 +86,17 @@ ErrorOr CanonicalCode::from_bytes(ReadonlyBytes bytes) Array prefix_codes; size_t number_of_prefix_codes = 0; + code.m_first_symbol_of_length_after.append(0); + code.m_offset_to_first_symbol_index.append(0); + auto next_code = 0; for (size_t code_length = 1; code_length <= 15; ++code_length) { next_code <<= 1; auto start_bit = 1 << code_length; + auto first_code_at_length = next_code; + auto first_symbol_index_at_length = code.m_symbol_values.size(); + for (size_t symbol = 0; symbol < bytes.size(); ++symbol) { if (bytes[symbol] != code_length) continue; @@ -98,6 +104,8 @@ ErrorOr CanonicalCode::from_bytes(ReadonlyBytes bytes) if (next_code > start_bit) return Error::from_string_literal("Failed to decode code lengths"); + code.m_symbol_values.append(symbol); + if (code_length <= CanonicalCode::max_allowed_prefixed_code_length) { if (number_of_prefix_codes >= prefix_codes.size()) return Error::from_string_literal("Invalid canonical Huffman code"); @@ -108,9 +116,6 @@ ErrorOr CanonicalCode::from_bytes(ReadonlyBytes bytes) prefix_code.code_length = code_length; code.m_max_prefixed_code_length = code_length; - } else { - code.m_symbol_codes.append(start_bit | next_code); - code.m_symbol_values.append(symbol); } if (code.m_bit_codes.size() < symbol + 1) { @@ -122,6 +127,15 @@ ErrorOr CanonicalCode::from_bytes(ReadonlyBytes bytes) next_code++; } + + u32 sentinel = next_code; + code.m_first_symbol_of_length_after.append(sentinel); + VERIFY(code.m_first_symbol_of_length_after[code_length] == sentinel); + + if (code.m_symbol_values.size() > first_symbol_index_at_length) + code.m_offset_to_first_symbol_index.append(first_symbol_index_at_length - first_code_at_length); + else + code.m_offset_to_first_symbol_index.append(0); // Never evaluated. } if (next_code != (1 << 15)) @@ -152,15 +166,14 @@ ErrorOr CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) cons return symbol_value; } - auto code_bits = TRY(stream.read_bits(m_max_prefixed_code_length)); - code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length); - code_bits |= 1 << m_max_prefixed_code_length; - - for (size_t i = m_max_prefixed_code_length; i < 16; ++i) { - size_t index; - if (binary_search(m_symbol_codes.span(), code_bits, &index)) - return m_symbol_values[index]; + auto code_bits = TRY(stream.read_bits(m_max_prefixed_code_length + 1)); + code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length + 1); + for (size_t i = m_max_prefixed_code_length + 1; i <= 15; ++i) { + if (code_bits < m_first_symbol_of_length_after[i]) { + auto symbol_index = (uint16_t)(m_offset_to_first_symbol_index[i] + code_bits); + return m_symbol_values[symbol_index]; + } code_bits = code_bits << 1 | TRY(stream.read_bit()); } diff --git a/Userland/Libraries/LibCompress/Deflate.h b/Userland/Libraries/LibCompress/Deflate.h index 20ad4aa4f585b34..4164f72244bdbb5 100644 --- a/Userland/Libraries/LibCompress/Deflate.h +++ b/Userland/Libraries/LibCompress/Deflate.h @@ -39,9 +39,11 @@ class CanonicalCode { }; // Decompression - indexed by code - Vector m_symbol_codes; Vector m_symbol_values; + Vector m_first_symbol_of_length_after; + Vector m_offset_to_first_symbol_index; + Array m_prefix_table {}; size_t m_max_prefixed_code_length { 0 };