Skip to content

Commit

Permalink
LibCompress: Speed up CanonicalCode::read_symbol() slow path
Browse files Browse the repository at this point in the history
Symbols that need <= 8 bits hit a fast path as of SerenityOS#18075, but
the slow path has done a full binary search over all symbols
ever since this code was added in SerenityOS#2963. (SerenityOS#3405 even added a FIXME
about speeding this up, but SerenityOS#18075 removed it.)

Instead of doing a binary search over all codes for every single
bit read, this implements the Moffat-Turpin approach described at
https://www.hanshq.net/zip.html#huffdec, which requires only a
single table read per bit.

    hyperfine 'Build/lagom/bin/unzip ~/Downloads/enwik8.zip'
    1.008 s ± 0.016 s  =>  957.7 ms ± 3.9 ms, 5% faster

Due to issue SerenityOS#25005, we can't peek the full 15 bits at once but
have to read them one by one. This makes the code look a bit
different from the code in the linked article.

I also tried not to change CanonicalCode::from_bytes() too much.
It makes a separate pass over all symbols for each of the 15 code
lengths. I think it could do it in a single pass instead. But
that's for a future change.

No behavior change (other than slightly better performance).
  • Loading branch information
nico committed Sep 9, 2024
1 parent 36b7cac commit 31a475a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
35 changes: 24 additions & 11 deletions Userland/Libraries/LibCompress/Deflate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,26 @@ ErrorOr<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
Array<PrefixCode, 1 << CanonicalCode::max_allowed_prefixed_code_length> prefix_codes;
size_t number_of_prefix_codes = 0;

code.m_first_symbol_of_length_after.append(0);
code.m_offset_to_first_symbol_index.append(0);

auto next_code = 0;
for (size_t code_length = 1; code_length <= 15; ++code_length) {
next_code <<= 1;
auto start_bit = 1 << code_length;

auto first_code_at_length = next_code;
auto first_symbol_index_at_length = code.m_symbol_values.size();

for (size_t symbol = 0; symbol < bytes.size(); ++symbol) {
if (bytes[symbol] != code_length)
continue;

if (next_code > start_bit)
return Error::from_string_literal("Failed to decode code lengths");

code.m_symbol_values.append(symbol);

if (code_length <= CanonicalCode::max_allowed_prefixed_code_length) {
if (number_of_prefix_codes >= prefix_codes.size())
return Error::from_string_literal("Invalid canonical Huffman code");
Expand All @@ -108,9 +116,6 @@ ErrorOr<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
prefix_code.code_length = code_length;

code.m_max_prefixed_code_length = code_length;
} else {
code.m_symbol_codes.append(start_bit | next_code);
code.m_symbol_values.append(symbol);
}

if (code.m_bit_codes.size() < symbol + 1) {
Expand All @@ -122,6 +127,15 @@ ErrorOr<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)

next_code++;
}

u32 sentinel = next_code;
code.m_first_symbol_of_length_after.append(sentinel);
VERIFY(code.m_first_symbol_of_length_after[code_length] == sentinel);

if (code.m_symbol_values.size() > first_symbol_index_at_length)
code.m_offset_to_first_symbol_index.append(first_symbol_index_at_length - first_code_at_length);
else
code.m_offset_to_first_symbol_index.append(0); // Never evaluated.
}

if (next_code != (1 << 15))
Expand Down Expand Up @@ -152,15 +166,14 @@ ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) cons
return symbol_value;
}

auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length));
code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length);
code_bits |= 1 << m_max_prefixed_code_length;

for (size_t i = m_max_prefixed_code_length; i < 16; ++i) {
size_t index;
if (binary_search(m_symbol_codes.span(), code_bits, &index))
return m_symbol_values[index];
auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length + 1));
code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length + 1);

for (size_t i = m_max_prefixed_code_length + 1; i <= 15; ++i) {
if (code_bits < m_first_symbol_of_length_after[i]) {
auto symbol_index = (uint16_t)(m_offset_to_first_symbol_index[i] + code_bits);
return m_symbol_values[symbol_index];
}
code_bits = code_bits << 1 | TRY(stream.read_bit());
}

Expand Down
4 changes: 3 additions & 1 deletion Userland/Libraries/LibCompress/Deflate.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ class CanonicalCode {
};

// Decompression - indexed by code
Vector<u16, 286> m_symbol_codes;
Vector<u16, 286> m_symbol_values;

Vector<u32, 16> m_first_symbol_of_length_after;
Vector<u16, 16> m_offset_to_first_symbol_index;

Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {};
size_t m_max_prefixed_code_length { 0 };

Expand Down

0 comments on commit 31a475a

Please sign in to comment.