diff --git a/algorithms/arithmetic/cpp/main.cpp b/algorithms/arithmetic/cpp/main.cpp index 0bca536..7afd1cc 100644 --- a/algorithms/arithmetic/cpp/main.cpp +++ b/algorithms/arithmetic/cpp/main.cpp @@ -147,6 +147,7 @@ std::vector arithmetic_encode_buffer(const std::vector& input) } std::vector out; + out.reserve(input.size() + compresskit::INITIAL_ENCODE_OVERHEAD); compresskit::write_frequency_header(out, compresskit::ARITHMETIC_MAGIC, freq); compresskit::BitWriter writer; @@ -176,11 +177,10 @@ std::vector arithmetic_decode_buffer(const std::vector& input) throw std::runtime_error("arithmetic: invalid frequency table"); } - std::vector payload(input.begin() + pos, input.end()); - compresskit::BitReader reader(payload); + std::vector out; + compresskit::BitReader reader(input.data() + pos, input.size() - pos); ArithmeticDecoder decoder(reader); - std::vector out; for (;;) { uint32_t sym = decoder.decode_symbol(cumulative); if (sym == compresskit::EOF_SYMBOL) { diff --git a/algorithms/huffman/cpp/main.cpp b/algorithms/huffman/cpp/main.cpp index 6da343e..d7e71e0 100644 --- a/algorithms/huffman/cpp/main.cpp +++ b/algorithms/huffman/cpp/main.cpp @@ -124,13 +124,14 @@ void build_decode_table(const std::vector& nodes, int32_t root, for (uint32_t b = 0; b < compresskit::BYTE_VALUES; ++b) { DecodeEntry& e = table[node][b]; int32_t cur = node; + bool corrupt = false; for (int bit = compresskit::BITS_PER_BYTE - 1; bit >= 0; --bit) { int v = (b >> bit) & 1; cur = (v == 0) ? nodes[cur].left : nodes[cur].right; if (cur < 0) { // Corrupt stream during table build: shouldn't happen for valid trees. e.count = 0; - e.next = root; + corrupt = true; break; } if (is_leaf(nodes, cur)) { @@ -138,7 +139,7 @@ void build_decode_table(const std::vector& nodes, int32_t root, cur = root; } } - e.next = cur; + e.next = corrupt ? root : cur; } } } diff --git a/algorithms/rle/cpp/main.cpp b/algorithms/rle/cpp/main.cpp index b1b2699..839a2ac 100644 --- a/algorithms/rle/cpp/main.cpp +++ b/algorithms/rle/cpp/main.cpp @@ -15,6 +15,7 @@ std::vector rle_encode_buffer(const std::vector& input) { std::vector out; + out.reserve(input.size() / 8 + compresskit::MAGIC_SIZE + compresskit::RLE_PAIR_SIZE); compresskit::write_magic(out, compresskit::RLE_MAGIC); if (input.empty()) { diff --git a/algorithms/shared/cpp/include/compresskit/bit_io.hpp b/algorithms/shared/cpp/include/compresskit/bit_io.hpp index 39a0bee..e3d79a8 100644 --- a/algorithms/shared/cpp/include/compresskit/bit_io.hpp +++ b/algorithms/shared/cpp/include/compresskit/bit_io.hpp @@ -44,9 +44,10 @@ class BitWriter { // MSB-first bit reader. Returns 0 for bits read past the end of the stream. class BitReader { public: - explicit BitReader(const std::vector& data) : data_(data) {} + explicit BitReader(const std::vector& data) : data_(data.data()), size_(data.size()) {} + BitReader(const uint8_t* data, std::size_t size) : data_(data), size_(size) {} int read_bit() { - if (byte_pos_ >= data_.size()) { + if (byte_pos_ >= size_) { return 0; } int bit = (data_[byte_pos_] >> ((BITS_PER_BYTE - 1) - bit_pos_)) & 1; @@ -56,10 +57,11 @@ class BitReader { } return bit; } - bool eof() const { return byte_pos_ >= data_.size(); } + bool eof() const { return byte_pos_ >= size_; } private: - const std::vector& data_; + const uint8_t* data_; + std::size_t size_; std::size_t byte_pos_ = 0; int bit_pos_ = 0; }; diff --git a/tests/conformance/run_cli_smoke.py b/tests/conformance/run_cli_smoke.py index 9b09371..3a7f62c 100644 --- a/tests/conformance/run_cli_smoke.py +++ b/tests/conformance/run_cli_smoke.py @@ -25,6 +25,7 @@ ROOT / "tests/data/empty.bin", ROOT / "tests/data/single_byte.bin", ROOT / "tests/data/alternating.bin", + ROOT / "tests/data/all_same_byte.bin", ROOT / "tests/data/small_dictionary_like.bin", ) diff --git a/tests/gen_testdata.py b/tests/gen_testdata.py index 12de9a6..f217544 100644 --- a/tests/gen_testdata.py +++ b/tests/gen_testdata.py @@ -12,6 +12,7 @@ # - empty.bin 空文件 # - single_byte.bin 单字节边界样本 # - alternating.bin 交替字节模式 +# - all_same_byte.bin 全相同字节样本(RLE/Huffman 边界情况) # - small_dictionary_like.bin 小型重复词典风格样本 ROOT = Path(__file__).resolve().parent.parent @@ -87,6 +88,7 @@ def main(): generate_literal_file(DATA_DIR / "empty.bin", b"") generate_literal_file(DATA_DIR / "single_byte.bin", b"\x00") generate_literal_file(DATA_DIR / "alternating.bin", (b"\xAA\x55" * 512)) + generate_literal_file(DATA_DIR / "all_same_byte.bin", (b"\x00" * 4096)) generate_literal_file( DATA_DIR / "small_dictionary_like.bin", (b"compresskit-dict-alpha\n" * 128)