From eeb63b8901a9c049f1bb594abb9ce9b4a9c47620 Mon Sep 17 00:00:00 2001 From: Tiger Wang Date: Mon, 11 Jan 2021 16:39:43 +0000 Subject: zlib -> libdeflate (#5085) + Use libdeflate + Use std::byte * Fix passing temporary to string_view + Emulate make_unique_for_overwrite --- src/StringCompression.cpp | 345 ++++++++++++++++++++++++---------------------- 1 file changed, 177 insertions(+), 168 deletions(-) (limited to 'src/StringCompression.cpp') diff --git a/src/StringCompression.cpp b/src/StringCompression.cpp index ff434bbb1..6678fe1bd 100644 --- a/src/StringCompression.cpp +++ b/src/StringCompression.cpp @@ -1,242 +1,251 @@ // StringCompression.cpp -// Implements the wrapping functions for compression and decompression using AString as their data +// Implements the wrapping functions for compression and decompression #include "Globals.h" +#include "ByteBuffer.h" #include "StringCompression.h" +#include -int CompressString(const char * a_Data, size_t a_Length, AString & a_Compressed, int a_Factor) + +std::string_view Compression::Result::GetStringView() const { - uLongf CompressedSize = compressBound(static_cast(a_Length)); - - // HACK: We're assuming that AString returns its internal buffer in its data() call and we're overwriting that buffer! - // It saves us one allocation and one memcpy of the entire compressed data - // It may not work on some STL implementations! (Confirmed working on all currently used MSVC, GCC and Clang versions) - a_Compressed.resize(CompressedSize); - int errorcode = compress2(reinterpret_cast(const_cast(a_Compressed.data())), &CompressedSize, reinterpret_cast(a_Data), static_cast(a_Length), a_Factor); - if (errorcode != Z_OK) + const auto View = GetView(); + return { reinterpret_cast(View.data()), View.size() }; +} + + + + + +ContiguousByteBufferView Compression::Result::GetView() const +{ + // Get a generic std::byte * to what the variant is currently storing: + return { - return errorcode; - } - a_Compressed.resize(CompressedSize); - return Z_OK; + std::visit([](const auto & Buffer) -> const std::byte * + { + using Variant = std::decay_t; + + if constexpr (std::is_same_v) + { + return Buffer.data(); + } + else + { + return Buffer.get(); + } + }, Storage), Size + }; } -int UncompressString(const char * a_Data, size_t a_Length, AString & a_Uncompressed, size_t a_UncompressedSize) +Compression::Compressor::Compressor(int CompressionFactor) { - // HACK: We're assuming that AString returns its internal buffer in its data() call and we're overwriting that buffer! - // It saves us one allocation and one memcpy of the entire compressed data - // It may not work on some STL implementations! (Confirmed working on all currently used MSVC, GCC and Clang versions) - a_Uncompressed.resize(a_UncompressedSize); - uLongf UncompressedSize = static_cast(a_UncompressedSize); // On some architectures the uLongf is different in size to int, that may be the cause of the -5 error - int errorcode = uncompress(reinterpret_cast(const_cast(a_Uncompressed.data())), &UncompressedSize, reinterpret_cast(a_Data), static_cast(a_Length)); - if (errorcode != Z_OK) + m_Handle = libdeflate_alloc_compressor(CompressionFactor); + + if (m_Handle == nullptr) { - return errorcode; + throw std::bad_alloc(); } - a_Uncompressed.resize(UncompressedSize); - return Z_OK; } -int CompressStringGZIP(const char * a_Data, size_t a_Length, AString & a_Compressed) +Compression::Compressor::~Compressor() { - // Compress a_Data into a_Compressed using GZIP; return Z_XXX error constants same as zlib's compress2() + libdeflate_free_compressor(m_Handle); +} + + - a_Compressed.reserve(a_Length); - char Buffer[64 KiB]; - z_stream strm; - memset(&strm, 0, sizeof(strm)); - strm.next_in = reinterpret_cast(const_cast(a_Data)); - strm.avail_in = static_cast(a_Length); - strm.next_out = reinterpret_cast(Buffer); - strm.avail_out = sizeof(Buffer); - int res = deflateInit2(&strm, 9, Z_DEFLATED, 31, 9, Z_DEFAULT_STRATEGY); - if (res != Z_OK) +template +Compression::Result Compression::Compressor::Compress(const void * const Input, const size_t Size) +{ + // First see if the stack buffer has enough space: { - LOG("%s: compression initialization failed: %d (\"%s\").", __FUNCTION__, res, strm.msg); - return res; + Result::Static Buffer; + const auto BytesWrittenOut = Algorithm(m_Handle, Input, Size, Buffer.data(), Buffer.size()); + + if (BytesWrittenOut != 0) + { + return { Buffer, BytesWrittenOut }; + } } - for (;;) + // No it doesn't. Allocate space on the heap to write the compression result, increasing in powers of 2. + // This will either succeed, or except with bad_alloc. + + auto DynamicCapacity = Result::StaticCapacity * 2; + while (true) { - res = deflate(&strm, Z_FINISH); - switch (res) + auto Dynamic = cpp20::make_unique_for_overwrite(DynamicCapacity); + const auto BytesWrittenOut = Algorithm(m_Handle, Input, Size, Dynamic.get(), DynamicCapacity); + + if (BytesWrittenOut != 0) { - case Z_OK: - { - // Some data has been compressed. Consume the buffer and continue compressing - a_Compressed.append(Buffer, sizeof(Buffer) - strm.avail_out); - strm.next_out = reinterpret_cast(Buffer); - strm.avail_out = sizeof(Buffer); - if (strm.avail_in == 0) - { - // All data has been compressed - deflateEnd(&strm); - return Z_OK; - } - break; - } + return { std::move(Dynamic), BytesWrittenOut }; + } - case Z_STREAM_END: - { - // Finished compressing. Consume the rest of the buffer and return - a_Compressed.append(Buffer, sizeof(Buffer) - strm.avail_out); - deflateEnd(&strm); - return Z_OK; - } + DynamicCapacity *= 2; + } +} - default: - { - // An error has occurred, log it and return the error value - LOG("%s: compression failed: %d (\"%s\").", __FUNCTION__, res, strm.msg); - deflateEnd(&strm); - return res; - } - } // switch (res) - } // while (true) + + + + +Compression::Result Compression::Compressor::CompressGZip(const ContiguousByteBufferView Input) +{ + return Compress<&libdeflate_gzip_compress>(Input.data(), Input.size()); } -extern int UncompressStringGZIP(const char * a_Data, size_t a_Length, AString & a_Uncompressed) +Compression::Result Compression::Compressor::CompressZLib(const ContiguousByteBufferView Input) { - // Uncompresses a_Data into a_Uncompressed using GZIP; returns Z_OK for success or Z_XXX error constants same as zlib + return Compress<&libdeflate_zlib_compress>(Input.data(), Input.size()); +} - a_Uncompressed.reserve(a_Length); - char Buffer[64 KiB]; - z_stream strm; - memset(&strm, 0, sizeof(strm)); - strm.next_in = reinterpret_cast(const_cast(a_Data)); - strm.avail_in = static_cast(a_Length); - strm.next_out = reinterpret_cast(Buffer); - strm.avail_out = sizeof(Buffer); - int res = inflateInit2(&strm, 31); // Force GZIP decoding - if (res != Z_OK) + + +Compression::Result Compression::Compressor::CompressZLib(const void * const Input, const size_t Size) +{ + return Compress<&libdeflate_zlib_compress>(Input, Size); +} + + + + + +Compression::Extractor::Extractor() +{ + m_Handle = libdeflate_alloc_decompressor(); + + if (m_Handle == nullptr) { - LOG("%s: uncompression initialization failed: %d (\"%s\").", __FUNCTION__, res, strm.msg); - return res; + throw std::bad_alloc(); } +} - for (;;) - { - res = inflate(&strm, Z_NO_FLUSH); - switch (res) - { - case Z_OK: - { - // Some data has been uncompressed. Consume the buffer and continue uncompressing - a_Uncompressed.append(Buffer, sizeof(Buffer) - strm.avail_out); - strm.next_out = reinterpret_cast(Buffer); - strm.avail_out = sizeof(Buffer); - if (strm.avail_in == 0) - { - // All data has been uncompressed - inflateEnd(&strm); - return Z_OK; - } - break; - } - case Z_STREAM_END: - { - // Finished uncompressing. Consume the rest of the buffer and return - a_Uncompressed.append(Buffer, sizeof(Buffer) - strm.avail_out); - inflateEnd(&strm); - return Z_OK; - } - default: - { - // An error has occurred, log it and return the error value - LOG("%s: uncompression failed: %d (\"%s\").", __FUNCTION__, res, strm.msg); - inflateEnd(&strm); - return res; - } - } // switch (res) - } // while (true) + + +Compression::Extractor::~Extractor() +{ + libdeflate_free_decompressor(m_Handle); +} + + + + + +Compression::Result Compression::Extractor::ExtractGZip(ContiguousByteBufferView Input) +{ + return Extract<&libdeflate_gzip_decompress>(Input); +} + + + + + +Compression::Result Compression::Extractor::ExtractZLib(ContiguousByteBufferView Input) +{ + return Extract<&libdeflate_zlib_decompress>(Input); } -extern int InflateString(const char * a_Data, size_t a_Length, AString & a_Uncompressed) +Compression::Result Compression::Extractor::ExtractZLib(ContiguousByteBufferView Input, size_t UncompressedSize) { - a_Uncompressed.reserve(a_Length); - - char Buffer[64 KiB]; - z_stream strm; - memset(&strm, 0, sizeof(strm)); - strm.next_in = reinterpret_cast(const_cast(a_Data)); - strm.avail_in = static_cast(a_Length); - strm.next_out = reinterpret_cast(Buffer); - strm.avail_out = sizeof(Buffer); - - int res = inflateInit(&strm); // Force GZIP decoding - if (res != Z_OK) + return Extract<&libdeflate_zlib_decompress>(Input, UncompressedSize); +} + + + + + +template +Compression::Result Compression::Extractor::Extract(const ContiguousByteBufferView Input) +{ + // First see if the stack buffer has enough space: { - LOG("%s: inflation initialization failed: %d (\"%s\").", __FUNCTION__, res, strm.msg); - return res; + Result::Static Buffer; + size_t BytesWrittenOut; + + switch (Algorithm(m_Handle, Input.data(), Input.size(), Buffer.data(), Buffer.size(), &BytesWrittenOut)) + { + case LIBDEFLATE_SUCCESS: return { Buffer, BytesWrittenOut }; + case LIBDEFLATE_INSUFFICIENT_SPACE: break; + default: throw std::runtime_error("Data extraction failed."); + } } - for (;;) + // No it doesn't. Allocate space on the heap to write the compression result, increasing in powers of 2. + + auto DynamicCapacity = Result::StaticCapacity * 2; + while (true) { - res = inflate(&strm, Z_NO_FLUSH); - switch (res) + size_t BytesWrittenOut; + auto Dynamic = cpp20::make_unique_for_overwrite(DynamicCapacity); + + switch (Algorithm(m_Handle, Input.data(), Input.size(), Dynamic.get(), DynamicCapacity, &BytesWrittenOut)) { - case Z_OK: + case libdeflate_result::LIBDEFLATE_SUCCESS: return { std::move(Dynamic), BytesWrittenOut }; + case libdeflate_result::LIBDEFLATE_INSUFFICIENT_SPACE: { - // Some data has been uncompressed. Consume the buffer and continue uncompressing - a_Uncompressed.append(Buffer, sizeof(Buffer) - strm.avail_out); - strm.next_out = reinterpret_cast(Buffer); - strm.avail_out = sizeof(Buffer); - if (strm.avail_in == 0) - { - // All data has been uncompressed - inflateEnd(&strm); - return Z_OK; - } - break; + DynamicCapacity *= 2; + continue; } + default: throw std::runtime_error("Data extraction failed."); + } + } +} - case Z_STREAM_END: - { - // Finished uncompressing. Consume the rest of the buffer and return - a_Uncompressed.append(Buffer, sizeof(Buffer) - strm.avail_out); - inflateEnd(&strm); - return Z_OK; - } - default: - { - // An error has occurred, log it and return the error value - LOG("%s: inflation failed: %d (\"%s\").", __FUNCTION__, res, strm.msg); - inflateEnd(&strm); - return res; - } - } // switch (res) - } // while (true) -} +template +Compression::Result Compression::Extractor::Extract(const ContiguousByteBufferView Input, size_t UncompressedSize) +{ + // Here we have the expected size after extraction, so directly use a suitable buffer size: + if (UncompressedSize <= Result::StaticCapacity) + { + if ( + Result::Static Buffer; + Algorithm(m_Handle, Input.data(), Input.size(), Buffer.data(), UncompressedSize, nullptr) == libdeflate_result::LIBDEFLATE_SUCCESS + ) + { + return { Buffer, UncompressedSize }; + } + } + else if ( + auto Dynamic = cpp20::make_unique_for_overwrite(UncompressedSize); + Algorithm(m_Handle, Input.data(), Input.size(), Dynamic.get(), UncompressedSize, nullptr) == libdeflate_result::LIBDEFLATE_SUCCESS + ) + { + return { std::move(Dynamic), UncompressedSize }; + } + throw std::runtime_error("Data extraction failed."); +} -- cgit v1.2.3