tvl-depot/src/libutil/compression.cc
Eelco Dolstra 25dff2b7db
runProgram(): Distinguish between empty input and no input
For example, if we call brotli with an empty input, it shouldn't read
from the caller's stdin.
2017-03-15 16:50:19 +01:00

312 lines
7.7 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "compression.hh"
#include "util.hh"
#include "finally.hh"
#include <lzma.h>
#include <bzlib.h>
#include <cstdio>
#include <cstring>
#include <iostream>
namespace nix {
/* Decompress the xz data held in `in` and return the decoded bytes.

   The decoder is created with the LZMA_CONCATENATED flag and UINT64_MAX
   as its memory-limit argument. Throws Error if the decoder cannot be
   set up or if liblzma reports anything other than LZMA_OK or
   LZMA_STREAM_END while decoding. */
static ref<std::string> decompressXZ(const std::string & in)
{
    lzma_stream stream = LZMA_STREAM_INIT;

    if (lzma_stream_decoder(&stream, UINT64_MAX, LZMA_CONCATENATED) != LZMA_OK)
        throw Error("unable to initialise lzma decoder");

    // Release the decoder state on every exit path, including throws.
    Finally cleanup([&]() { lzma_end(&stream); });

    uint8_t chunk[BUFSIZ];
    auto result = make_ref<std::string>();

    stream.next_in = (uint8_t *) in.c_str();
    stream.avail_in = in.size();
    stream.next_out = chunk;
    stream.avail_out = sizeof(chunk);

    lzma_action mode = LZMA_RUN;

    for (;;) {
        checkInterrupt();

        // Once all input has been handed over, ask the decoder to finish.
        if (stream.avail_in == 0)
            mode = LZMA_FINISH;

        const lzma_ret status = lzma_code(&stream, mode);
        const bool atEnd = status == LZMA_STREAM_END;

        // Move decoded bytes into the result whenever the scratch buffer
        // is full, and one last time when the stream ends.
        if (atEnd || stream.avail_out == 0) {
            result->append((char *) chunk, sizeof(chunk) - stream.avail_out);
            stream.next_out = chunk;
            stream.avail_out = sizeof(chunk);
        }

        if (atEnd)
            return result;

        if (status != LZMA_OK)
            throw Error("error while decompressing xz file");
    }
}
/* Decompress the bzip2 data held in `in` and return the decoded bytes.

   Throws Error if the decoder cannot be set up, if bzlib reports an
   error while decoding, or if the input ends before the end-of-stream
   marker has been seen (truncated or empty input). */
static ref<std::string> decompressBzip2(const std::string & in)
{
    bz_stream strm;
    memset(&strm, 0, sizeof(strm));

    int ret = BZ2_bzDecompressInit(&strm, 0, 0);
    if (ret != BZ_OK)
        throw Error("unable to initialise bzip2 decoder");

    // Release the decoder state on every exit path, including throws.
    Finally free([&]() { BZ2_bzDecompressEnd(&strm); });

    char outbuf[BUFSIZ];
    ref<std::string> res = make_ref<std::string>();
    strm.next_in = (char *) in.c_str();
    strm.avail_in = in.size();
    strm.next_out = outbuf;
    strm.avail_out = sizeof(outbuf);

    while (true) {
        checkInterrupt();

        // Snapshot the state so that a call which makes no progress can
        // be detected afterwards.
        const bool inputExhausted = strm.avail_in == 0;
        const auto availOutBefore = strm.avail_out;

        int ret = BZ2_bzDecompress(&strm);

        const bool producedOutput = strm.avail_out != availOutBefore;

        // Move decoded bytes into the result whenever the scratch buffer
        // is full, and one last time when the stream ends.
        if (strm.avail_out == 0 || ret == BZ_STREAM_END) {
            res->append(outbuf, sizeof(outbuf) - strm.avail_out);
            strm.next_out = outbuf;
            strm.avail_out = sizeof(outbuf);
        }

        if (ret == BZ_STREAM_END)
            return res;

        if (ret != BZ_OK)
            throw Error("error while decompressing bzip2 file");

        // bzlib returns BZ_OK (not an error) when it merely needs more
        // input. If there was no input left before the call and the call
        // produced no output, nothing can ever change: the data is
        // truncated. Without this check the loop would spin forever.
        if (inputExhausted && !producedOutput)
            throw Error("error while decompressing bzip2 file: unexpected end of input");
    }
}
/* Decompress brotli data by feeding `in` to the program named by BRO,
   invoked with the single argument "-d", and returning what
   runProgram() yields. */
static ref<std::string> decompressBrotli(const std::string & in)
{
    // FIXME: use libbrotli
    std::string decoded = runProgram(BRO, true, {"-d"}, {in});
    return make_ref<std::string>(std::move(decoded));
}
/* Compress `in` in one shot using the compression method called
   `method`, and return the compressed bytes.

   The work is delegated to the sink built by makeCompressionSink(),
   whose output is collected in a StringSink. */
ref<std::string> compress(const std::string & method, const std::string & in)
{
    StringSink collected;

    auto compressor = makeCompressionSink(method, collected);

    // Push the whole input through, then let the sink emit its tail.
    (*compressor)(in);
    compressor->finish();

    return collected.s;
}
/* Decompress `in` according to `method` and return the decoded bytes.

   Recognised methods are "none" (returns a copy of the input), "xz",
   "bzip2" and "br". Any other name raises UnknownCompressionMethod. */
ref<std::string> decompress(const std::string & method, const std::string & in)
{
    if (method == "none")
        return make_ref<std::string>(in);

    if (method == "xz")
        return decompressXZ(in);

    if (method == "bzip2")
        return decompressBzip2(in);

    if (method == "br")
        return decompressBrotli(in);

    throw UnknownCompressionMethod(format("unknown compression method %s") % method);
}
/* Pass-through "compression" sink: every byte written to it is handed
   unchanged to `nextSink`. */
struct NoneSink : CompressionSink
{
    Sink & nextSink;

    NoneSink(Sink & nextSink)
        : nextSink(nextSink)
    {
    }

    // Nothing to finalise beyond flushing what the base class holds.
    void finish() override
    {
        flush();
    }

    void write(const unsigned char * data, size_t len) override
    {
        nextSink(data, len);
    }
};
struct XzSink : CompressionSink
{
Sink & nextSink;
uint8_t outbuf[BUFSIZ];
lzma_stream strm = LZMA_STREAM_INIT;
bool finished = false;
XzSink(Sink & nextSink) : nextSink(nextSink)
{
lzma_ret ret = lzma_easy_encoder(
&strm, 6, LZMA_CHECK_CRC64);
if (ret != LZMA_OK)
throw Error("unable to initialise lzma encoder");
// FIXME: apply the x86 BCJ filter?
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
~XzSink()
{
lzma_end(&strm);
}
void finish() override
{
CompressionSink::flush();
assert(!finished);
finished = true;
while (true) {
checkInterrupt();
lzma_ret ret = lzma_code(&strm, LZMA_FINISH);
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
throw Error("error while flushing xz file");
if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
if (ret == LZMA_STREAM_END) break;
}
}
void write(const unsigned char * data, size_t len) override
{
assert(!finished);
strm.next_in = data;
strm.avail_in = len;
while (strm.avail_in) {
checkInterrupt();
lzma_ret ret = lzma_code(&strm, LZMA_RUN);
if (ret != LZMA_OK)
throw Error("error while compressing xz file");
if (strm.avail_out == 0) {
nextSink(outbuf, sizeof(outbuf));
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
}
}
};
struct BzipSink : CompressionSink
{
Sink & nextSink;
char outbuf[BUFSIZ];
bz_stream strm;
bool finished = false;
BzipSink(Sink & nextSink) : nextSink(nextSink)
{
memset(&strm, 0, sizeof(strm));
int ret = BZ2_bzCompressInit(&strm, 9, 0, 30);
if (ret != BZ_OK)
throw Error("unable to initialise bzip2 encoder");
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
~BzipSink()
{
BZ2_bzCompressEnd(&strm);
}
void finish() override
{
flush();
assert(!finished);
finished = true;
while (true) {
checkInterrupt();
int ret = BZ2_bzCompress(&strm, BZ_FINISH);
if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
throw Error("error while flushing bzip2 file");
if (strm.avail_out == 0 || ret == BZ_STREAM_END) {
nextSink((unsigned char *) outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
if (ret == BZ_STREAM_END) break;
}
}
void write(const unsigned char * data, size_t len) override
{
assert(!finished);
strm.next_in = (char *) data;
strm.avail_in = len;
while (strm.avail_in) {
checkInterrupt();
int ret = BZ2_bzCompress(&strm, BZ_RUN);
if (ret != BZ_OK)
Error("error while compressing bzip2 file");
if (strm.avail_out == 0) {
nextSink((unsigned char *) outbuf, sizeof(outbuf));
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
}
}
};
/* Sink that accumulates everything written to it in memory and, on
   finish(), passes the accumulated bytes to the program named by BRO
   via runProgram(), forwarding that call's result to `nextSink`. */
struct BrotliSink : CompressionSink
{
    Sink & nextSink;
    std::string data;

    BrotliSink(Sink & nextSink)
        : nextSink(nextSink)
    {
    }

    ~BrotliSink()
    {
    }

    // FIXME: use libbrotli

    void finish() override
    {
        flush();

        std::string compressed = runProgram(BRO, true, {}, data);
        nextSink(compressed);
    }

    // Only buffers the bytes; no compression happens until finish().
    void write(const unsigned char * data, size_t len) override
    {
        checkInterrupt();

        // The parameter shadows the member of the same name, hence `this->`.
        this->data.append((const char *) data, len);
    }
};
/* Create a compression sink for the method called `method` that writes
   its output to `nextSink`.

   Recognised methods are "none", "xz", "bzip2" and "br". Any other
   name raises UnknownCompressionMethod. */
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink)
{
    if (method == "none")
        return make_ref<NoneSink>(nextSink);

    if (method == "xz")
        return make_ref<XzSink>(nextSink);

    if (method == "bzip2")
        return make_ref<BzipSink>(nextSink);

    if (method == "br")
        return make_ref<BrotliSink>(nextSink);

    throw UnknownCompressionMethod(format("unknown compression method %s") % method);
}
}