2015-10-30 12:33:40 +01:00
# include "compression.hh"
2016-04-22 18:15:02 +02:00
# include "util.hh"
2016-04-29 17:43:37 +02:00
# include "finally.hh"
2018-02-11 19:47:42 +01:00
# include "logging.hh"
2015-10-30 12:33:40 +01:00
# include <lzma.h>
2016-04-29 17:43:37 +02:00
# include <bzlib.h>
2015-12-31 14:18:20 +01:00
# include <cstdio>
2016-04-29 17:43:37 +02:00
# include <cstring>
2015-10-30 12:33:40 +01:00
2017-12-29 21:42:14 +01:00
# if HAVE_BROTLI
# include <brotli/decode.h>
# include <brotli/encode.h>
# endif // HAVE_BROTLI
2016-05-04 15:46:25 +02:00
# include <iostream>
2016-02-15 21:45:56 +01:00
2016-05-04 15:46:25 +02:00
namespace nix {
2016-02-15 21:45:56 +01:00
2016-04-29 17:02:57 +02:00
// Decompress the xz/LZMA data held in `in` and return the decoded bytes.
// Throws CompressionError when the decoder cannot be initialised or when
// lzma_code() reports anything other than LZMA_OK / LZMA_STREAM_END.
static ref<std::string> decompressXZ(const std::string & in)
{
    lzma_stream strm(LZMA_STREAM_INIT);

    if (lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED) != LZMA_OK)
        throw CompressionError(" unable to initialise lzma decoder ");

    // Cleanup goes through the Finally helper from finally.hh
    // (presumably it runs the callback when this scope is left).
    Finally free([&]() { lzma_end(&strm); });

    uint8_t outbuf[BUFSIZ];
    auto res = make_ref<std::string>();

    strm.next_in = (uint8_t *) in.c_str();
    strm.avail_in = in.size();
    strm.next_out = outbuf;
    strm.avail_out = sizeof(outbuf);

    lzma_action action = LZMA_RUN;

    for (;;) {
        checkInterrupt();

        // Once every input byte has been consumed, ask liblzma to finish.
        if (strm.avail_in == 0)
            action = LZMA_FINISH;

        const lzma_ret rc = lzma_code(&strm, action);
        const bool streamEnded = rc == LZMA_STREAM_END;

        // Move decoded bytes into the result when the scratch buffer is
        // full or the stream is complete, then reuse the buffer.
        if (streamEnded || strm.avail_out == 0) {
            res->append((char *) outbuf, sizeof(outbuf) - strm.avail_out);
            strm.next_out = outbuf;
            strm.avail_out = sizeof(outbuf);
        }

        if (streamEnded)
            return res;

        if (rc != LZMA_OK)
            throw CompressionError(" error %d while decompressing xz file ", rc);
    }
}
2016-04-29 17:43:37 +02:00
// Decompress the bzip2 data held in `in` and return the decoded bytes.
// Throws CompressionError when the decoder cannot be initialised, when
// BZ2_bzDecompress() reports an error, or when the input is used up
// before the end of the stream has been reached.
static ref<std::string> decompressBzip2(const std::string & in)
{
    bz_stream strm;
    memset(&strm, 0, sizeof(strm));

    if (BZ2_bzDecompressInit(&strm, 0, 0) != BZ_OK)
        throw CompressionError(" unable to initialise bzip2 decoder ");

    // Cleanup goes through the Finally helper from finally.hh
    // (presumably it runs the callback when this scope is left).
    Finally free([&]() { BZ2_bzDecompressEnd(&strm); });

    char outbuf[BUFSIZ];
    auto res = make_ref<std::string>();

    strm.next_in = (char *) in.c_str();
    strm.avail_in = in.size();
    strm.next_out = outbuf;
    strm.avail_out = sizeof(outbuf);

    for (;;) {
        checkInterrupt();

        const int rc = BZ2_bzDecompress(&strm);

        // Move decoded bytes into the result when the scratch buffer is
        // full or the stream is complete, then reuse the buffer.
        if (rc == BZ_STREAM_END || strm.avail_out == 0) {
            res->append(outbuf, sizeof(outbuf) - strm.avail_out);
            strm.next_out = outbuf;
            strm.avail_out = sizeof(outbuf);
        }

        switch (rc) {
        case BZ_STREAM_END:
            return res;
        case BZ_OK:
            break;
        default:
            throw CompressionError(" error while decompressing bzip2 file ");
        }

        // The decoder wants to continue but there is nothing left to feed it.
        if (strm.avail_in == 0)
            throw CompressionError(" bzip2 data ends prematurely ");
    }
}
2017-03-13 14:40:15 +01:00
// Decompress the brotli data held in `in` and return the decoded bytes.
// Without HAVE_BROTLI the work is delegated to the program named by BROTLI
// via runProgram(); otherwise the brotli decoder library is used directly.
// The library path throws CompressionError on decoder errors, truncated
// input, or trailing bytes after the end of the stream.
static ref<std::string> decompressBrotli(const std::string & in)
{
#if !HAVE_BROTLI
    return make_ref<std::string>(runProgram(BROTLI, true, {" -d "}, {in}));
#else
    auto * decoder = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
    if (!decoder)
        throw CompressionError(" unable to initialize brotli decoder ");

    // Cleanup goes through the Finally helper from finally.hh
    // (presumably it runs the callback when this scope is left).
    Finally free([decoder]() { BrotliDecoderDestroyInstance(decoder); });

    uint8_t outbuf[BUFSIZ];
    auto res = make_ref<std::string>();

    const uint8_t * next_in = (uint8_t *) in.c_str();
    size_t avail_in = in.size();
    uint8_t * next_out = outbuf;
    size_t avail_out = sizeof(outbuf);

    for (;;) {
        checkInterrupt();

        const auto rc = BrotliDecoderDecompressStream(decoder,
            &avail_in, &next_in,
            &avail_out, &next_out,
            nullptr);

        if (rc == BROTLI_DECODER_RESULT_ERROR)
            throw CompressionError(" error while decompressing brotli file ");

        if (rc == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT)
            throw CompressionError(" incomplete or corrupt brotli file ");

        if (rc == BROTLI_DECODER_RESULT_SUCCESS && avail_in != 0)
            throw CompressionError(" unexpected input after brotli decompression ");

        // Unclear whether the decoder can ask for more output while the
        // buffer is still completely empty; abort if that ever happens.
        if (rc == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT && avail_out == sizeof(outbuf))
            throw CompressionError(" brotli decompression requires larger buffer ");

        // Always hand the decoder a completely empty buffer next time round.
        if (avail_out < sizeof(outbuf)) {
            res->append((char *) outbuf, sizeof(outbuf) - avail_out);
            next_out = outbuf;
            avail_out = sizeof(outbuf);
        }

        if (rc == BROTLI_DECODER_RESULT_SUCCESS)
            return res;
    }
#endif // HAVE_BROTLI
}
2018-02-07 17:54:08 +01:00
// Compress the whole of `in` with the given method and return the result.
// `method` and `parallel` are passed straight to makeCompressionSink().
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
{
    // Collects whatever the compression sink emits.
    StringSink collected;

    auto compressor = makeCompressionSink(method, collected, parallel);
    (*compressor)(in);
    compressor->finish();

    return collected.s;
}
// Decompress `in` using the decoder selected by `method`.
// Throws UnknownCompressionMethod when `method` matches none of the
// names handled below.
ref<std::string> decompress(const std::string & method, const std::string & in)
{
    if (method == " none ")
        return make_ref<std::string>(in);

    if (method == " xz ")
        return decompressXZ(in);

    if (method == " bzip2 ")
        return decompressBzip2(in);

    if (method == " br ")
        return decompressBrotli(in);

    throw UnknownCompressionMethod(format(" unknown compression method '%s' ") % method);
}
2016-05-04 15:46:25 +02:00
// Sink for the "no compression" case: written data is passed to the next
// sink unchanged.
struct NoneSink : CompressionSink
{
    Sink & nextSink;

    NoneSink(Sink & next)
        : nextSink(next)
    {
    }

    // Nothing to finalise beyond calling flush().
    void finish() override
    {
        flush();
    }

    // Forward the bytes as they are.
    void write(const unsigned char * data, size_t len) override
    {
        nextSink(data, len);
    }
};
struct XzSink : CompressionSink
{
Sink & nextSink ;
uint8_t outbuf [ BUFSIZ ] ;
lzma_stream strm = LZMA_STREAM_INIT ;
bool finished = false ;
2018-02-11 20:23:31 +01:00
template < typename F >
XzSink ( Sink & nextSink , F & & initEncoder ) : nextSink ( nextSink ) {
lzma_ret ret = initEncoder ( ) ;
2016-05-04 15:46:25 +02:00
if ( ret ! = LZMA_OK )
2017-03-21 19:23:07 +01:00
throw CompressionError ( " unable to initialise lzma encoder " ) ;
2016-05-04 15:46:25 +02:00
// FIXME: apply the x86 BCJ filter?
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
2018-02-11 20:23:31 +01:00
XzSink ( Sink & nextSink ) : XzSink ( nextSink , [ this ] ( ) {
return lzma_easy_encoder ( & strm , 6 , LZMA_CHECK_CRC64 ) ;
} ) { }
2016-05-04 15:46:25 +02:00
~ XzSink ( )
{
lzma_end ( & strm ) ;
}
void finish ( ) override
{
CompressionSink : : flush ( ) ;
assert ( ! finished ) ;
finished = true ;
while ( true ) {
checkInterrupt ( ) ;
lzma_ret ret = lzma_code ( & strm , LZMA_FINISH ) ;
if ( ret ! = LZMA_OK & & ret ! = LZMA_STREAM_END )
2017-03-21 19:23:07 +01:00
throw CompressionError ( " error while flushing xz file " ) ;
2016-05-04 15:46:25 +02:00
if ( strm . avail_out = = 0 | | ret = = LZMA_STREAM_END ) {
nextSink ( outbuf , sizeof ( outbuf ) - strm . avail_out ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
if ( ret = = LZMA_STREAM_END ) break ;
}
}
void write ( const unsigned char * data , size_t len ) override
{
assert ( ! finished ) ;
strm . next_in = data ;
strm . avail_in = len ;
while ( strm . avail_in ) {
checkInterrupt ( ) ;
lzma_ret ret = lzma_code ( & strm , LZMA_RUN ) ;
if ( ret ! = LZMA_OK )
2017-03-21 19:23:07 +01:00
throw CompressionError ( " error while compressing xz file " ) ;
2016-05-04 15:46:25 +02:00
if ( strm . avail_out = = 0 ) {
nextSink ( outbuf , sizeof ( outbuf ) ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
}
}
} ;
2018-02-11 20:23:31 +01:00
# ifdef HAVE_LZMA_MT
// XzSink variant whose encoder is created with lzma_stream_encoder_mt(),
// using as many threads as lzma_cputhreads() reports (at least one).
struct ParallelXzSink : public XzSink
{
    ParallelXzSink(Sink & nextSink)
        : XzSink(nextSink, [this]() {
            lzma_mt opts = {};
            opts.flags = 0;
            opts.block_size = 0;
            opts.timeout = 300; // Using the same setting as the xz cmd line
            opts.preset = LZMA_PRESET_DEFAULT;
            opts.filters = NULL;
            opts.check = LZMA_CHECK_CRC64;

            // A result of 0 from lzma_cputhreads() is replaced by a single thread.
            const auto detected = lzma_cputhreads();
            opts.threads = detected;
            if (detected == 0)
                opts.threads = 1;

            // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
            // number of threads.
            return lzma_stream_encoder_mt(&strm, &opts);
        })
    {
    }
};
# endif
2016-05-04 15:46:25 +02:00
struct BzipSink : CompressionSink
{
Sink & nextSink ;
char outbuf [ BUFSIZ ] ;
bz_stream strm ;
bool finished = false ;
BzipSink ( Sink & nextSink ) : nextSink ( nextSink )
{
memset ( & strm , 0 , sizeof ( strm ) ) ;
int ret = BZ2_bzCompressInit ( & strm , 9 , 0 , 30 ) ;
if ( ret ! = BZ_OK )
2017-03-21 19:23:07 +01:00
throw CompressionError ( " unable to initialise bzip2 encoder " ) ;
2016-05-04 15:46:25 +02:00
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
~ BzipSink ( )
{
BZ2_bzCompressEnd ( & strm ) ;
}
void finish ( ) override
{
flush ( ) ;
assert ( ! finished ) ;
finished = true ;
while ( true ) {
checkInterrupt ( ) ;
int ret = BZ2_bzCompress ( & strm , BZ_FINISH ) ;
if ( ret ! = BZ_FINISH_OK & & ret ! = BZ_STREAM_END )
2017-03-21 19:23:07 +01:00
throw CompressionError ( " error while flushing bzip2 file " ) ;
2016-05-04 15:46:25 +02:00
if ( strm . avail_out = = 0 | | ret = = BZ_STREAM_END ) {
nextSink ( ( unsigned char * ) outbuf , sizeof ( outbuf ) - strm . avail_out ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
if ( ret = = BZ_STREAM_END ) break ;
}
}
void write ( const unsigned char * data , size_t len ) override
{
assert ( ! finished ) ;
strm . next_in = ( char * ) data ;
strm . avail_in = len ;
while ( strm . avail_in ) {
checkInterrupt ( ) ;
int ret = BZ2_bzCompress ( & strm , BZ_RUN ) ;
if ( ret ! = BZ_OK )
2017-03-21 19:23:07 +01:00
CompressionError ( " error while compressing bzip2 file " ) ;
2016-05-04 15:46:25 +02:00
if ( strm . avail_out = = 0 ) {
nextSink ( ( unsigned char * ) outbuf , sizeof ( outbuf ) ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
}
}
} ;
2017-12-29 21:42:14 +01:00
struct LambdaCompressionSink : CompressionSink
2017-03-14 15:03:53 +01:00
{
Sink & nextSink ;
std : : string data ;
2017-12-29 21:42:14 +01:00
using CompressFnTy = std : : function < std : : string ( const std : : string & ) > ;
CompressFnTy compressFn ;
LambdaCompressionSink ( Sink & nextSink , CompressFnTy compressFn )
: nextSink ( nextSink )
, compressFn ( std : : move ( compressFn ) )
{
} ;
void finish ( ) override
{
flush ( ) ;
nextSink ( compressFn ( data ) ) ;
}
void write ( const unsigned char * data , size_t len ) override
{
checkInterrupt ( ) ;
this - > data . append ( ( const char * ) data , len ) ;
}
} ;
// Brotli compression through the program named by BROTLI: the buffered
// input is handed to runProgram() and its result becomes the output.
struct BrotliCmdSink : LambdaCompressionSink
{
    BrotliCmdSink(Sink & nextSink)
        : LambdaCompressionSink(
            nextSink,
            [](const std::string & input) {
                return runProgram(BROTLI, true, {}, input);
            })
    {
    }
};
# if HAVE_BROTLI
struct BrotliSink : CompressionSink
{
Sink & nextSink ;
uint8_t outbuf [ BUFSIZ ] ;
BrotliEncoderState * state ;
bool finished = false ;
2017-03-14 15:03:53 +01:00
BrotliSink ( Sink & nextSink ) : nextSink ( nextSink )
{
2017-12-29 21:42:14 +01:00
state = BrotliEncoderCreateInstance ( nullptr , nullptr , nullptr ) ;
if ( ! state )
throw CompressionError ( " unable to initialise brotli encoder " ) ;
2017-03-14 15:03:53 +01:00
}
~ BrotliSink ( )
{
2017-12-29 21:42:14 +01:00
BrotliEncoderDestroyInstance ( state ) ;
2017-03-14 15:03:53 +01:00
}
void finish ( ) override
{
flush ( ) ;
2017-12-29 21:42:14 +01:00
assert ( ! finished ) ;
const uint8_t * next_in = nullptr ;
size_t avail_in = 0 ;
uint8_t * next_out = outbuf ;
size_t avail_out = sizeof ( outbuf ) ;
while ( ! finished ) {
checkInterrupt ( ) ;
if ( ! BrotliEncoderCompressStream ( state ,
BROTLI_OPERATION_FINISH ,
& avail_in , & next_in ,
& avail_out , & next_out ,
nullptr ) )
throw CompressionError ( " error while finishing brotli file " ) ;
finished = BrotliEncoderIsFinished ( state ) ;
if ( avail_out = = 0 | | finished ) {
nextSink ( outbuf , sizeof ( outbuf ) - avail_out ) ;
next_out = outbuf ;
avail_out = sizeof ( outbuf ) ;
}
}
2017-03-14 15:03:53 +01:00
}
void write ( const unsigned char * data , size_t len ) override
{
2017-12-29 21:42:14 +01:00
assert ( ! finished ) ;
// Don't feed brotli too much at once
const size_t CHUNK_SIZE = sizeof ( outbuf ) < < 2 ;
while ( len ) {
size_t n = std : : min ( CHUNK_SIZE , len ) ;
writeInternal ( data , n ) ;
data + = n ;
len - = n ;
}
}
private :
void writeInternal ( const unsigned char * data , size_t len )
{
assert ( ! finished ) ;
const uint8_t * next_in = data ;
size_t avail_in = len ;
uint8_t * next_out = outbuf ;
size_t avail_out = sizeof ( outbuf ) ;
while ( avail_in > 0 ) {
checkInterrupt ( ) ;
if ( ! BrotliEncoderCompressStream ( state ,
BROTLI_OPERATION_PROCESS ,
& avail_in , & next_in ,
& avail_out , & next_out ,
nullptr ) )
throw CompressionError ( " error while compressing brotli file " ) ;
if ( avail_out < sizeof ( outbuf ) | | avail_in = = 0 ) {
nextSink ( outbuf , sizeof ( outbuf ) - avail_out ) ;
next_out = outbuf ;
avail_out = sizeof ( outbuf ) ;
}
}
2017-03-14 15:03:53 +01:00
}
} ;
2017-12-29 21:42:14 +01:00
# endif // HAVE_BROTLI
2017-03-14 15:03:53 +01:00
2018-02-07 17:54:08 +01:00
// Create the compression sink for `method`, writing its output to `nextSink`.
// When `parallel` is set, a ParallelXzSink is returned for xz if the build
// has HAVE_LZMA_MT; in every other case a message is printed and the
// regular selection below is used.
// Throws UnknownCompressionMethod when `method` matches none of the names
// handled below.
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
{
    if (parallel) {
#ifdef HAVE_LZMA_MT
        if (method == " xz ")
            return make_ref<ParallelXzSink>(nextSink);
#endif
        printMsg(lvlError, format(" Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression ") % method);
    }

    if (method == " none ")
        return make_ref<NoneSink>(nextSink);

    if (method == " xz ")
        return make_ref<XzSink>(nextSink);

    if (method == " bzip2 ")
        return make_ref<BzipSink>(nextSink);

    if (method == " br ") {
        // Use the library encoder when available, the external program otherwise.
#if HAVE_BROTLI
        return make_ref<BrotliSink>(nextSink);
#else
        return make_ref<BrotliCmdSink>(nextSink);
#endif
    }

    throw UnknownCompressionMethod(format(" unknown compression method '%s' ") % method);
}
2015-10-30 12:33:40 +01:00
}