feat(tvix/glue): Add AsyncRead wrapper to decompress streams

Add a new AsyncRead wrapper, DecompressedReader, that wraps an
underlying AsyncRead, but sniffs the magic bytes at the start of the
stream to determine which compression format is being used out of the
three that are supported by builtins.fetchTarball, and switches to the
correct decompression algorithm adapter dynamically.

This will be used in the implementation of builtins.fetchTarball.

Change-Id: I892a4683d5c93e67d4c173f3d21199bdc6605922
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11019
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Aspen Smith 2024-02-23 14:42:52 -05:00 committed by aspen
parent de727bccf9
commit 54609e8c17
8 changed files with 777 additions and 1 deletions

125
tvix/Cargo.lock generated
View file

@ -104,6 +104,21 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "async-compression"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c"
dependencies = [
"bzip2",
"flate2",
"futures-core",
"memchr",
"pin-project-lite",
"tokio",
"xz2",
]
[[package]]
name = "async-recursion"
version = "1.0.5"
@ -322,6 +337,27 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
[[package]]
name = "bzip2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "caps"
version = "0.5.5"
@ -797,12 +833,34 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27573eac26f4dd11e2b1916c3fe1baa56407c83c71a773a8ba17ec0bca03b6b7"
[[package]]
name = "filetime"
version = "0.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.4.1",
"windows-sys 0.52.0",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -1412,6 +1470,28 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "magic"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a200ae03df8c3dce7a963f6eeaac8feb41bf9001cb7e5ab22e3205aec2f0373d"
dependencies = [
"bitflags 2.4.2",
"libc",
"magic-sys",
"thiserror",
]
[[package]]
name = "magic-sys"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eff86ae08895140d628119d407d568f3b657145ee8c265878064f717534bb3bc"
dependencies = [
"libc",
"vcpkg",
]
[[package]]
name = "matchit"
version = "0.7.3"
@ -2154,6 +2234,15 @@ dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_syscall"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
@ -3013,6 +3102,21 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-tar"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75"
dependencies = [
"filetime",
"futures-core",
"libc",
"redox_syscall 0.3.5",
"tokio",
"tokio-stream",
"xattr",
]
[[package]]
name = "tokio-util"
version = "0.7.10"
@ -3413,6 +3517,7 @@ dependencies = [
name = "tvix-glue"
version = "0.1.0"
dependencies = [
"async-compression",
"async-recursion",
"bstr",
"bytes",
@ -3421,8 +3526,10 @@ dependencies = [
"futures",
"hex-literal",
"lazy_static",
"magic",
"nix 0.27.1",
"nix-compat",
"pin-project",
"pretty_assertions",
"reqwest",
"rstest",
@ -3433,6 +3540,7 @@ dependencies = [
"test-case",
"thiserror",
"tokio",
"tokio-tar",
"tokio-util",
"tracing",
"tvix-build",
@ -3593,6 +3701,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.4"
@ -3994,6 +4108,17 @@ name = "wu-manber"
version = "0.1.0"
source = "git+https://github.com/tvlfyi/wu-manber.git#0d5b22bea136659f7de60b102a7030e0daaa503d"
[[package]]
name = "xattr"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
dependencies = [
"libc",
"linux-raw-sys",
"rustix",
]
[[package]]
name = "xml-rs"
version = "0.8.19"

View file

@ -386,6 +386,74 @@ rec {
"zeroize" = [ "dep:zeroize" ];
};
};
"async-compression" = rec {
crateName = "async-compression";
version = "0.4.6";
edition = "2018";
sha256 = "0b6874q56g1cx8ivs9j89d757rsh9kyrrwlp1852094jjrmg85m1";
authors = [
"Wim Looman <wim@nemo157.com>"
"Allen Bui <fairingrey@gmail.com>"
];
dependencies = [
{
name = "bzip2";
packageId = "bzip2";
optional = true;
}
{
name = "flate2";
packageId = "flate2";
optional = true;
}
{
name = "futures-core";
packageId = "futures-core";
usesDefaultFeatures = false;
}
{
name = "memchr";
packageId = "memchr";
}
{
name = "pin-project-lite";
packageId = "pin-project-lite";
}
{
name = "tokio";
packageId = "tokio";
optional = true;
usesDefaultFeatures = false;
}
{
name = "xz2";
packageId = "xz2";
optional = true;
}
];
features = {
"all" = [ "all-implementations" "all-algorithms" ];
"all-algorithms" = [ "brotli" "bzip2" "deflate" "gzip" "lzma" "xz" "zlib" "zstd" "deflate64" ];
"all-implementations" = [ "futures-io" "tokio" ];
"brotli" = [ "dep:brotli" ];
"bzip2" = [ "dep:bzip2" ];
"deflate" = [ "flate2" ];
"deflate64" = [ "dep:deflate64" ];
"flate2" = [ "dep:flate2" ];
"futures-io" = [ "dep:futures-io" ];
"gzip" = [ "flate2" ];
"libzstd" = [ "dep:libzstd" ];
"lzma" = [ "xz2" ];
"tokio" = [ "dep:tokio" ];
"xz" = [ "xz2" ];
"xz2" = [ "dep:xz2" ];
"zlib" = [ "flate2" ];
"zstd" = [ "libzstd" "zstd-safe" ];
"zstd-safe" = [ "dep:zstd-safe" ];
"zstdmt" = [ "zstd" "zstd-safe/zstdmt" ];
};
resolvedDefaultFeatures = [ "bzip2" "flate2" "gzip" "tokio" "xz" "xz2" ];
};
"async-recursion" = rec {
crateName = "async-recursion";
version = "1.0.5";
@ -1045,6 +1113,60 @@ rec {
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"bzip2" = rec {
crateName = "bzip2";
version = "0.4.4";
edition = "2015";
sha256 = "1y27wgqkx3k2jmh4k26vra2kqjq1qc1asww8hac3cv1zxyk1dcdx";
authors = [
"Alex Crichton <alex@alexcrichton.com>"
];
dependencies = [
{
name = "bzip2-sys";
packageId = "bzip2-sys";
}
{
name = "libc";
packageId = "libc";
}
];
features = {
"futures" = [ "dep:futures" ];
"static" = [ "bzip2-sys/static" ];
"tokio" = [ "tokio-io" "futures" ];
"tokio-io" = [ "dep:tokio-io" ];
};
};
"bzip2-sys" = rec {
crateName = "bzip2-sys";
version = "0.1.11+1.0.8";
edition = "2015";
links = "bzip2";
sha256 = "1p2crnv8d8gpz5c2vlvzl0j55i3yqg5bi0kwsl1531x77xgraskk";
libName = "bzip2_sys";
libPath = "lib.rs";
authors = [
"Alex Crichton <alex@alexcrichton.com>"
];
dependencies = [
{
name = "libc";
packageId = "libc";
}
];
buildDependencies = [
{
name = "cc";
packageId = "cc";
}
{
name = "pkg-config";
packageId = "pkg-config";
}
];
features = { };
};
"caps" = rec {
crateName = "caps";
version = "0.5.5";
@ -2339,6 +2461,38 @@ rec {
"default" = [ "std" ];
};
};
"filetime" = rec {
crateName = "filetime";
version = "0.2.23";
edition = "2018";
sha256 = "1za0sbq7fqidk8aaq9v7m9ms0sv8mmi49g6p5cphpan819q4gr0y";
authors = [
"Alex Crichton <alex@alexcrichton.com>"
];
dependencies = [
{
name = "cfg-if";
packageId = "cfg-if";
}
{
name = "libc";
packageId = "libc";
target = { target, features }: (target."unix" or false);
}
{
name = "redox_syscall";
packageId = "redox_syscall 0.4.1";
target = { target, features }: ("redox" == target."os" or null);
}
{
name = "windows-sys";
packageId = "windows-sys 0.52.0";
target = { target, features }: (target."windows" or false);
features = [ "Win32_Foundation" "Win32_Storage_FileSystem" ];
}
];
};
"fixedbitset" = rec {
crateName = "fixedbitset";
version = "0.4.2";
@ -2352,6 +2506,52 @@ rec {
"serde" = [ "dep:serde" ];
};
};
"flate2" = rec {
crateName = "flate2";
version = "1.0.28";
edition = "2018";
sha256 = "03llhsh4gqdirnfxxb9g2w9n0721dyn4yjir3pz7z4vjaxb3yc26";
authors = [
"Alex Crichton <alex@alexcrichton.com>"
"Josh Triplett <josh@joshtriplett.org>"
];
dependencies = [
{
name = "crc32fast";
packageId = "crc32fast";
}
{
name = "miniz_oxide";
packageId = "miniz_oxide";
optional = true;
usesDefaultFeatures = false;
features = [ "with-alloc" ];
}
{
name = "miniz_oxide";
packageId = "miniz_oxide";
usesDefaultFeatures = false;
target = { target, features }: (("wasm32" == target."arch" or null) && (!("emscripten" == target."os" or null)));
features = [ "with-alloc" ];
}
];
features = {
"any_zlib" = [ "any_impl" ];
"cloudflare-zlib-sys" = [ "dep:cloudflare-zlib-sys" ];
"cloudflare_zlib" = [ "any_zlib" "cloudflare-zlib-sys" ];
"default" = [ "rust_backend" ];
"libz-ng-sys" = [ "dep:libz-ng-sys" ];
"libz-sys" = [ "dep:libz-sys" ];
"miniz-sys" = [ "rust_backend" ];
"miniz_oxide" = [ "dep:miniz_oxide" ];
"rust_backend" = [ "miniz_oxide" "any_impl" ];
"zlib" = [ "any_zlib" "libz-sys" ];
"zlib-default" = [ "any_zlib" "libz-sys/default" ];
"zlib-ng" = [ "any_zlib" "libz-ng-sys" ];
"zlib-ng-compat" = [ "zlib" "libz-sys/zlib-ng" ];
};
resolvedDefaultFeatures = [ "any_impl" "default" "miniz_oxide" "rust_backend" ];
};
"fnv" = rec {
crateName = "fnv";
version = "1.0.7";
@ -4100,7 +4300,7 @@ rec {
"default" = [ "std" "general" "errno" ];
"rustc-dep-of-std" = [ "core" "compiler_builtins" "no_std" ];
};
resolvedDefaultFeatures = [ "elf" "errno" "general" "ioctl" "no_std" ];
resolvedDefaultFeatures = [ "elf" "errno" "general" "ioctl" "no_std" "std" ];
};
"litrs" = rec {
crateName = "litrs";
@ -4192,6 +4392,80 @@ rec {
];
features = { };
};
"magic" = rec {
crateName = "magic";
version = "0.16.2";
edition = "2018";
sha256 = "0g9py31aw19j5sr5lznb068byhgbiynflvizjrxcwgccvw1sw052";
authors = [
"Daniel Micay <danielmicay@gmail.com>"
"Petar Radošević <petar@wunki.org>"
"lilydjwg <lilydjwg@gmail.com>"
"Jeff Belgum <belgum@bastille.io>"
"Onur Aslan <onur@onur.im>"
"robo9k <robo9k@symlink.io>"
];
dependencies = [
{
name = "bitflags";
packageId = "bitflags 2.4.2";
}
{
name = "libc";
packageId = "libc";
usesDefaultFeatures = false;
}
{
name = "magic-sys";
packageId = "magic-sys";
}
{
name = "thiserror";
packageId = "thiserror";
}
];
};
"magic-sys" = rec {
crateName = "magic-sys";
version = "0.3.0";
edition = "2015";
links = "magic";
sha256 = "1g5k9d9igxv4h23nbhp8bqa5gdpkd3ahgm0rh5i0s54mi3h6my7g";
authors = [
"robo9k <robo9k@symlink.io>"
];
dependencies = [
{
name = "libc";
packageId = "libc";
usesDefaultFeatures = false;
}
];
buildDependencies = [
{
name = "vcpkg";
packageId = "vcpkg";
}
];
features = {
"default" = [ "v5-38" ];
"v5-05" = [ "v5-04" ];
"v5-10" = [ "v5-05" ];
"v5-13" = [ "v5-10" ];
"v5-20" = [ "v5-13" ];
"v5-21" = [ "v5-20" ];
"v5-22" = [ "v5-21" ];
"v5-23" = [ "v5-22" ];
"v5-25" = [ "v5-23" ];
"v5-27" = [ "v5-25" ];
"v5-32" = [ "v5-27" ];
"v5-35" = [ "v5-32" ];
"v5-38" = [ "v5-35" ];
"v5-40" = [ "v5-38" ];
};
resolvedDefaultFeatures = [ "default" "v5-04" "v5-05" "v5-10" "v5-13" "v5-20" "v5-21" "v5-22" "v5-23" "v5-25" "v5-27" "v5-32" "v5-35" "v5-38" ];
};
"matchit" = rec {
crateName = "matchit";
version = "0.7.3";
@ -4340,6 +4614,7 @@ rec {
"simd" = [ "simd-adler32" ];
"simd-adler32" = [ "dep:simd-adler32" ];
};
resolvedDefaultFeatures = [ "with-alloc" ];
};
"mio" = rec {
crateName = "mio";
@ -6410,6 +6685,26 @@ rec {
];
};
"redox_syscall 0.3.5" = rec {
crateName = "redox_syscall";
version = "0.3.5";
edition = "2018";
sha256 = "0acgiy2lc1m2vr8cr33l5s7k9wzby8dybyab1a9p753hcbr68xjn";
libName = "syscall";
authors = [
"Jeremy Soller <jackpot51@gmail.com>"
];
dependencies = [
{
name = "bitflags";
packageId = "bitflags 1.3.2";
}
];
features = {
"core" = [ "dep:core" ];
"rustc-dep-of-std" = [ "core" "bitflags/rustc-dep-of-std" ];
};
};
"redox_syscall 0.4.1" = rec {
crateName = "redox_syscall";
version = "0.4.1";
@ -9139,6 +9434,65 @@ rec {
};
resolvedDefaultFeatures = [ "default" "fs" "net" "time" ];
};
"tokio-tar" = rec {
crateName = "tokio-tar";
version = "0.3.1";
edition = "2018";
sha256 = "0xffvap4g7hlswk5daklk3jaqha6s6wxw72c24kmqgna23018mwx";
authors = [
"Alex Crichton <alex@alexcrichton.com>"
"dignifiedquire <me@dignifiequire.com>"
"Artem Vorotnikov <artem@vorotnikov.me>"
"Aiden McClelland <me@drbonez.dev>"
];
dependencies = [
{
name = "filetime";
packageId = "filetime";
}
{
name = "futures-core";
packageId = "futures-core";
}
{
name = "libc";
packageId = "libc";
target = { target, features }: (target."unix" or false);
}
{
name = "redox_syscall";
packageId = "redox_syscall 0.3.5";
target = { target, features }: ("redox" == target."os" or null);
}
{
name = "tokio";
packageId = "tokio";
features = [ "fs" "io-util" "rt" ];
}
{
name = "tokio-stream";
packageId = "tokio-stream";
}
{
name = "xattr";
packageId = "xattr";
optional = true;
target = { target, features }: (target."unix" or false);
}
];
devDependencies = [
{
name = "tokio";
packageId = "tokio";
features = [ "full" ];
}
];
features = {
"default" = [ "xattr" ];
"xattr" = [ "dep:xattr" ];
};
resolvedDefaultFeatures = [ "default" "xattr" ];
};
"tokio-util" = rec {
crateName = "tokio-util";
version = "0.7.10";
@ -10768,6 +11122,11 @@ rec {
then lib.cleanSourceWith { filter = sourceFilter; src = ./glue; }
else ./glue;
dependencies = [
{
name = "async-compression";
packageId = "async-compression";
features = [ "tokio" "gzip" "bzip2" "xz" ];
}
{
name = "async-recursion";
packageId = "async-recursion";
@ -10788,10 +11147,18 @@ rec {
name = "futures";
packageId = "futures";
}
{
name = "magic";
packageId = "magic";
}
{
name = "nix-compat";
packageId = "nix-compat";
}
{
name = "pin-project";
packageId = "pin-project";
}
{
name = "reqwest";
packageId = "reqwest";
@ -10818,6 +11185,10 @@ rec {
name = "tokio";
packageId = "tokio";
}
{
name = "tokio-tar";
packageId = "tokio-tar";
}
{
name = "tokio-util";
packageId = "tokio-util";
@ -11358,6 +11729,16 @@ rec {
};
resolvedDefaultFeatures = [ "alloc" "std" ];
};
"vcpkg" = rec {
crateName = "vcpkg";
version = "0.2.15";
edition = "2015";
sha256 = "09i4nf5y8lig6xgj3f7fyrvzd3nlaw4znrihw8psidvv5yk4xkdc";
authors = [
"Jim McGrath <jimmc2@gmail.com>"
];
};
"version_check" = rec {
crateName = "version_check";
version = "0.9.4";
@ -13269,6 +13650,47 @@ rec {
];
};
"xattr" = rec {
crateName = "xattr";
version = "1.3.1";
edition = "2021";
sha256 = "0kqxm36w89vc6qcpn6pizlhgjgzq138sx4hdhbv2g6wk4ld4za4d";
authors = [
"Steven Allen <steven@stebalien.com>"
];
dependencies = [
{
name = "libc";
packageId = "libc";
target = { target, features }: (("freebsd" == target."os" or null) || ("netbsd" == target."os" or null));
}
{
name = "linux-raw-sys";
packageId = "linux-raw-sys";
usesDefaultFeatures = false;
target = { target, features }: ("linux" == target."os" or null);
features = [ "std" ];
}
{
name = "rustix";
packageId = "rustix";
usesDefaultFeatures = false;
features = [ "fs" "std" ];
}
];
devDependencies = [
{
name = "rustix";
packageId = "rustix";
usesDefaultFeatures = false;
features = [ "net" ];
}
];
features = {
"default" = [ "unsupported" ];
};
resolvedDefaultFeatures = [ "default" "unsupported" ];
};
"xml-rs" = rec {
crateName = "xml-rs";
version = "0.8.19";

View file

@ -9,7 +9,9 @@ bstr = "1.6.0"
bytes = "1.4.0"
data-encoding = "2.3.3"
futures = "0.3.30"
magic = "0.16.2"
nix-compat = { path = "../nix-compat" }
pin-project = "1.1"
reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots"], default-features = false }
tvix-build = { path = "../build", default-features = false, features = []}
tvix-eval = { path = "../eval" }
@ -17,6 +19,7 @@ tvix-castore = { path = "../castore" }
tvix-store = { path = "../store", default-features = false, features = []}
tracing = "0.1.37"
tokio = "1.28.0"
tokio-tar = "0.3.1"
tokio-util = { version = "0.7.9", features = ["io", "io-util", "compat"] }
thiserror = "1.0.38"
serde = "1.0.195"
@ -24,6 +27,10 @@ serde_json = "1.0"
sha2 = "0.10.8"
walkdir = "2.4.0"
[dependencies.async-compression]
version = "0.4.6"
features = ["tokio", "gzip", "bzip2", "xz"]
[dependencies.wu-manber]
git = "https://github.com/tvlfyi/wu-manber.git"

View file

@ -0,0 +1,221 @@
#![allow(dead_code)] // TODO
use std::{
io, mem,
pin::Pin,
task::{Context, Poll},
};
use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder};
use futures::ready;
use pin_project::pin_project;
use tokio::io::{AsyncBufRead, AsyncRead, BufReader, ReadBuf};
/// Byte signature that opens a gzip stream.
const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
/// Byte signature that opens a bzip2 stream (`BZh`).
const BZIP2_MAGIC: [u8; 3] = *b"BZh";
/// Byte signature that opens an xz stream.
const XZ_MAGIC: [u8; 6] = [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00];
/// How many leading bytes are sniffed before a decoder is chosen; this equals the length of
/// `XZ_MAGIC`, the longest of the signatures above.
const BYTES_NEEDED: usize = 6;

/// Compression formats that can be recognised from the first bytes of a stream.
#[derive(Debug, Clone, Copy)]
enum Algorithm {
    Gzip,
    Bzip2,
    Xz,
}

impl Algorithm {
    /// Identifies the compression format whose signature `magic` begins with.
    ///
    /// Signatures are tried in the order gzip, bzip2, xz. Returns `None` when `magic` starts
    /// with none of them, which includes the case where `magic` is shorter than every
    /// signature.
    fn from_magic(magic: &[u8]) -> Option<Self> {
        let signatures: [(&[u8], Self); 3] = [
            (&GZIP_MAGIC[..], Self::Gzip),
            (&BZIP2_MAGIC[..], Self::Bzip2),
            (&XZ_MAGIC[..], Self::Xz),
        ];

        signatures
            .iter()
            .find(|(signature, _)| magic.starts_with(signature))
            .map(|&(_, algorithm)| algorithm)
    }
}
/// A reader paired with bytes that were already pulled out of it.
///
/// The `AsyncRead` impl for this type emits the contents of `buffer` before any data read
/// from `inner`.
#[pin_project]
struct WithPreexistingBuffer<R> {
/// Bytes to emit ahead of `inner`'s data; emptied once they have been handed out.
buffer: Vec<u8>,
/// The wrapped reader, polled through a pinned projection.
#[pin]
inner: R,
}
impl<R> AsyncRead for WithPreexistingBuffer<R>
where
    R: AsyncRead,
{
    /// Emits the bytes stored in `buffer` first, then reads from `inner`.
    ///
    /// While `buffer` is non-empty, a call copies as many stored bytes as fit into `buf`,
    /// removes them from `buffer`, and returns `Poll::Ready(Ok(()))` without polling `inner`.
    /// Once `buffer` is empty, every call is forwarded to `inner` unchanged.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        let this = self.project();

        if !this.buffer.is_empty() {
            // Only hand out what fits: `put_slice` panics when given more than
            // `buf.remaining()` bytes.
            let len = this.buffer.len().min(buf.remaining());
            buf.put_slice(&this.buffer[..len]);
            this.buffer.drain(..len);

            // Return right away instead of also polling `inner`: if `inner` answered
            // `Pending` or `Err`, the caller would disregard `buf` and the bytes that were
            // just moved out of `buffer` would be lost.
            return Poll::Ready(Ok(()));
        }

        this.inner.poll_read(cx, buf)
    }
}
/// Internal state of a `DecompressedReader`: either the compression format has not been
/// chosen yet (`Unknown`), or one of the three decoders wraps the underlying reader.
#[pin_project(project = DecompressedReaderInnerProj)]
enum DecompressedReaderInner<R> {
/// No decoder selected yet.
Unknown {
/// Storage for the leading bytes read from the stream; `switch_to` moves it into the
/// `WithPreexistingBuffer` that feeds the decoder.
buffer: Vec<u8>,
/// The underlying reader; `switch_to` takes it out, leaving `None` behind.
#[pin]
inner: Option<R>,
},
/// Decoding as gzip.
Gzip(#[pin] GzipDecoder<BufReader<WithPreexistingBuffer<R>>>),
/// Decoding as bzip2.
Bzip2(#[pin] BzDecoder<BufReader<WithPreexistingBuffer<R>>>),
/// Decoding as xz.
Xz(#[pin] XzDecoder<BufReader<WithPreexistingBuffer<R>>>),
}
impl<R> DecompressedReaderInner<R>
where
    R: AsyncBufRead,
{
    /// Replaces the `Unknown` state with the decoder for `algorithm`.
    ///
    /// The sniffed bytes and the underlying reader are moved out of `Unknown`, wrapped in a
    /// `WithPreexistingBuffer` behind a `BufReader`, and handed to the selected decoder.
    ///
    /// # Panics
    ///
    /// Panics if `self` is not `Unknown`, or if the reader was already taken out of it.
    fn switch_to(&mut self, algorithm: Algorithm) {
        let source = if let Self::Unknown { buffer, inner } = self {
            WithPreexistingBuffer {
                buffer: mem::take(buffer),
                inner: inner.take().unwrap(),
            }
        } else {
            unreachable!()
        };

        let reader = BufReader::new(source);
        let decoder = match algorithm {
            Algorithm::Gzip => Self::Gzip(GzipDecoder::new(reader)),
            Algorithm::Bzip2 => Self::Bzip2(BzDecoder::new(reader)),
            Algorithm::Xz => Self::Xz(XzDecoder::new(reader)),
        };
        *self = decoder;
    }
}
impl<R> AsyncRead for DecompressedReaderInner<R>
where
    R: AsyncBufRead,
{
    /// Forwards the read to whichever decoder is active.
    ///
    /// # Panics
    ///
    /// Panics when called in the `Unknown` state; a decoder must have been selected with
    /// `switch_to` first.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        match self.project() {
            DecompressedReaderInnerProj::Gzip(decoder) => decoder.poll_read(cx, buf),
            DecompressedReaderInnerProj::Bzip2(decoder) => decoder.poll_read(cx, buf),
            DecompressedReaderInnerProj::Xz(decoder) => decoder.poll_read(cx, buf),
            DecompressedReaderInnerProj::Unknown { .. } => {
                unreachable!("Can't call poll_read on Unknown")
            }
        }
    }
}
/// An [`AsyncRead`] wrapper that decompresses the wrapped stream on the fly.
///
/// The first `BYTES_NEEDED` bytes of the stream are inspected for a gzip, bzip2 or xz
/// signature. Once they are available, the matching decoder is installed and all further
/// reads yield decompressed data. A stream that matches none of the signatures, or that ends
/// before `BYTES_NEEDED` bytes were read, produces an I/O error.
#[pin_project]
pub struct DecompressedReader<R> {
    #[pin]
    inner: DecompressedReaderInner<R>,
    // Set to `None` by `new` and not otherwise used in this file.
    switch_to: Option<Algorithm>,
}

impl<R> DecompressedReader<R> {
    /// Wraps `inner`. Nothing is read from it until the returned reader is polled.
    pub fn new(inner: R) -> Self {
        Self {
            inner: DecompressedReaderInner::Unknown {
                // Starts empty and grows as signature bytes arrive, so its length records
                // how far sniffing has progressed across polls.
                buffer: Vec::with_capacity(BYTES_NEEDED),
                inner: Some(inner),
            },
            switch_to: None,
        }
    }
}

impl<R> AsyncRead for DecompressedReader<R>
where
    R: AsyncBufRead + Unpin,
{
    /// Reads decompressed data into `buf`.
    ///
    /// Until a decoder has been selected, this first collects `BYTES_NEEDED` bytes from the
    /// underlying reader (possibly over several polls), selects the decoder from them, and
    /// then performs the read through that decoder.
    ///
    /// # Errors
    ///
    /// * Any error reported by the underlying reader or the active decoder.
    /// * `UnexpectedEof` if the stream ends before `BYTES_NEEDED` bytes were read.
    /// * `InvalidData` if the leading bytes match none of the supported formats.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        let mut this = self.project();
        let (buffer, inner) = match this.inner.as_mut().project() {
            DecompressedReaderInnerProj::Gzip(inner) => return inner.poll_read(cx, buf),
            DecompressedReaderInnerProj::Bzip2(inner) => return inner.poll_read(cx, buf),
            DecompressedReaderInnerProj::Xz(inner) => return inner.poll_read(cx, buf),
            DecompressedReaderInnerProj::Unknown { buffer, inner } => (buffer, inner),
        };

        let mut inner = inner
            .as_pin_mut()
            .expect("the underlying reader is present until a decoder is selected");

        // Keep reading until the signature is complete. Bytes collected so far live in
        // `buffer`, so a short read or a `Pending` in the middle loses nothing. When the
        // underlying reader returns `Pending` it has registered the waker itself, so no
        // self-wake is needed.
        while buffer.len() < BYTES_NEEDED {
            let mut scratch = [0u8; BYTES_NEEDED];
            let missing = BYTES_NEEDED - buffer.len();
            let mut our_buf = ReadBuf::new(&mut scratch[..missing]);
            if let Err(e) = ready!(inner.as_mut().poll_read(cx, &mut our_buf)) {
                return Poll::Ready(Err(e));
            }

            let read = our_buf.filled();
            if read.is_empty() {
                // A successful read of zero bytes into a non-empty buffer means end of
                // stream; waiting for more data would never finish.
                return Poll::Ready(Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "stream ended before the compression format could be detected",
                )));
            }
            buffer.extend_from_slice(read);
        }

        let detected = Algorithm::from_magic(buffer.as_slice());
        match detected {
            Some(algorithm) => this.inner.as_mut().switch_to(algorithm),
            None => {
                return Poll::Ready(Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "tar data not gz, bzip2, or xz compressed",
                )));
            }
        }

        this.inner.poll_read(cx, buf)
    }
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use async_compression::tokio::bufread::GzipEncoder;
    use futures::TryStreamExt;
    use test_case::test_case;
    use tokio::io::{AsyncReadExt, BufReader};
    use tokio_tar::Archive;

    use super::*;

    /// Gzip-compresses a short byte string in memory and checks that `DecompressedReader`
    /// yields the original bytes.
    #[tokio::test]
    async fn gzip() {
        let plaintext = b"abcdefghijk";

        let mut compressed = vec![];
        GzipEncoder::new(&plaintext[..])
            .read_to_end(&mut compressed)
            .await
            .unwrap();

        let mut decompressed = vec![];
        DecompressedReader::new(BufReader::new(&compressed[..]))
            .read_to_end(&mut decompressed)
            .await
            .unwrap();

        assert_eq!(plaintext[..], decompressed[..]);
    }

    /// Unpacks each compressed tarball fixture through `DecompressedReader` and checks that
    /// it holds exactly one entry, named `empty`, whose contents are the empty string.
    #[test_case(include_bytes!("tests/blob.tar.gz"); "gzip")]
    #[test_case(include_bytes!("tests/blob.tar.bz2"); "bzip2")]
    #[test_case(include_bytes!("tests/blob.tar.xz"); "xz")]
    #[tokio::test]
    async fn compressed_tar(data: &[u8]) {
        let mut archive = Archive::new(DecompressedReader::new(BufReader::new(data)));
        let mut entries: Vec<_> = archive.entries().unwrap().try_collect().await.unwrap();
        assert_eq!(entries.len(), 1);

        let entry = &mut entries[0];
        assert_eq!(entry.path().unwrap().as_ref(), Path::new("empty"));

        let mut contents = String::new();
        entry.read_to_string(&mut contents).await.unwrap();
        assert_eq!(contents, "");
    }
}

View file

@ -5,6 +5,7 @@ pub mod tvix_build;
pub mod tvix_io;
pub mod tvix_store_io;
mod decompression;
#[cfg(test)]
mod tests;

Binary file not shown.

Binary file not shown.

Binary file not shown.