feat(users/edef/weave): use tracing_indicatif for progress

Progress bars :3

Change-Id: I770d0f8381521b6efc8b38c0db4d59c771887fee
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12673
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
edef 2024-10-19 19:23:56 +00:00
parent b3f0e25fbc
commit 84a82f6f41
6 changed files with 1071 additions and 114 deletions

View file

@ -99,6 +99,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76"
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]] [[package]]
name = "arrow-format" name = "arrow-format"
version = "0.8.1" version = "0.8.1"
@ -233,7 +239,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
dependencies = [ dependencies = [
"memchr", "memchr",
"regex-automata", "regex-automata 0.4.8",
"serde", "serde",
] ]
@ -316,6 +322,19 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys",
]
[[package]] [[package]]
name = "const-oid" name = "const-oid"
version = "0.9.6" version = "0.9.6"
@ -510,6 +529,12 @@ version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]] [[package]]
name = "enum-primitive-derive" name = "enum-primitive-derive"
version = "0.3.0" version = "0.3.0"
@ -780,6 +805,29 @@ dependencies = [
"hashbrown 0.15.0", "hashbrown 0.15.0",
] ]
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-width",
"vt100",
]
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.11" version = "1.0.11"
@ -804,6 +852,12 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.159" version = "0.2.159"
@ -861,6 +915,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.4" version = "2.7.4"
@ -996,6 +1059,16 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
@ -1006,6 +1079,12 @@ dependencies = [
"libm", "libm",
] ]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]] [[package]]
name = "object" name = "object"
version = "0.36.5" version = "0.36.5"
@ -1021,6 +1100,12 @@ version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.12.3" version = "0.12.3"
@ -1425,6 +1510,12 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "portable-atomic"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.20" version = "0.2.20"
@ -1529,8 +1620,17 @@ checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
"regex-automata", "regex-automata 0.4.8",
"regex-syntax", "regex-syntax 0.8.5",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
] ]
[[package]] [[package]]
@ -1541,9 +1641,15 @@ checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
"regex-syntax", "regex-syntax 0.8.5",
] ]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.5" version = "0.8.5"
@ -1653,6 +1759,15 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]] [[package]]
name = "shlex" name = "shlex"
version = "1.3.0" version = "1.3.0"
@ -1868,6 +1983,16 @@ dependencies = [
"syn 2.0.79", "syn 2.0.79",
] ]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "1.40.0" version = "1.40.0"
@ -1937,6 +2062,60 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [ dependencies = [
"once_cell", "once_cell",
"valuable",
]
[[package]]
name = "tracing-indicatif"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "069580424efe11d97c3fef4197fa98c004fa26672cc71ad8770d224e23b1951d"
dependencies = [
"indicatif",
"tracing",
"tracing-core",
"tracing-subscriber",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
name = "tvix-tracing"
version = "0.1.0"
dependencies = [
"indicatif",
"thiserror",
"tokio",
"tracing",
"tracing-indicatif",
"tracing-subscriber",
] ]
[[package]] [[package]]
@ -1957,12 +2136,57 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.5" version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "vt100"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de"
dependencies = [
"itoa",
"log",
"unicode-width",
"vte",
]
[[package]]
name = "vte"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197"
dependencies = [
"arrayvec",
"utf8parse",
"vte_generate_state_changes",
]
[[package]]
name = "vte_generate_state_changes"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e369bee1b05d510a7b4ed645f5faa90619e05437111783ea5848f28d97d3c2e"
dependencies = [
"proc-macro2",
"quote",
]
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.11.0+wasi-snapshot-preview1" version = "0.11.0+wasi-snapshot-preview1"
@ -2035,6 +2259,9 @@ dependencies = [
"rayon", "rayon",
"rustc-hash", "rustc-hash",
"safer_owning_ref", "safer_owning_ref",
"tracing",
"tracing-indicatif",
"tvix-tracing",
] ]
[[package]] [[package]]

View file

@ -329,6 +329,21 @@ rec {
libName = "array_init_cursor"; libName = "array_init_cursor";
}; };
"arrayvec" = rec {
crateName = "arrayvec";
version = "0.7.6";
edition = "2018";
sha256 = "0l1fz4ccgv6pm609rif37sl5nv5k6lbzi7kkppgzqzh1vwix20kw";
authors = [
"bluss"
];
features = {
"borsh" = [ "dep:borsh" ];
"default" = [ "std" ];
"serde" = [ "dep:serde" ];
"zeroize" = [ "dep:zeroize" ];
};
};
"arrow-format" = rec { "arrow-format" = rec {
crateName = "arrow-format"; crateName = "arrow-format";
version = "0.8.1"; version = "0.8.1";
@ -692,7 +707,7 @@ rec {
} }
{ {
name = "regex-automata"; name = "regex-automata";
packageId = "regex-automata"; packageId = "regex-automata 0.4.8";
optional = true; optional = true;
usesDefaultFeatures = false; usesDefaultFeatures = false;
features = [ "dfa-search" ]; features = [ "dfa-search" ];
@ -953,6 +968,47 @@ rec {
}; };
resolvedDefaultFeatures = [ "crossterm" "tty" ]; resolvedDefaultFeatures = [ "crossterm" "tty" ];
}; };
"console" = rec {
crateName = "console";
version = "0.15.8";
edition = "2018";
sha256 = "1sz4nl9nz8pkmapqni6py7jxzi7nzqjxzb3ya4kxvmkb0zy867qf";
authors = [
"Armin Ronacher <armin.ronacher@active-4.com>"
];
dependencies = [
{
name = "encode_unicode";
packageId = "encode_unicode";
target = { target, features }: (target."windows" or false);
}
{
name = "lazy_static";
packageId = "lazy_static";
}
{
name = "libc";
packageId = "libc";
}
{
name = "unicode-width";
packageId = "unicode-width";
optional = true;
}
{
name = "windows-sys";
packageId = "windows-sys";
target = { target, features }: (target."windows" or false);
features = [ "Win32_Foundation" "Win32_System_Console" "Win32_Storage_FileSystem" "Win32_UI_Input_KeyboardAndMouse" ];
}
];
features = {
"default" = [ "unicode-width" "ansi-parsing" ];
"unicode-width" = [ "dep:unicode-width" ];
"windows-console-colors" = [ "ansi-parsing" ];
};
resolvedDefaultFeatures = [ "ansi-parsing" "unicode-width" ];
};
"const-oid" = rec { "const-oid" = rec {
crateName = "const-oid"; crateName = "const-oid";
version = "0.9.6"; version = "0.9.6";
@ -1531,6 +1587,21 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "use_std" ]; resolvedDefaultFeatures = [ "default" "use_std" ];
}; };
"encode_unicode" = rec {
crateName = "encode_unicode";
version = "0.3.6";
edition = "2015";
sha256 = "07w3vzrhxh9lpjgsg2y5bwzfar2aq35mdznvcp3zjl0ssj7d4mx3";
authors = [
"Torbjørn Birch Moltu <t.b.moltu@lyse.net>"
];
features = {
"ascii" = [ "dep:ascii" ];
"clippy" = [ "dep:clippy" ];
"default" = [ "std" ];
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"enum-primitive-derive" = rec { "enum-primitive-derive" = rec {
crateName = "enum-primitive-derive"; crateName = "enum-primitive-derive";
version = "0.3.0"; version = "0.3.0";
@ -2307,6 +2378,77 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "std" ]; resolvedDefaultFeatures = [ "default" "std" ];
}; };
"indicatif" = rec {
crateName = "indicatif";
version = "0.17.8";
edition = "2021";
sha256 = "18xyqxw9i5x4sbpzckhfz3nm984iq9r7nbi2lk76nz888n7mlfkn";
dependencies = [
{
name = "console";
packageId = "console";
usesDefaultFeatures = false;
features = [ "ansi-parsing" ];
}
{
name = "instant";
packageId = "instant";
target = { target, features }: ("wasm32" == target."arch" or null);
}
{
name = "number_prefix";
packageId = "number_prefix";
}
{
name = "portable-atomic";
packageId = "portable-atomic";
}
{
name = "unicode-width";
packageId = "unicode-width";
optional = true;
}
{
name = "vt100";
packageId = "vt100";
optional = true;
}
];
features = {
"default" = [ "unicode-width" "console/unicode-width" ];
"futures" = [ "dep:futures-core" ];
"improved_unicode" = [ "unicode-segmentation" "unicode-width" "console/unicode-width" ];
"in_memory" = [ "vt100" ];
"rayon" = [ "dep:rayon" ];
"tokio" = [ "dep:tokio" ];
"unicode-segmentation" = [ "dep:unicode-segmentation" ];
"unicode-width" = [ "dep:unicode-width" ];
"vt100" = [ "dep:vt100" ];
};
resolvedDefaultFeatures = [ "default" "in_memory" "unicode-width" "vt100" ];
};
"instant" = rec {
crateName = "instant";
version = "0.1.13";
edition = "2018";
sha256 = "08h27kzvb5jw74mh0ajv0nv9ggwvgqm8ynjsn2sa9jsks4cjh970";
authors = [
"sebcrozet <developer@crozet.re>"
];
dependencies = [
{
name = "cfg-if";
packageId = "cfg-if";
}
];
features = {
"js-sys" = [ "dep:js-sys" ];
"stdweb" = [ "dep:stdweb" ];
"wasm-bindgen" = [ "js-sys" "wasm-bindgen_rs" "web-sys" ];
"wasm-bindgen_rs" = [ "dep:wasm-bindgen_rs" ];
"web-sys" = [ "dep:web-sys" ];
};
};
"itoa" = rec { "itoa" = rec {
crateName = "itoa"; crateName = "itoa";
version = "1.0.11"; version = "1.0.11";
@ -2353,6 +2495,19 @@ rec {
]; ];
}; };
"lazy_static" = rec {
crateName = "lazy_static";
version = "1.5.0";
edition = "2015";
sha256 = "1zk6dqqni0193xg6iijh7i3i44sryglwgvx20spdvwk3r6sbrlmv";
authors = [
"Marvin Löbel <loebel.marvin@gmail.com>"
];
features = {
"spin" = [ "dep:spin" ];
"spin_no_std" = [ "spin" ];
};
};
"libc" = rec { "libc" = rec {
crateName = "libc"; crateName = "libc";
version = "0.2.159"; version = "0.2.159";
@ -2459,6 +2614,7 @@ rec {
"sval_ref" = [ "dep:sval_ref" ]; "sval_ref" = [ "dep:sval_ref" ];
"value-bag" = [ "dep:value-bag" ]; "value-bag" = [ "dep:value-bag" ];
}; };
resolvedDefaultFeatures = [ "std" ];
}; };
"lz4" = rec { "lz4" = rec {
crateName = "lz4"; crateName = "lz4";
@ -2504,6 +2660,22 @@ rec {
} }
]; ];
};
"matchers" = rec {
crateName = "matchers";
version = "0.1.0";
edition = "2018";
sha256 = "0n2mbk7lg2vf962c8xwzdq96yrc9i0p8dbmm4wa1nnkcp1dhfqw2";
authors = [
"Eliza Weisman <eliza@buoyant.io>"
];
dependencies = [
{
name = "regex-automata";
packageId = "regex-automata 0.1.10";
}
];
}; };
"memchr" = rec { "memchr" = rec {
crateName = "memchr"; crateName = "memchr";
@ -2923,6 +3095,35 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "user" ]; resolvedDefaultFeatures = [ "default" "user" ];
}; };
"nu-ansi-term" = rec {
crateName = "nu-ansi-term";
version = "0.46.0";
edition = "2018";
sha256 = "115sywxh53p190lyw97alm14nc004qj5jm5lvdj608z84rbida3p";
libName = "nu_ansi_term";
authors = [
"ogham@bsago.me"
"Ryan Scheel (Havvy) <ryan.havvy@gmail.com>"
"Josh Triplett <josh@joshtriplett.org>"
"The Nushell Project Developers"
];
dependencies = [
{
name = "overload";
packageId = "overload";
}
{
name = "winapi";
packageId = "winapi";
target = { target, features }: ("windows" == target."os" or null);
features = [ "consoleapi" "errhandlingapi" "fileapi" "handleapi" "processenv" ];
}
];
features = {
"derive_serde_style" = [ "serde" ];
"serde" = [ "dep:serde" ];
};
};
"num-traits" = rec { "num-traits" = rec {
crateName = "num-traits"; crateName = "num-traits";
version = "0.2.19"; version = "0.2.19";
@ -2951,6 +3152,19 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "libm" "std" ]; resolvedDefaultFeatures = [ "default" "libm" "std" ];
}; };
"number_prefix" = rec {
crateName = "number_prefix";
version = "0.4.0";
edition = "2015";
sha256 = "1wvh13wvlajqxkb1filsfzbrnq0vrmrw298v2j3sy82z1rm282w3";
authors = [
"Benjamin Sago <ogham@bsago.me>"
];
features = {
"default" = [ "std" ];
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"object" = rec { "object" = rec {
crateName = "object"; crateName = "object";
version = "0.36.5"; version = "0.36.5";
@ -3004,6 +3218,16 @@ rec {
}; };
resolvedDefaultFeatures = [ "alloc" "default" "race" "std" ]; resolvedDefaultFeatures = [ "alloc" "default" "race" "std" ];
}; };
"overload" = rec {
crateName = "overload";
version = "0.1.1";
edition = "2018";
sha256 = "0fdgbaqwknillagy1xq7xfgv60qdbk010diwl7s1p0qx7hb16n5i";
authors = [
"Daniel Salvadori <danaugrs@gmail.com>"
];
};
"parking_lot" = rec { "parking_lot" = rec {
crateName = "parking_lot"; crateName = "parking_lot";
version = "0.12.3"; version = "0.12.3";
@ -5024,6 +5248,19 @@ rec {
}; };
resolvedDefaultFeatures = [ "sysinfo" ]; resolvedDefaultFeatures = [ "sysinfo" ];
}; };
"portable-atomic" = rec {
crateName = "portable-atomic";
version = "1.9.0";
edition = "2018";
sha256 = "1cmd87qj90panwsi350djb8lsxdryqkkxmimjcz7a1nsysini76c";
libName = "portable_atomic";
features = {
"critical-section" = [ "dep:critical-section" ];
"default" = [ "fallback" ];
"serde" = [ "dep:serde" ];
};
resolvedDefaultFeatures = [ "default" "fallback" ];
};
"ppv-lite86" = rec { "ppv-lite86" = rec {
crateName = "ppv-lite86"; crateName = "ppv-lite86";
version = "0.2.20"; version = "0.2.20";
@ -5319,13 +5556,13 @@ rec {
} }
{ {
name = "regex-automata"; name = "regex-automata";
packageId = "regex-automata"; packageId = "regex-automata 0.4.8";
usesDefaultFeatures = false; usesDefaultFeatures = false;
features = [ "alloc" "syntax" "meta" "nfa-pikevm" ]; features = [ "alloc" "syntax" "meta" "nfa-pikevm" ];
} }
{ {
name = "regex-syntax"; name = "regex-syntax";
packageId = "regex-syntax"; packageId = "regex-syntax 0.8.5";
usesDefaultFeatures = false; usesDefaultFeatures = false;
} }
]; ];
@ -5353,7 +5590,32 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "perf" "perf-backtrack" "perf-cache" "perf-dfa" "perf-inline" "perf-literal" "perf-onepass" "std" "unicode" "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" ]; resolvedDefaultFeatures = [ "default" "perf" "perf-backtrack" "perf-cache" "perf-dfa" "perf-inline" "perf-literal" "perf-onepass" "std" "unicode" "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" ];
}; };
"regex-automata" = rec { "regex-automata 0.1.10" = rec {
crateName = "regex-automata";
version = "0.1.10";
edition = "2015";
sha256 = "0ci1hvbzhrfby5fdpf4ganhf7kla58acad9i1ff1p34dzdrhs8vc";
libName = "regex_automata";
authors = [
"Andrew Gallant <jamslam@gmail.com>"
];
dependencies = [
{
name = "regex-syntax";
packageId = "regex-syntax 0.6.29";
optional = true;
}
];
features = {
"default" = [ "std" ];
"fst" = [ "dep:fst" ];
"regex-syntax" = [ "dep:regex-syntax" ];
"std" = [ "regex-syntax" ];
"transducer" = [ "std" "fst" ];
};
resolvedDefaultFeatures = [ "default" "regex-syntax" "std" ];
};
"regex-automata 0.4.8" = rec {
crateName = "regex-automata"; crateName = "regex-automata";
version = "0.4.8"; version = "0.4.8";
edition = "2021"; edition = "2021";
@ -5378,7 +5640,7 @@ rec {
} }
{ {
name = "regex-syntax"; name = "regex-syntax";
packageId = "regex-syntax"; packageId = "regex-syntax 0.8.5";
optional = true; optional = true;
usesDefaultFeatures = false; usesDefaultFeatures = false;
} }
@ -5414,7 +5676,22 @@ rec {
}; };
resolvedDefaultFeatures = [ "alloc" "dfa-onepass" "dfa-search" "hybrid" "meta" "nfa-backtrack" "nfa-pikevm" "nfa-thompson" "perf-inline" "perf-literal" "perf-literal-multisubstring" "perf-literal-substring" "std" "syntax" "unicode" "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" "unicode-word-boundary" ]; resolvedDefaultFeatures = [ "alloc" "dfa-onepass" "dfa-search" "hybrid" "meta" "nfa-backtrack" "nfa-pikevm" "nfa-thompson" "perf-inline" "perf-literal" "perf-literal-multisubstring" "perf-literal-substring" "std" "syntax" "unicode" "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" "unicode-word-boundary" ];
}; };
"regex-syntax" = rec { "regex-syntax 0.6.29" = rec {
crateName = "regex-syntax";
version = "0.6.29";
edition = "2018";
sha256 = "1qgj49vm6y3zn1hi09x91jvgkl2b1fiaq402skj83280ggfwcqpi";
libName = "regex_syntax";
authors = [
"The Rust Project Developers"
];
features = {
"default" = [ "unicode" ];
"unicode" = [ "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" ];
};
resolvedDefaultFeatures = [ "default" "unicode" "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" ];
};
"regex-syntax 0.8.5" = rec {
crateName = "regex-syntax"; crateName = "regex-syntax";
version = "0.8.5"; version = "0.8.5";
edition = "2021"; edition = "2021";
@ -5706,6 +5983,25 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "std" ]; resolvedDefaultFeatures = [ "default" "std" ];
}; };
"sharded-slab" = rec {
crateName = "sharded-slab";
version = "0.1.7";
edition = "2018";
sha256 = "1xipjr4nqsgw34k7a2cgj9zaasl2ds6jwn89886kww93d32a637l";
libName = "sharded_slab";
authors = [
"Eliza Weisman <eliza@buoyant.io>"
];
dependencies = [
{
name = "lazy_static";
packageId = "lazy_static";
}
];
features = {
"loom" = [ "dep:loom" ];
};
};
"shlex" = rec { "shlex" = rec {
crateName = "shlex"; crateName = "shlex";
version = "1.3.0"; version = "1.3.0";
@ -6248,6 +6544,26 @@ rec {
]; ];
}; };
"thread_local" = rec {
crateName = "thread_local";
version = "1.1.8";
edition = "2021";
sha256 = "173i5lyjh011gsimk21np9jn8al18rxsrkjli20a7b8ks2xgk7lb";
authors = [
"Amanieu d'Antras <amanieu@gmail.com>"
];
dependencies = [
{
name = "cfg-if";
packageId = "cfg-if";
}
{
name = "once_cell";
packageId = "once_cell";
}
];
features = { };
};
"tokio" = rec { "tokio" = rec {
crateName = "tokio"; crateName = "tokio";
version = "1.40.0"; version = "1.40.0";
@ -6457,7 +6773,7 @@ rec {
"tracing-attributes" = [ "dep:tracing-attributes" ]; "tracing-attributes" = [ "dep:tracing-attributes" ];
"valuable" = [ "tracing-core/valuable" ]; "valuable" = [ "tracing-core/valuable" ];
}; };
resolvedDefaultFeatures = [ "attributes" "default" "std" "tracing-attributes" ]; resolvedDefaultFeatures = [ "attributes" "default" "max_level_trace" "release_max_level_debug" "std" "tracing-attributes" ];
}; };
"tracing-attributes" = rec { "tracing-attributes" = rec {
crateName = "tracing-attributes"; crateName = "tracing-attributes";
@ -6504,6 +6820,13 @@ rec {
packageId = "once_cell"; packageId = "once_cell";
optional = true; optional = true;
} }
{
name = "valuable";
packageId = "valuable";
optional = true;
usesDefaultFeatures = false;
target = { target, features }: (target."tracing_unstable" or false);
}
]; ];
features = { features = {
"default" = [ "std" "valuable/std" ]; "default" = [ "std" "valuable/std" ];
@ -6511,7 +6834,224 @@ rec {
"std" = [ "once_cell" ]; "std" = [ "once_cell" ];
"valuable" = [ "dep:valuable" ]; "valuable" = [ "dep:valuable" ];
}; };
resolvedDefaultFeatures = [ "once_cell" "std" ]; resolvedDefaultFeatures = [ "default" "once_cell" "std" "valuable" ];
};
"tracing-indicatif" = rec {
crateName = "tracing-indicatif";
version = "0.3.6";
edition = "2021";
sha256 = "07cmn4ilw8hdfzc1mirccwkgl160k3x9fhgg7xydj4gy9r181586";
libName = "tracing_indicatif";
dependencies = [
{
name = "indicatif";
packageId = "indicatif";
features = [ "in_memory" ];
}
{
name = "tracing";
packageId = "tracing";
}
{
name = "tracing-core";
packageId = "tracing-core";
}
{
name = "tracing-subscriber";
packageId = "tracing-subscriber";
}
];
};
"tracing-log" = rec {
crateName = "tracing-log";
version = "0.2.0";
edition = "2018";
sha256 = "1hs77z026k730ij1a9dhahzrl0s073gfa2hm5p0fbl0b80gmz1gf";
libName = "tracing_log";
authors = [
"Tokio Contributors <team@tokio.rs>"
];
dependencies = [
{
name = "log";
packageId = "log";
}
{
name = "once_cell";
packageId = "once_cell";
}
{
name = "tracing-core";
packageId = "tracing-core";
}
];
features = {
"ahash" = [ "dep:ahash" ];
"default" = [ "log-tracer" "std" ];
"interest-cache" = [ "lru" "ahash" ];
"lru" = [ "dep:lru" ];
"std" = [ "log/std" ];
};
resolvedDefaultFeatures = [ "log-tracer" "std" ];
};
"tracing-subscriber" = rec {
crateName = "tracing-subscriber";
version = "0.3.18";
edition = "2018";
sha256 = "12vs1bwk4kig1l2qqjbbn2nm5amwiqmkcmnznylzmnfvjy6083xd";
libName = "tracing_subscriber";
authors = [
"Eliza Weisman <eliza@buoyant.io>"
"David Barsky <me@davidbarsky.com>"
"Tokio Contributors <team@tokio.rs>"
];
dependencies = [
{
name = "matchers";
packageId = "matchers";
optional = true;
}
{
name = "nu-ansi-term";
packageId = "nu-ansi-term";
optional = true;
}
{
name = "once_cell";
packageId = "once_cell";
optional = true;
}
{
name = "regex";
packageId = "regex";
optional = true;
usesDefaultFeatures = false;
features = [ "std" "unicode-case" "unicode-perl" ];
}
{
name = "sharded-slab";
packageId = "sharded-slab";
optional = true;
}
{
name = "smallvec";
packageId = "smallvec";
optional = true;
}
{
name = "thread_local";
packageId = "thread_local";
optional = true;
}
{
name = "tracing";
packageId = "tracing";
optional = true;
usesDefaultFeatures = false;
}
{
name = "tracing-core";
packageId = "tracing-core";
usesDefaultFeatures = false;
}
{
name = "tracing-log";
packageId = "tracing-log";
optional = true;
usesDefaultFeatures = false;
features = [ "log-tracer" "std" ];
}
];
devDependencies = [
{
name = "regex";
packageId = "regex";
usesDefaultFeatures = false;
features = [ "std" ];
}
{
name = "tracing";
packageId = "tracing";
}
{
name = "tracing-log";
packageId = "tracing-log";
}
];
features = {
"ansi" = [ "fmt" "nu-ansi-term" ];
"chrono" = [ "dep:chrono" ];
"default" = [ "smallvec" "fmt" "ansi" "tracing-log" "std" ];
"env-filter" = [ "matchers" "regex" "once_cell" "tracing" "std" "thread_local" ];
"fmt" = [ "registry" "std" ];
"json" = [ "tracing-serde" "serde" "serde_json" ];
"local-time" = [ "time/local-offset" ];
"matchers" = [ "dep:matchers" ];
"nu-ansi-term" = [ "dep:nu-ansi-term" ];
"once_cell" = [ "dep:once_cell" ];
"parking_lot" = [ "dep:parking_lot" ];
"regex" = [ "dep:regex" ];
"registry" = [ "sharded-slab" "thread_local" "std" ];
"serde" = [ "dep:serde" ];
"serde_json" = [ "dep:serde_json" ];
"sharded-slab" = [ "dep:sharded-slab" ];
"smallvec" = [ "dep:smallvec" ];
"std" = [ "alloc" "tracing-core/std" ];
"thread_local" = [ "dep:thread_local" ];
"time" = [ "dep:time" ];
"tracing" = [ "dep:tracing" ];
"tracing-log" = [ "dep:tracing-log" ];
"tracing-serde" = [ "dep:tracing-serde" ];
"valuable" = [ "tracing-core/valuable" "valuable_crate" "valuable-serde" "tracing-serde/valuable" ];
"valuable-serde" = [ "dep:valuable-serde" ];
"valuable_crate" = [ "dep:valuable_crate" ];
};
resolvedDefaultFeatures = [ "alloc" "ansi" "default" "env-filter" "fmt" "matchers" "nu-ansi-term" "once_cell" "regex" "registry" "sharded-slab" "smallvec" "std" "thread_local" "tracing" "tracing-log" ];
};
"tvix-tracing" = rec {
crateName = "tvix-tracing";
version = "0.1.0";
edition = "2021";
src = lib.cleanSourceWith { filter = sourceFilter; src = ../../../tvix/tracing; };
libName = "tvix_tracing";
dependencies = [
{
name = "indicatif";
packageId = "indicatif";
}
{
name = "thiserror";
packageId = "thiserror";
}
{
name = "tokio";
packageId = "tokio";
features = [ "sync" "rt" ];
}
{
name = "tracing";
packageId = "tracing";
features = [ "max_level_trace" "release_max_level_debug" ];
}
{
name = "tracing-indicatif";
packageId = "tracing-indicatif";
}
{
name = "tracing-subscriber";
packageId = "tracing-subscriber";
features = [ "env-filter" ];
}
];
features = {
"axum" = [ "dep:axum" ];
"otlp" = [ "dep:tracing-opentelemetry" "dep:opentelemetry" "dep:opentelemetry-otlp" "dep:opentelemetry_sdk" "dep:opentelemetry-http" "reqwest-tracing?/opentelemetry_0_22" ];
"reqwest" = [ "dep:reqwest-tracing" ];
"tonic" = [ "dep:tonic" "dep:http" ];
"tracy" = [ "dep:tracing-tracy" ];
};
resolvedDefaultFeatures = [ "default" ];
}; };
"typenum" = rec { "typenum" = rec {
crateName = "typenum"; crateName = "typenum";
@ -6558,6 +7098,31 @@ rec {
}; };
resolvedDefaultFeatures = [ "cjk" "default" ]; resolvedDefaultFeatures = [ "cjk" "default" ];
}; };
"utf8parse" = rec {
crateName = "utf8parse";
version = "0.2.2";
edition = "2018";
sha256 = "088807qwjq46azicqwbhlmzwrbkz7l4hpw43sdkdyyk524vdxaq6";
authors = [
"Joe Wilm <joe@jwilm.com>"
"Christian Duerr <contact@christianduerr.com>"
];
features = { };
resolvedDefaultFeatures = [ "default" ];
};
"valuable" = rec {
crateName = "valuable";
version = "0.1.0";
edition = "2018";
sha256 = "0v9gp3nkjbl30z0fd56d8mx7w1csk86wwjhfjhr400wh9mfpw2w3";
features = {
"default" = [ "std" ];
"derive" = [ "valuable-derive" ];
"std" = [ "alloc" ];
"valuable-derive" = [ "dep:valuable-derive" ];
};
resolvedDefaultFeatures = [ "alloc" "std" ];
};
"version_check" = rec { "version_check" = rec {
crateName = "version_check"; crateName = "version_check";
version = "0.9.5"; version = "0.9.5";
@ -6567,6 +7132,97 @@ rec {
"Sergio Benitez <sb@sergio.bz>" "Sergio Benitez <sb@sergio.bz>"
]; ];
};
"vt100" = rec {
crateName = "vt100";
version = "0.15.2";
edition = "2021";
sha256 = "1pklc8y984axmxr0cd363srr2d27wd5rj15xlcmkjznvy0xqdkc4";
authors = [
"Jesse Luehrs <doy@tozt.net>"
];
dependencies = [
{
name = "itoa";
packageId = "itoa";
}
{
name = "log";
packageId = "log";
}
{
name = "unicode-width";
packageId = "unicode-width";
}
{
name = "vte";
packageId = "vte";
}
];
devDependencies = [
{
name = "vte";
packageId = "vte";
}
];
};
"vte" = rec {
crateName = "vte";
version = "0.11.1";
edition = "2021";
sha256 = "15r1ff4j8ndqj9vsyil3wqwxhhl7jsz5g58f31n0h1wlpxgjn0pm";
authors = [
"Joe Wilm <joe@jwilm.com>"
"Christian Duerr <contact@christianduerr.com>"
];
dependencies = [
{
name = "arrayvec";
packageId = "arrayvec";
optional = true;
usesDefaultFeatures = false;
}
{
name = "utf8parse";
packageId = "utf8parse";
}
{
name = "vte_generate_state_changes";
packageId = "vte_generate_state_changes";
}
];
features = {
"ansi" = [ "log" ];
"arrayvec" = [ "dep:arrayvec" ];
"default" = [ "no_std" ];
"log" = [ "dep:log" ];
"nightly" = [ "utf8parse/nightly" ];
"no_std" = [ "arrayvec" ];
"serde" = [ "dep:serde" ];
};
resolvedDefaultFeatures = [ "arrayvec" "default" "no_std" ];
};
"vte_generate_state_changes" = rec {
crateName = "vte_generate_state_changes";
version = "0.1.2";
edition = "2018";
sha256 = "0biwgpcji3w4llz7h4bi8c2rwqchm9gmyr7dnjki1m853gp9ndif";
procMacro = true;
authors = [
"Christian Duerr <contact@christianduerr.com>"
];
dependencies = [
{
name = "proc-macro2";
packageId = "proc-macro2";
}
{
name = "quote";
packageId = "quote";
}
];
}; };
"wasi" = rec { "wasi" = rec {
crateName = "wasi"; crateName = "wasi";
@ -6789,6 +7445,18 @@ rec {
name = "safer_owning_ref"; name = "safer_owning_ref";
packageId = "safer_owning_ref"; packageId = "safer_owning_ref";
} }
{
name = "tracing";
packageId = "tracing";
}
{
name = "tracing-indicatif";
packageId = "tracing-indicatif";
}
{
name = "tvix-tracing";
packageId = "tvix-tracing";
}
]; ];
}; };
@ -6815,7 +7483,7 @@ rec {
features = { features = {
"debug" = [ "impl-debug" ]; "debug" = [ "impl-debug" ];
}; };
resolvedDefaultFeatures = [ "cfg" "consoleapi" "evntrace" "handleapi" "impl-default" "in6addr" "inaddr" "minwinbase" "ntsecapi" "processenv" "synchapi" "winbase" "windef" "winerror" "winioctl" "winuser" ]; resolvedDefaultFeatures = [ "cfg" "consoleapi" "errhandlingapi" "evntrace" "fileapi" "handleapi" "impl-default" "in6addr" "inaddr" "minwinbase" "ntsecapi" "processenv" "synchapi" "winbase" "windef" "winerror" "winioctl" "winuser" ];
}; };
"winapi-i686-pc-windows-gnu" = rec { "winapi-i686-pc-windows-gnu" = rec {
crateName = "winapi-i686-pc-windows-gnu"; crateName = "winapi-i686-pc-windows-gnu";
@ -7784,7 +8452,7 @@ rec {
"Win32_Web" = [ "Win32" ]; "Win32_Web" = [ "Win32" ];
"Win32_Web_InternetExplorer" = [ "Win32_Web" ]; "Win32_Web_InternetExplorer" = [ "Win32_Web" ];
}; };
resolvedDefaultFeatures = [ "Wdk" "Wdk_Foundation" "Wdk_Storage" "Wdk_Storage_FileSystem" "Wdk_System" "Wdk_System_IO" "Win32" "Win32_Foundation" "Win32_Networking" "Win32_Networking_WinSock" "Win32_Security" "Win32_Storage" "Win32_Storage_FileSystem" "Win32_System" "Win32_System_Com" "Win32_System_IO" "Win32_System_Pipes" "Win32_System_SystemServices" "Win32_System_Threading" "Win32_System_WindowsProgramming" "Win32_UI" "Win32_UI_Shell" "default" ]; resolvedDefaultFeatures = [ "Wdk" "Wdk_Foundation" "Wdk_Storage" "Wdk_Storage_FileSystem" "Wdk_System" "Wdk_System_IO" "Win32" "Win32_Foundation" "Win32_Networking" "Win32_Networking_WinSock" "Win32_Security" "Win32_Storage" "Win32_Storage_FileSystem" "Win32_System" "Win32_System_Com" "Win32_System_Console" "Win32_System_IO" "Win32_System_Pipes" "Win32_System_SystemServices" "Win32_System_Threading" "Win32_System_WindowsProgramming" "Win32_UI" "Win32_UI_Input" "Win32_UI_Input_KeyboardAndMouse" "Win32_UI_Shell" "default" ];
}; };
"windows-targets" = rec { "windows-targets" = rec {
crateName = "windows-targets"; crateName = "windows-targets";

View file

@ -14,6 +14,9 @@ nix-compat = { version = "0.1.0", path = "../../../tvix/nix-compat" }
safer_owning_ref = "0.5.0" safer_owning_ref = "0.5.0"
rayon = "1.8.1" rayon = "1.8.1"
rustc-hash = "2.0.0" rustc-hash = "2.0.0"
tvix-tracing = { version = "0.1.0", path = "../../../tvix/tracing" }
tracing = "0.1.40"
tracing-indicatif = "0.3.6"
[dependencies.polars] [dependencies.polars]
version = "0.36.2" version = "0.36.2"

View file

@ -36,17 +36,27 @@ use polars::{
lazy::dsl::{col, SpecialEq}, lazy::dsl::{col, SpecialEq},
prelude::*, prelude::*,
}; };
use tracing::info_span;
use tracing_indicatif::span_ext::IndicatifSpanExt as _;
use weave::{as_fixed_binary, hash64, leak, load_ph_array, DONE, INDEX_NULL}; use weave::{as_fixed_binary, hash64, leak, load_ph_array, INDEX_NULL};
#[tracing::instrument]
fn main() -> Result<()> { fn main() -> Result<()> {
let _tracing = tvix_tracing::TracingBuilder::default()
.enable_progressbar()
.build()?;
let ph_array: &'static [[u8; 20]] = leak(load_ph_array()?); let ph_array: &'static [[u8; 20]] = leak(load_ph_array()?);
// TODO(edef): re-parallelise this // TODO(edef): re-parallelise this
// We originally parallelised on chunks, but ph_array is only a single chunk, due to how Parquet loading works. // We originally parallelised on chunks, but ph_array is only a single chunk, due to how Parquet loading works.
// TODO(edef): outline the 64-bit hash prefix? it's an indirection, but it saves ~2G of memory // TODO(edef): outline the 64-bit hash prefix? it's an indirection, but it saves ~2G of memory
eprint!("… build index\r");
let ph_map: &'static HashTable<(u64, u32)> = { let ph_map: &'static HashTable<(u64, u32)> = {
let span = info_span!("ph_map", indicatif.pb_show = tracing::field::Empty).entered();
span.pb_set_message("build index");
span.pb_start();
let mut ph_map = HashTable::with_capacity(ph_array.len()); let mut ph_map = HashTable::with_capacity(ph_array.len());
for (offset, item) in ph_array.iter().enumerate() { for (offset, item) in ph_array.iter().enumerate() {
@ -57,7 +67,6 @@ fn main() -> Result<()> {
&*Box::leak(Box::new(ph_map)) &*Box::leak(Box::new(ph_map))
}; };
eprintln!("{DONE}");
let ph_to_idx = |key: &[u8; 20]| -> u32 { let ph_to_idx = |key: &[u8; 20]| -> u32 {
let hash = hash64(key); let hash = hash64(key);
@ -69,35 +78,41 @@ fn main() -> Result<()> {
.unwrap_or(INDEX_NULL) .unwrap_or(INDEX_NULL)
}; };
eprint!("… swizzle references\r"); {
LazyFrame::scan_parquet("narinfo.parquet", ScanArgsParquet::default())? let span = info_span!("swizzle_refs", indicatif.pb_show = tracing::field::Empty).entered();
.with_column( span.pb_set_message("swizzle references");
col("references") span.pb_start();
.map(
move |series: Series| -> PolarsResult<Option<Series>> { LazyFrame::scan_parquet("narinfo.parquet", ScanArgsParquet::default())?
Ok(Some( .with_column(
series col("references")
.list()? .map(
.apply_to_inner(&|series: Series| -> PolarsResult<Series> { move |series: Series| -> PolarsResult<Option<Series>> {
let series = series.binary()?; Ok(Some(
let mut out: Vec<u32> = Vec::with_capacity(series.len()); series
out.extend(as_fixed_binary(series).flatten().map(ph_to_idx)); .list()?
Ok(Series::from_vec("reference_idxs", out)) .apply_to_inner(&|series: Series| -> PolarsResult<Series> {
})? let series = series.binary()?;
.into_series(), let mut out: Vec<u32> = Vec::with_capacity(series.len());
)) out.extend(
}, as_fixed_binary(series).flatten().map(ph_to_idx),
SpecialEq::from_type(DataType::List(DataType::UInt32.into())), );
) Ok(Series::from_vec("reference_idxs", out))
.alias("reference_idxs"), })?
) .into_series(),
.select([col("reference_idxs")]) ))
.with_streaming(true) },
.sink_parquet( SpecialEq::from_type(DataType::List(DataType::UInt32.into())),
"narinfo-references.parquet".into(), )
ParquetWriteOptions::default(), .alias("reference_idxs"),
)?; )
eprintln!("{DONE}"); .select([col("reference_idxs")])
.with_streaming(true)
.sink_parquet(
"narinfo-references.parquet".into(),
ParquetWriteOptions::default(),
)?;
};
Ok(()) Ok(())
} }

View file

@ -8,6 +8,7 @@ use std::{
slice, slice,
sync::Arc, sync::Arc,
}; };
use tracing_indicatif::span_ext::IndicatifSpanExt as _;
use polars::{ use polars::{
datatypes::BinaryChunked, datatypes::BinaryChunked,
@ -20,7 +21,6 @@ pub type FixedBytes<const N: usize> =
ArcRef<'static, polars::export::arrow::buffer::Bytes<u8>, [[u8; N]]>; ArcRef<'static, polars::export::arrow::buffer::Bytes<u8>, [[u8; N]]>;
pub const INDEX_NULL: u32 = !0; pub const INDEX_NULL: u32 = !0;
pub const DONE: &str = "\u{2714}";
/// A terrific hash function, turning 20 bytes of cryptographic hash /// A terrific hash function, turning 20 bytes of cryptographic hash
/// into 8 bytes of cryptographic hash. /// into 8 bytes of cryptographic hash.
@ -42,8 +42,13 @@ pub fn leak<O, T: ?Sized>(r: OwningRef<Arc<O>, T>) -> &T {
/// Read a dense `store_path_hash` array from `narinfo.parquet`, /// Read a dense `store_path_hash` array from `narinfo.parquet`,
/// returning it as an owned [FixedBytes]. /// returning it as an owned [FixedBytes].
#[tracing::instrument(fields(indicatif.pb_show = tracing::field::Empty))]
pub fn load_ph_array() -> Result<FixedBytes<20>> { pub fn load_ph_array() -> Result<FixedBytes<20>> {
eprint!("… load store_path_hash\r"); let span = tracing::Span::current();
span.pb_set_message("load store_path_hash");
span.pb_start();
// TODO(edef): this could use a further pushdown, since polars is more hindrance than help here // TODO(edef): this could use a further pushdown, since polars is more hindrance than help here
// We know this has to fit in memory (we can't mmap it without further encoding constraints), // We know this has to fit in memory (we can't mmap it without further encoding constraints),
// and we want a single `Vec<[u8; 20]>` of the data. // and we want a single `Vec<[u8; 20]>` of the data.
@ -57,7 +62,6 @@ pub fn load_ph_array() -> Result<FixedBytes<20>> {
); );
u32::try_from(ph_array.len()).expect("dataset exceeds 2^32"); u32::try_from(ph_array.len()).expect("dataset exceeds 2^32");
eprintln!("{DONE}");
Ok(ph_array) Ok(ph_array)
} }

View file

@ -15,6 +15,8 @@ use std::{
ops::Index, ops::Index,
sync::atomic::{AtomicU32, Ordering}, sync::atomic::{AtomicU32, Ordering},
}; };
use tracing::{info_span, warn};
use tracing_indicatif::span_ext::IndicatifSpanExt;
use polars::{ use polars::{
datatypes::StaticArray, datatypes::StaticArray,
@ -23,36 +25,48 @@ use polars::{
prelude::*, prelude::*,
}; };
use weave::{as_fixed_binary, hash64, DONE, INDEX_NULL}; use weave::{as_fixed_binary, hash64, INDEX_NULL};
#[tracing::instrument]
fn main() -> Result<()> { fn main() -> Result<()> {
eprint!("… parse roots\r"); let _tracing = tvix_tracing::TracingBuilder::default()
let roots: PathSet32 = as_fixed_binary::<20>( .enable_progressbar()
LazyFrame::scan_parquet("releases.parquet", ScanArgsParquet::default())? .build()?;
.explode([col("store_path_hash")])
.select([col("store_path_hash")]) let roots: PathSet32 = {
.collect()? let span = info_span!("parse_roots", indicatif.pb_show = tracing::field::Empty).entered();
.column("store_path_hash")? span.pb_set_message("parse roots");
.binary()?, span.pb_start();
)
.flatten() as_fixed_binary::<20>(
.collect(); LazyFrame::scan_parquet("releases.parquet", ScanArgsParquet::default())?
eprintln!("{DONE}"); .explode([col("store_path_hash")])
.select([col("store_path_hash")])
.collect()?
.column("store_path_hash")?
.binary()?,
)
.flatten()
.collect()
};
{ {
let ph_array = weave::load_ph_array()?; let span = info_span!("resolve_roots", indicatif.pb_show = tracing::field::Empty).entered();
span.pb_set_message("resolve roots");
span.pb_start();
eprint!("… resolve roots\r"); weave::load_ph_array()?
ph_array.par_iter().enumerate().for_each(|(idx, h)| { .into_par_iter()
if let Some(idx_slot) = roots.find(h) { .enumerate()
assert_eq!( .for_each(|(idx, h)| {
idx_slot.swap(idx as u32, Ordering::Relaxed), if let Some(idx_slot) = roots.find(h) {
INDEX_NULL, assert_eq!(
"duplicate entry" idx_slot.swap(idx as u32, Ordering::Relaxed),
); INDEX_NULL,
} "duplicate entry"
}); );
eprintln!("{DONE}"); }
});
} }
let mut todo = FxHashSet::default(); let mut todo = FxHashSet::default();
@ -67,17 +81,28 @@ fn main() -> Result<()> {
} }
todo.insert(idx); todo.insert(idx);
} }
println!("skipping {unknown_roots} unknown roots");
if unknown_roots != 0 {
warn!("skipping {unknown_roots} unknown roots");
}
} }
eprint!("… load reference_idxs\r"); let ri_array;
let ri_array = ParquetReader::new(File::open("narinfo-references.parquet")?)
.finish()?
.column("reference_idxs")?
.list()?
.clone();
let ri_array = { let ri_array = {
let span = info_span!(
"load_reference_idxs",
indicatif.pb_show = tracing::field::Empty
)
.entered();
span.pb_set_message("load reference_idxs");
span.pb_start();
ri_array = ParquetReader::new(File::open("narinfo-references.parquet")?)
.finish()?
.column("reference_idxs")?
.list()?
.clone();
ChunkedList::new(ri_array.downcast_iter().map(|chunk| { ChunkedList::new(ri_array.downcast_iter().map(|chunk| {
( (
chunk.offsets(), chunk.offsets(),
@ -91,49 +116,64 @@ fn main() -> Result<()> {
) )
})) }))
}; };
eprintln!("{DONE}");
let mut seen = todo.clone(); let mut seen = todo.clone();
while !todo.is_empty() { {
println!("todo: {} seen: {}", todo.len(), seen.len()); let span = info_span!("mark", indicatif.pb_show = tracing::field::Empty).entered();
span.pb_set_message("marking");
span.pb_set_style(&tvix_tracing::PB_PROGRESS_STYLE);
todo = todo while !todo.is_empty() {
.par_iter() span.pb_set_length(seen.len() as u64);
.flat_map(|&parent| { span.pb_set_position(seen.len().saturating_sub(todo.len()) as u64);
if parent == INDEX_NULL {
return FxHashSet::default();
}
ri_array[parent as usize] todo = todo
.iter() .par_iter()
.cloned() .flat_map(|&parent| {
.filter(|child| !seen.contains(child)) if parent == INDEX_NULL {
.collect::<FxHashSet<u32>>() return FxHashSet::default();
}) }
.collect();
for &index in &todo { ri_array[parent as usize]
seen.insert(index); .iter()
.cloned()
.filter(|child| !seen.contains(child))
.collect::<FxHashSet<u32>>()
})
.collect();
for &index in &todo {
seen.insert(index);
}
}
span.pb_set_length(seen.len() as u64);
span.pb_set_position(seen.len() as u64);
if seen.remove(&INDEX_NULL) {
warn!("WARNING: missing edges");
} }
} }
println!("done: {} paths", seen.len()); let seen = {
let span = info_span!("gather_live", indicatif.pb_show = tracing::field::Empty).entered();
span.pb_set_message("gathering live set");
if seen.remove(&INDEX_NULL) { let mut seen: Vec<u32> = seen.into_iter().collect();
println!("WARNING: missing edges"); seen.par_sort();
seen
};
{
let span = info_span!("write_output", indicatif.pb_show = tracing::field::Empty).entered();
span.pb_set_message("writing output");
span.pb_start();
ParquetWriter::new(File::create("live_idxs.parquet")?).finish(&mut df! {
"live_idx" => seen,
}?)?;
} }
eprint!("… gathering live set\r");
let mut seen: Vec<u32> = seen.into_iter().collect();
seen.par_sort();
eprintln!("{DONE}");
eprint!("… writing output\r");
ParquetWriter::new(File::create("live_idxs.parquet")?).finish(&mut df! {
"live_idx" => seen,
}?)?;
eprintln!("{DONE}");
Ok(()) Ok(())
} }