chore(3p/nixpkgs/clickhouse): 23.3.13.6 -> 23.10.3.5
Change-Id: I3e4c43690fcaf50965152bf40e1ca2b027010fcf
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9997
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
parent
46964f6d8f
commit
3fe455cd4a
4 changed files with 234 additions and 26 deletions
1
third_party/overlays/clickhouse/.skip-tree
vendored
Normal file
1
third_party/overlays/clickhouse/.skip-tree
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
this needs to be callPackage'd
|
203
third_party/overlays/clickhouse/default.nix
vendored
Normal file
203
third_party/overlays/clickhouse/default.nix
vendored
Normal file
|
@ -0,0 +1,203 @@
|
||||||
|
{ lib
|
||||||
|
, llvmPackages
|
||||||
|
, fetchFromGitHub
|
||||||
|
, fetchpatch
|
||||||
|
, cmake
|
||||||
|
, ninja
|
||||||
|
, python3
|
||||||
|
, perl
|
||||||
|
, nasm
|
||||||
|
, yasm
|
||||||
|
, nixosTests
|
||||||
|
, darwin
|
||||||
|
, findutils
|
||||||
|
|
||||||
|
# currently for BLAKE3 hash function
|
||||||
|
, rustSupport ? true
|
||||||
|
|
||||||
|
, corrosion
|
||||||
|
, rustc
|
||||||
|
, cargo
|
||||||
|
, rustPlatform
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
# Use the stdenv that belongs to the llvmPackages set passed in by the caller.
inherit (llvmPackages) stdenv;
|
||||||
|
# Pick the mkDerivation used to build the package:
# - on Darwin, the one from darwin.apple_sdk_11_0.llvmPackages_16.stdenv
# - everywhere else, the one from llvmPackages.stdenv
mkDerivation = (
|
||||||
|
if stdenv.isDarwin
|
||||||
|
then darwin.apple_sdk_11_0.llvmPackages_16.stdenv
|
||||||
|
else llvmPackages.stdenv
|
||||||
|
).mkDerivation;
|
||||||
|
in
|
||||||
|
mkDerivation rec {
|
||||||
|
pname = "clickhouse";
|
||||||
|
version = "23.10.3.5";
|
||||||
|
|
||||||
|
src = fetchFromGitHub rec {
|
||||||
|
owner = "ClickHouse";
|
||||||
|
repo = "ClickHouse";
|
||||||
|
rev = "v${version}-stable";
|
||||||
|
fetchSubmodules = true;
|
||||||
|
name = "clickhouse-${rev}.tar.gz";
|
||||||
|
hash = "sha256-H3nIhBydLBxSesGrvqmwHmBoQGCGQlWgVVUudKLLkIY=";
|
||||||
|
postFetch = ''
|
||||||
|
# delete files that make the source too big
|
||||||
|
rm -rf $out/contrib/llvm-project/llvm/test
|
||||||
|
rm -rf $out/contrib/llvm-project/clang/test
|
||||||
|
rm -rf $out/contrib/croaring/benchmarks
|
||||||
|
|
||||||
|
# fix case insensitivity on macos https://github.com/NixOS/nixpkgs/issues/39308
|
||||||
|
rm -rf $out/contrib/sysroot/linux-*
|
||||||
|
rm -rf $out/contrib/liburing/man
|
||||||
|
|
||||||
|
# compress to not exceed the 2GB output limit
|
||||||
|
# try to make a deterministic tarball
|
||||||
|
tar -I 'gzip -n' \
|
||||||
|
--sort=name \
|
||||||
|
--mtime=1970-01-01 \
|
||||||
|
--owner=0 --group=0 \
|
||||||
|
--numeric-owner --mode=go=rX,u+rw,a-s \
|
||||||
|
--transform='s@^@source/@S' \
|
||||||
|
-cf temp -C "$out" .
|
||||||
|
rm -r "$out"
|
||||||
|
mv temp "$out"
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
strictDeps = true;
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
ninja
|
||||||
|
python3
|
||||||
|
perl
|
||||||
|
llvmPackages.lld
|
||||||
|
] ++ lib.optionals stdenv.isx86_64 [
|
||||||
|
nasm
|
||||||
|
yasm
|
||||||
|
] ++ lib.optionals stdenv.isDarwin [
|
||||||
|
llvmPackages.bintools
|
||||||
|
findutils
|
||||||
|
darwin.bootstrap_cmds
|
||||||
|
] ++ lib.optionals rustSupport [
|
||||||
|
rustc
|
||||||
|
cargo
|
||||||
|
rustPlatform.cargoSetupHook
|
||||||
|
];
|
||||||
|
|
||||||
|
# their vendored version is too old and missing this patch: https://github.com/corrosion-rs/corrosion/pull/205
|
||||||
|
corrosionSrc =
|
||||||
|
if rustSupport then
|
||||||
|
fetchFromGitHub
|
||||||
|
{
|
||||||
|
owner = "corrosion-rs";
|
||||||
|
repo = "corrosion";
|
||||||
|
rev = "v0.3.5";
|
||||||
|
hash = "sha256-r/jrck4RiQynH1+Hx4GyIHpw/Kkr8dHe1+vTHg+fdRs=";
|
||||||
|
} else null;
|
||||||
|
corrosionDeps =
|
||||||
|
if rustSupport then
|
||||||
|
rustPlatform.fetchCargoTarball
|
||||||
|
{
|
||||||
|
src = corrosionSrc;
|
||||||
|
name = "corrosion-deps";
|
||||||
|
preBuild = "cd generator";
|
||||||
|
hash = "sha256-dhUgpwSjE9NZ2mCkhGiydI51LIOClA5wwk1O3mnnbM8=";
|
||||||
|
} else null;
|
||||||
|
rustDeps =
|
||||||
|
if rustSupport then
|
||||||
|
rustPlatform.fetchCargoTarball
|
||||||
|
{
|
||||||
|
inherit src;
|
||||||
|
name = "rust-deps";
|
||||||
|
preBuild = "cd rust";
|
||||||
|
hash = "sha256-fWDAGm19b7uZv8aBdBoieY5c6POd8IxFXbGdtONpZbw=";
|
||||||
|
} else null;
|
||||||
|
|
||||||
|
dontCargoSetupPostUnpack = true;
|
||||||
|
postUnpack = lib.optionalString rustSupport ''
|
||||||
|
pushd source
|
||||||
|
|
||||||
|
rm -rf contrib/corrosion
|
||||||
|
cp -r --no-preserve=mode $corrosionSrc contrib/corrosion
|
||||||
|
|
||||||
|
pushd contrib/corrosion/generator
|
||||||
|
cargoDeps="$corrosionDeps" cargoSetupPostUnpackHook
|
||||||
|
corrosionDepsCopy="$cargoDepsCopy"
|
||||||
|
popd
|
||||||
|
|
||||||
|
pushd rust
|
||||||
|
cargoDeps="$rustDeps" cargoSetupPostUnpackHook
|
||||||
|
rustDepsCopy="$cargoDepsCopy"
|
||||||
|
cat .cargo/config >> .cargo/config.toml.in
|
||||||
|
cat .cargo/config >> skim/.cargo/config.toml.in
|
||||||
|
rm .cargo/config
|
||||||
|
popd
|
||||||
|
|
||||||
|
popd
|
||||||
|
'';
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
patchShebangs src/
|
||||||
|
|
||||||
|
substituteInPlace src/Storages/System/StorageSystemLicenses.sh \
|
||||||
|
--replace 'git rev-parse --show-toplevel' '$src'
|
||||||
|
substituteInPlace utils/check-style/check-duplicate-includes.sh \
|
||||||
|
--replace 'git rev-parse --show-toplevel' '$src'
|
||||||
|
substituteInPlace utils/check-style/check-ungrouped-includes.sh \
|
||||||
|
--replace 'git rev-parse --show-toplevel' '$src'
|
||||||
|
substituteInPlace utils/list-licenses/list-licenses.sh \
|
||||||
|
--replace 'git rev-parse --show-toplevel' '$src'
|
||||||
|
substituteInPlace utils/check-style/check-style \
|
||||||
|
--replace 'git rev-parse --show-toplevel' '$src'
|
||||||
|
'' + lib.optionalString stdenv.isDarwin ''
|
||||||
|
sed -i 's|gfind|find|' cmake/tools.cmake
|
||||||
|
sed -i 's|ggrep|grep|' cmake/tools.cmake
|
||||||
|
'' + lib.optionalString rustSupport ''
|
||||||
|
|
||||||
|
pushd contrib/corrosion/generator
|
||||||
|
cargoDepsCopy="$corrosionDepsCopy" cargoSetupPostPatchHook
|
||||||
|
popd
|
||||||
|
|
||||||
|
pushd rust
|
||||||
|
cargoDepsCopy="$rustDepsCopy" cargoSetupPostPatchHook
|
||||||
|
popd
|
||||||
|
|
||||||
|
cargoSetupPostPatchHook() { true; }
|
||||||
|
'';
|
||||||
|
|
||||||
|
cmakeFlags = [
|
||||||
|
"-DENABLE_TESTS=OFF"
|
||||||
|
"-DCOMPILER_CACHE=disabled"
|
||||||
|
"-DENABLE_EMBEDDED_COMPILER=ON"
|
||||||
|
];
|
||||||
|
|
||||||
|
# https://github.com/ClickHouse/ClickHouse/issues/49988
|
||||||
|
hardeningDisable = [ "fortify" ];
|
||||||
|
|
||||||
|
postInstall = ''
|
||||||
|
rm -rf $out/share/clickhouse-test
|
||||||
|
|
||||||
|
sed -i -e '\!<log>/var/log/clickhouse-server/clickhouse-server\.log</log>!d' \
|
||||||
|
$out/etc/clickhouse-server/config.xml
|
||||||
|
substituteInPlace $out/etc/clickhouse-server/config.xml \
|
||||||
|
--replace "<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>" "<console>1</console>"
|
||||||
|
substituteInPlace $out/etc/clickhouse-server/config.xml \
|
||||||
|
--replace "<level>trace</level>" "<level>warning</level>"
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Builds in 7+h with 2 cores, and ~20m with a big-parallel builder.
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
|
||||||
|
passthru.tests.clickhouse = nixosTests.clickhouse;
|
||||||
|
|
||||||
|
# Package metadata: homepage, description, license, maintainers,
# supported platforms and the condition under which the package is broken.
meta = with lib; {
|
||||||
|
homepage = "https://clickhouse.com";
|
||||||
|
description = "Column-oriented database management system";
|
||||||
|
license = licenses.asl20;
|
||||||
|
maintainers = with maintainers; [ orivej ];
|
||||||
|
|
||||||
|
# not supposed to work on 32-bit https://github.com/ClickHouse/ClickHouse/pull/23959#issuecomment-835343685
|
||||||
|
# Keep only the Linux and Darwin platforms whose elaborated system is 64-bit.
platforms = lib.filter (x: (lib.systems.elaborate x).is64bit) (platforms.linux ++ platforms.darwin);
|
||||||
|
# Marked broken whenever the build platform differs from the host platform,
# i.e. when cross-compiling.
broken = stdenv.buildPlatform != stdenv.hostPlatform;
|
||||||
|
};
|
||||||
|
}
|
|
@ -1,41 +1,40 @@
|
||||||
From 26e65e4addc990cc09b59b587792ac4a454e5cdd Mon Sep 17 00:00:00 2001
|
From cdea2e8ad98995202ce81c9c030f2ae64d73b05a Mon Sep 17 00:00:00 2001
|
||||||
From: edef <edef@edef.eu>
|
From: edef <edef@edef.eu>
|
||||||
Date: Mon, 30 Oct 2023 08:08:10 +0000
|
Date: Mon, 30 Oct 2023 08:08:10 +0000
|
||||||
Subject: [PATCH] [backport] Support reading arrow::LargeListArray
|
Subject: [PATCH] Support reading arrow::LargeListArray
|
||||||
|
|
||||||
---
|
---
|
||||||
.../Formats/Impl/ArrowColumnToCHColumn.cpp | 35 ++++++++++++++-----
|
.../Formats/Impl/ArrowColumnToCHColumn.cpp | 33 +++++++++++++++----
|
||||||
1 file changed, 26 insertions(+), 9 deletions(-)
|
1 file changed, 26 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
|
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
|
||||||
index 54a6c8493ea..94cf59fd357 100644
|
index 6f9d49498f2..b93846cd4eb 100644
|
||||||
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
|
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
|
||||||
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
|
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
|
||||||
@@ -336,7 +336,22 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr<arrow::ChunkedArray>
|
@@ -436,6 +436,22 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr<arrow::ChunkedArray>
|
||||||
return nullmap_column;
|
return nullmap_column;
|
||||||
}
|
}
|
||||||
|
|
||||||
-static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
|
+template <typename T>
|
||||||
+template<typename T>
|
|
||||||
+struct ArrowOffsetArray;
|
+struct ArrowOffsetArray;
|
||||||
+
|
+
|
||||||
+template<>
|
+template <>
|
||||||
+struct ArrowOffsetArray<arrow::ListArray>
|
+struct ArrowOffsetArray<arrow::ListArray>
|
||||||
+{
|
+{
|
||||||
+ using type = arrow::Int32Array;
|
+ using type = arrow::Int32Array;
|
||||||
+};
|
+};
|
||||||
+
|
+
|
||||||
+template<>
|
+template <>
|
||||||
+struct ArrowOffsetArray<arrow::LargeListArray>
|
+struct ArrowOffsetArray<arrow::LargeListArray>
|
||||||
+{
|
+{
|
||||||
+ using type = arrow::Int64Array;
|
+ using type = arrow::Int64Array;
|
||||||
+};
|
+};
|
||||||
+
|
+
|
||||||
+template<typename ArrowListArray> static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
|
+template <typename ArrowListArray>
|
||||||
|
static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
|
||||||
{
|
{
|
||||||
auto offsets_column = ColumnUInt64::create();
|
auto offsets_column = ColumnUInt64::create();
|
||||||
ColumnArray::Offsets & offsets_data = assert_cast<ColumnVector<UInt64> &>(*offsets_column).getData();
|
@@ -444,9 +460,9 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedAr
|
||||||
@@ -346,9 +361,9 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedAr
|
|
||||||
|
|
||||||
for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i)
|
for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i)
|
||||||
{
|
{
|
||||||
|
@ -43,16 +42,16 @@ index 54a6c8493ea..94cf59fd357 100644
|
||||||
+ ArrowListArray & list_chunk = dynamic_cast<ArrowListArray &>(*(arrow_column->chunk(chunk_i)));
|
+ ArrowListArray & list_chunk = dynamic_cast<ArrowListArray &>(*(arrow_column->chunk(chunk_i)));
|
||||||
auto arrow_offsets_array = list_chunk.offsets();
|
auto arrow_offsets_array = list_chunk.offsets();
|
||||||
- auto & arrow_offsets = dynamic_cast<arrow::Int32Array &>(*arrow_offsets_array);
|
- auto & arrow_offsets = dynamic_cast<arrow::Int32Array &>(*arrow_offsets_array);
|
||||||
+ auto & arrow_offsets = dynamic_cast<typename ArrowOffsetArray<ArrowListArray>::type &>(*arrow_offsets_array);
|
+ auto & arrow_offsets = dynamic_cast<ArrowOffsetArray<ArrowListArray>::type &>(*arrow_offsets_array);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks.
|
* CH uses element size as "offsets", while arrow uses actual offsets as offsets.
|
||||||
@@ -498,13 +513,13 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr<arrow::ChunkedArray>
|
@@ -602,13 +618,14 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr<arrow::ChunkedArray>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
-static std::shared_ptr<arrow::ChunkedArray> getNestedArrowColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
|
+template <typename ArrowListArray>
|
||||||
+template<typename ArrowListArray> static std::shared_ptr<arrow::ChunkedArray> getNestedArrowColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
|
static std::shared_ptr<arrow::ChunkedArray> getNestedArrowColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
|
||||||
{
|
{
|
||||||
arrow::ArrayVector array_vector;
|
arrow::ArrayVector array_vector;
|
||||||
array_vector.reserve(arrow_column->num_chunks());
|
array_vector.reserve(arrow_column->num_chunks());
|
||||||
|
@ -63,13 +62,13 @@ index 54a6c8493ea..94cf59fd357 100644
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks.
|
* It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks.
|
||||||
@@ -636,12 +651,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
@@ -819,12 +836,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
||||||
if (map_type_hint)
|
key_type_hint = map_type_hint->getKeyType();
|
||||||
nested_type_hint = assert_cast<const DataTypeArray *>(map_type_hint->getNestedType().get())->getNestedType();
|
}
|
||||||
}
|
}
|
||||||
- auto arrow_nested_column = getNestedArrowColumn(arrow_column);
|
- auto arrow_nested_column = getNestedArrowColumn(arrow_column);
|
||||||
+ auto arrow_nested_column = getNestedArrowColumn<arrow::ListArray>(arrow_column);
|
+ auto arrow_nested_column = getNestedArrowColumn<arrow::ListArray>(arrow_column);
|
||||||
auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
|
auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint, true);
|
||||||
if (skipped)
|
if (skipped)
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
|
@ -78,7 +77,7 @@ index 54a6c8493ea..94cf59fd357 100644
|
||||||
|
|
||||||
const auto * tuple_column = assert_cast<const ColumnTuple *>(nested_column.column.get());
|
const auto * tuple_column = assert_cast<const ColumnTuple *>(nested_column.column.get());
|
||||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(nested_column.type.get());
|
const auto * tuple_type = assert_cast<const DataTypeTuple *>(nested_column.type.get());
|
||||||
@@ -650,7 +665,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
@@ -846,7 +863,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
||||||
return {std::move(map_column), std::move(map_type), column_name};
|
return {std::move(map_column), std::move(map_type), column_name};
|
||||||
}
|
}
|
||||||
case arrow::Type::LIST:
|
case arrow::Type::LIST:
|
||||||
|
@ -88,13 +87,13 @@ index 54a6c8493ea..94cf59fd357 100644
|
||||||
DataTypePtr nested_type_hint;
|
DataTypePtr nested_type_hint;
|
||||||
if (type_hint)
|
if (type_hint)
|
||||||
{
|
{
|
||||||
@@ -658,11 +675,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
@@ -854,11 +873,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
|
||||||
if (array_type_hint)
|
if (array_type_hint)
|
||||||
nested_type_hint = array_type_hint->getNestedType();
|
nested_type_hint = array_type_hint->getNestedType();
|
||||||
}
|
}
|
||||||
- auto arrow_nested_column = getNestedArrowColumn(arrow_column);
|
- auto arrow_nested_column = getNestedArrowColumn(arrow_column);
|
||||||
+ auto arrow_nested_column = is_large ? getNestedArrowColumn<arrow::LargeListArray>(arrow_column) : getNestedArrowColumn<arrow::ListArray>(arrow_column);
|
+ auto arrow_nested_column = is_large ? getNestedArrowColumn<arrow::LargeListArray>(arrow_column) : getNestedArrowColumn<arrow::ListArray>(arrow_column);
|
||||||
auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
|
auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint);
|
||||||
if (skipped)
|
if (skipped)
|
||||||
return {};
|
return {};
|
||||||
- auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
|
- auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
|
||||||
|
|
7
third_party/overlays/tvl.nix
vendored
7
third_party/overlays/tvl.nix
vendored
|
@ -148,7 +148,12 @@ depot.nix.readTree.drvTargets {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
clickhouse = super.clickhouse.overrideAttrs (old: {
|
# we're vendoring this for now, since the version upgrade has a lot of changes
|
||||||
|
# we can't merge it upstream yet because the Darwin build is broken
|
||||||
|
# https://github.com/NixOS/nixpkgs/pull/267033
|
||||||
|
clickhouse = (super.callPackage ./clickhouse {
|
||||||
|
llvmPackages = super.llvmPackages_16;
|
||||||
|
}).overrideAttrs (old: {
|
||||||
patches = old.patches or [ ] ++ [
|
patches = old.patches or [ ] ++ [
|
||||||
# https://github.com/ClickHouse/ClickHouse/pull/56118
|
# https://github.com/ClickHouse/ClickHouse/pull/56118
|
||||||
./patches/clickhouse-support-reading-arrow-LargeListArray.patch
|
./patches/clickhouse-support-reading-arrow-LargeListArray.patch
|
||||||
|
|
Loading…
Reference in a new issue