2023-01-18 16:35:18 +01:00
|
|
|
{ depot, lib, pkgs, ... }:
|
2023-01-17 22:36:41 +01:00
|
|
|
|
|
|
|
let
|
|
|
|
# Native dependencies; this list is inherited both by the development shell
# and by the package build further down.
buildInputs = [
  pkgs.sqlite
  pkgs.pkg-config
];
|
|
|
|
|
|
|
|
# Mirrored input data from OpenCorpora, as of 2023-01-17.
#
# This data is licensed under CC-BY-SA.
#
# The store name carries the full `.bz2` suffix so that it agrees with the
# mirrored URL and with the bzip2 tooling used to unpack it. The sha256 below
# is the hash of the downloaded file's contents, so it is unaffected by the
# name.
openCorporaArchive = pkgs.fetchurl {
  name = "dict.opcorpora.xml.bz2";
  url = "https://tazj.in/blobs/opencorpora-20230117.xml.bz2";
  sha256 = "04n5g43fkfc93z6xlwf2qfdrfdfl562pc2ivdb3cbbbsy56gkqg6";
};
|
|
|
|
|
2023-01-20 09:54:38 +01:00
|
|
|
# Decompressed OpenCorpora dictionary: streams the downloaded archive through
# bunzip2 and writes the result directly to the output path.
openCorpora =
  let
    # Absolute path of the decompressor inside the bzip2 package.
    bunzip2 = "${pkgs.bzip2}/bin/bunzip2";
  in
  pkgs.runCommand "dict.opcorpora.xml" { } ''
    ${bunzip2} -k -c ${openCorporaArchive} > $out
  '';
|
|
|
|
|
2023-01-20 11:31:12 +01:00
|
|
|
# Mirror of the OpenRussian input data (snapshot taken on 2023-01-17).
#
# The data is licensed under CC-BY-SA.
#
# NOTE(review): per the nixpkgs documentation `fetchzip` unpacks the archive
# after downloading, so despite the binding's name this presumably refers to
# the extracted contents rather than the tarball itself; confirm against the
# importer.
openRussianArchive = pkgs.fetchzip {
  url = "https://tazj.in/blobs/openrussian-20230117.tar.xz";
  name = "openrussian-20230117";
  sha256 = "06jl7i23cx58a0n2626hb82xlzimixvnxp7lxdw0g664kv9bmw25";
};
|
|
|
|
|
2023-01-17 22:36:41 +01:00
|
|
|
# Development shell: brings in the native dependencies and exposes both
# datasets to the environment.
shell = pkgs.mkShell {
  # Dataset locations, exported as environment variables inside the shell.
  OPENCORPORA_DATA = openCorpora;
  OPENRUSSIAN_DATA = openRussianArchive;

  # Same native dependencies as the package build.
  inherit buildInputs;
};
|
2023-01-18 16:35:18 +01:00
|
|
|
|
2023-01-17 22:36:41 +01:00
|
|
|
in
|
2023-01-18 16:35:18 +01:00
|
|
|
# The package itself. `lib.fix` makes the finished derivation available as
# `self` inside its own definition, so that the `database` target below can
# invoke the `data-import` binary produced by this very build.
lib.fix (self:
  let
    # Dataset locations handed to the importer through its environment;
    # these are the same two variables that the development shell sets.
    datasetEnv = {
      OPENCORPORA_DATA = openCorpora;
      OPENRUSSIAN_DATA = openRussianArchive;
    };
  in
  depot.third_party.naersk.buildPackage {
    inherit buildInputs;
    src = depot.third_party.gitignoreSource ./.;

    # Extra derivations exposed next to the package.
    # NOTE(review): `drvTargets` presumably marks these as readTree build
    # targets; confirm against depot's readTree documentation.
    passthru = depot.nix.readTree.drvTargets {
      inherit openCorpora shell;

      # Target that actually builds an entire database: runs the importer
      # from this package and lets it write to the output path.
      database = pkgs.runCommand "tvl-russian-db.sqlite" datasetEnv
        "${self}/bin/data-import --output $out";
    };
  })
|