feat(corp/data-import): build morphology database in derivation
This makes the actual imported database of almost the whole Russian language (all lemmas, grammemes, forms etc.) a Nix build target which is built in CI.

The schema still needs normalisation (it currently maps fairly directly onto the raw data), but it is already starting to be a useful data set.

This also happens to be a pretty cool demonstration of the power of Nix. You can run `nix-build -A corp.russian.data-import.database` and out comes a perfectly valid SQLite database containing a valid import of the external data!

Change-Id: I5d6d15e67d0e4a7ff590fad06252be34f5d561fd
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7866
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
parent
0ed6583edc
commit
c891833414
1 changed file with 10 additions and 6 deletions
|
@@ -1,4 +1,4 @@
|
|||
{ depot, pkgs, ... }:
|
||||
{ depot, lib, pkgs, ... }:
|
||||
|
||||
let
|
||||
buildInputs = with pkgs; [
|
||||
|
@@ -26,14 +26,18 @@ let
|
|||
# make OPENCORPORA_DATA available in the environment
|
||||
OPENCORPORA_DATA = inputData;
|
||||
};
|
||||
|
||||
in
|
||||
depot.third_party.naersk.buildPackage {
|
||||
lib.fix (self: depot.third_party.naersk.buildPackage {
|
||||
src = depot.third_party.gitignoreSource ./.;
|
||||
inherit buildInputs;
|
||||
|
||||
passthru = {
|
||||
inherit shell;
|
||||
|
||||
passthru = depot.nix.readTree.drvTargets {
|
||||
inherit shell inputData;
|
||||
|
||||
# target that actually builds an entire database
|
||||
database = pkgs.runCommand "tvl-russian-db.sqlite" { } ''
|
||||
${self}/bin/data-import ${inputData} $out
|
||||
'';
|
||||
};
|
||||
}
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue