Patch-Source: https://github.com/meilisearch/meilisearch/pull/3687 (rebased) -- From 2fd515a34c8a9648f32320ececa09d68cc0b7719 Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Mon, 24 Apr 2023 00:26:08 +0200 Subject: [PATCH] Allow to disable specialized tokenizations (again) In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai` feature flags to allow meilisearch to be built without huge specialized tokenizations that took up 90% of the meilisearch binary size. Unfortunately, due to some recent changes, this doesn't work anymore. The problem lies in excessive use of the `default` feature flag, which infects the dependency graph. Instead of adding `default-features = false` here and there, it's easier and more future-proof to not declare `default` in `milli` and `meilisearch-types`. I've renamed it to `all-tokenizations`, which also makes it a bit clearer what it's about. diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 5203a76014..0de81413b5 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -13,7 +13,7 @@ license.workspace = true [dependencies] anyhow = "1.0.65" csv = "1.1.6" -milli = { path = "../milli", default-features = false } +milli = { path = "../milli" } mimalloc = { version = "0.1.29", default-features = false } serde_json = { version = "1.0.85", features = ["preserve_order"] } @@ -31,7 +31,7 @@ flate2 = "1.0.24" reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false } [features] -default = ["milli/default"] +default = ["milli/all-tokenizations"] [[bench]] name = "search_songs" diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 9828c5f493..b8a3f3bb44 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -22,7 +22,7 @@ file-store = { path = "../file-store" } flate2 = "1.0.24" fst = "0.4.7" memmap2 = "0.5.7" -milli = { path = "../milli", default-features = false } +milli = { path = "../milli" } roaring = { version = "0.10.0", 
features = ["serde"] } serde = { version = "1.0.145", features = ["derive"] } serde-cs = "0.2.4" @@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" } [features] # all specialized tokenizations -default = ["milli/default"] +all-tokenizations = ["milli/all-tokenizations"] # chinese specialized tokenization chinese = ["milli/chinese"] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index d4b7723224..c9aa7850ff 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -106,7 +106,7 @@ vergen = { version = "7.4.2", default-features = false, features = ["git"] } zip = { version = "0.6.2", optional = true } [features] -default = ["analytics", "meilisearch-types/default", "mini-dashboard"] +default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"] chinese = ["meilisearch-types/chinese"] --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -19 +19 @@ -charabia = { version = "0.7.1", default-features = false } +charabia = { version = "0.7.2", default-features = false } @@ -68,7 +68,7 @@ rand = {version = "0.8.5", features = ["small_rng"] } fuzzcheck = "0.12.1" [features] -default = [ "charabia/default" ] +all-tokenizations = [ "charabia/default" ] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1558,7 +1558,7 @@ use super::*; use crate::index::tests::TempIndex; - #[cfg(feature = "default")] + #[cfg(feature = "japanese")] #[test] fn test_kanji_language_detection() { let index = TempIndex::new(); --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -451,7 +451,7 @@ use super::*; use crate::index::tests::TempIndex; - #[cfg(feature = "default")] + #[cfg(feature = "japanese")] #[test] fn 
test_kanji_language_detection() { let index = TempIndex::new();