From 0e94213af06aaf861757e88f796212242afff765 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 29 Sep 2022 18:55:24 +0800 Subject: [PATCH] validate index settings on create --- src/core/index.rs | 40 +++++++++++++++++++++++++++++++++---- src/indexer/index_writer.rs | 29 +++++++++++++++++++++++++++ src/query/set_query.rs | 1 - src/schema/field_type.rs | 21 +++++++++++++++++++ 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/src/core/index.rs b/src/core/index.rs index d0f89e1b04..21f79137d8 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -19,7 +19,7 @@ use crate::error::{DataCorruption, TantivyError}; use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN}; use crate::indexer::segment_updater::save_metas; use crate::reader::{IndexReader, IndexReaderBuilder}; -use crate::schema::{Field, FieldType, Schema}; +use crate::schema::{Cardinality, Field, FieldType, Schema}; use crate::tokenizer::{TextAnalyzer, TokenizerManager}; use crate::IndexWriter; @@ -152,9 +152,7 @@ impl IndexBuilder { /// This should only be used for unit tests. pub fn create_in_ram(self) -> Result { let ram_directory = RamDirectory::create(); - Ok(self - .create(ram_directory) - .expect("Creating a RamDirectory should never fail")) + self.create(ram_directory) } /// Creates a new index in a given filepath. @@ -228,10 +226,44 @@ impl IndexBuilder { )) } } + + fn validate(&self) -> crate::Result<()> { + if let Some(schema) = self.schema.as_ref() { + if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref() { + let schema_field = schema.get_field(&sort_by_field.field).ok_or_else(|| { + TantivyError::InvalidArgument(format!( + "Field to sort index {} not found in schema", + sort_by_field.field + )) + })?; + let entry = schema.get_field_entry(schema_field); + if !entry.is_fast() { + return Err(TantivyError::InvalidArgument(format!( + "Field {} is no fast field. Field needs to be a single value fast field \ + to be used to sort an index", + sort_by_field.field + ))); + } + if entry.field_type().fastfield_cardinality() != Some(Cardinality::SingleValue) { + return Err(TantivyError::InvalidArgument(format!( + "Only single value fast field Cardinality supported for sorting index {}", + sort_by_field.field + ))); + } + } + Ok(()) + } else { + Err(TantivyError::InvalidArgument( + "no schema passed".to_string(), + )) + } + } + /// Creates a new index given an implementation of the trait `Directory`. /// /// If a directory previously existed, it will be erased. fn create>>(self, dir: T) -> crate::Result { + self.validate()?; let dir = dir.into(); let directory = ManagedDirectory::wrap(dir)?; save_new_metas( diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index a49a4c0645..3caa0f4aa3 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1395,6 +1395,35 @@ mod tests { assert!(commit_again.is_ok()); } + #[test] + fn test_sort_by_multivalue_field_error() -> crate::Result<()> { + let mut schema_builder = schema::Schema::builder(); + let options = NumericOptions::default().set_fast(Cardinality::MultiValues); + schema_builder.add_u64_field("id", options); + let schema = schema_builder.build(); + + let settings = IndexSettings { + sort_by_field: Some(IndexSortByField { + field: "id".to_string(), + order: Order::Desc, + }), + ..Default::default() + }; + + let err = Index::builder() + .schema(schema) + .settings(settings) + .create_in_ram() + .unwrap_err(); + assert_eq!( + err.to_string(), + "An invalid argument was passed: 'Only single value fast field Cardinality supported \ + for sorting index id'" + ); + + Ok(()) + } + #[test] fn test_delete_with_sort_by_field() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); diff --git a/src/query/set_query.rs b/src/query/set_query.rs index 32074bd1f6..002e6cbd63 100644 --- a/src/query/set_query.rs +++ b/src/query/set_query.rs @@ -43,7 +43,6 @@ impl TermSetQuery { let error_msg = format!("Field {:?} is not indexed.", field_entry.name()); return Err(crate::TantivyError::SchemaError(error_msg)); } - let field_type = field_type.value_type(); // In practice this won't fail because: // - we are writing to memory, so no IoError diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 50ff98afe5..3a631697e8 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; use thiserror::Error; +use super::Cardinality; use crate::schema::bytes_options::BytesOptions; use crate::schema::facet_options::FacetOptions; use crate::schema::{ @@ -214,6 +215,26 @@ impl FieldType { } } + /// returns true if the field is fast. + pub fn fastfield_cardinality(&self) -> Option { + match *self { + FieldType::Bytes(ref bytes_options) if bytes_options.is_fast() => { + Some(Cardinality::SingleValue) + } + FieldType::Str(ref text_options) if text_options.is_fast() => { + Some(Cardinality::MultiValues) + } + FieldType::U64(ref int_options) + | FieldType::I64(ref int_options) + | FieldType::F64(ref int_options) + | FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality(), + FieldType::Date(ref date_options) => date_options.get_fastfield_cardinality(), + FieldType::Facet(_) => Some(Cardinality::MultiValues), + FieldType::JsonObject(_) => None, + _ => None, + } + } + /// returns true if the field is normed (see [fieldnorms](crate::fieldnorm)). pub fn has_fieldnorms(&self) -> bool { match *self {