Skip to content

Commit

Permalink
search-api: Attempt to reduce number of index fields
Browse files Browse the repository at this point in the history
Since we already exceed the index field limit when we don't use dynamic mapping,
this is a first attempt at removing some unused language fields.
  • Loading branch information
jnatten committed Feb 26, 2025
1 parent 69d68d4 commit a4dd138
Show file tree
Hide file tree
Showing 17 changed files with 151 additions and 110 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,18 @@ package no.ndla.searchapi.model.search
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
import no.ndla.search.model.{SearchableLanguageList, SearchableLanguageValues}
import no.ndla.searchapi.model.taxonomy.TaxonomyContext

// NOTE: This will need to match `TaxonomyContextDTO` in `taxonomy-api`
case class SearchableTaxonomyContext(
domainObject: TaxonomyContext,
publicId: String,
contextId: String,
rootId: String,
root: SearchableLanguageValues,
path: String,
breadcrumbs: SearchableLanguageList,
contextType: String,
relevanceId: String,
relevance: SearchableLanguageValues,
resourceTypes: List[SearchableTaxonomyResourceType],
resourceTypeIds: List[String],
parentIds: List[String],
isPrimary: Boolean,
isActive: Boolean,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ object Node {
})
}

// NOTE: This will need to match `TaxonomyContextDTO` in `taxonomy-api`
case class TaxonomyContext(
publicId: String,
rootId: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,19 @@ trait ArticleIndexService {
),
dateField("nextRevision.revisionDate") // This is needed for sorting, even if it is never used for articles
)
val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("metaDescription") ++
generateLanguageSupportedDynamicTemplates("content") ++
generateLanguageSupportedDynamicTemplates("visualElement") ++
generateLanguageSupportedDynamicTemplates("introduction") ++
generateLanguageSupportedDynamicTemplates("metaDescription") ++
generateLanguageSupportedDynamicTemplates("tags") ++
generateLanguageSupportedDynamicTemplates("embedAttributes") ++
generateLanguageSupportedDynamicTemplates("relevance") ++
generateLanguageSupportedDynamicTemplates("breadcrumbs") ++
generateLanguageSupportedDynamicTemplates("name", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("context.root") ++
generateLanguageSupportedDynamicTemplates("context.relevance") ++
generateLanguageSupportedDynamicTemplates("context.resourceTypes.name") ++
generateLanguageSupportedDynamicTemplates("contexts.root") ++
generateLanguageSupportedDynamicTemplates("contexts.relevance") ++
generateLanguageSupportedDynamicTemplates("contexts.resourceTypes.name")
val dynamics =
languageValuesMapping("title", keepRaw = true) ++
languageValuesMapping("metaDescription") ++
languageValuesMapping("content") ++
languageValuesMapping("visualElement") ++
languageValuesMapping("introduction") ++
languageValuesMapping("tags") ++
languageValuesMapping("embedAttributes") ++
languageValuesMapping("relevance") ++
languageValuesMapping("breadcrumbs") ++
languageValuesMapping("name", keepRaw = true)

properties(fields).dynamicTemplates(dynamics)
properties(fields ++ dynamics)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,12 @@ trait DraftConceptIndexService {
ObjectField("domainObject", enabled = Some(false))
)
val dynamics =
generateLanguageSupportedDynamicTemplates("title", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("content", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("tags")
languageValuesMapping("title", keepRaw = true) ++
languageValuesMapping("content", keepRaw = true) ++
languageValuesMapping("tags")

properties(fields ++ dynamics)

properties(fields).dynamicTemplates(dynamics)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,29 +99,22 @@ trait DraftIndexService {
keywordField("defaultRoot"),
keywordField("defaultResourceTypeName")
)
val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("metaDescription") ++
generateLanguageSupportedDynamicTemplates("content") ++
generateLanguageSupportedDynamicTemplates("visualElement") ++
generateLanguageSupportedDynamicTemplates("introduction") ++
generateLanguageSupportedDynamicTemplates("tags") ++
generateLanguageSupportedDynamicTemplates("embedAttributes") ++
generateLanguageSupportedDynamicTemplates("relevance") ++
generateLanguageSupportedDynamicTemplates("breadcrumbs") ++
generateLanguageSupportedDynamicTemplates("name", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("contexts.root", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("parentTopicName", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("resourceTypeName", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("primaryRoot", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("context.root") ++
generateLanguageSupportedDynamicTemplates("context.relevance") ++
generateLanguageSupportedDynamicTemplates("context.resourceTypes.name") ++
generateLanguageSupportedDynamicTemplates("contexts.root") ++
generateLanguageSupportedDynamicTemplates("contexts.relevance") ++
generateLanguageSupportedDynamicTemplates("contexts.resourceTypes.name")

properties(fields).dynamicTemplates(dynamics)
val dynamics =
languageValuesMapping("title", keepRaw = true) ++
languageValuesMapping("metaDescription") ++
languageValuesMapping("content") ++
languageValuesMapping("visualElement") ++
languageValuesMapping("introduction") ++
languageValuesMapping("tags") ++
languageValuesMapping("embedAttributes") ++
languageValuesMapping("relevance") ++
languageValuesMapping("breadcrumbs") ++
languageValuesMapping("name", keepRaw = true) ++
languageValuesMapping("parentTopicName", keepRaw = true) ++
languageValuesMapping("resourceTypeName", keepRaw = true) ++
languageValuesMapping("primaryRoot", keepRaw = true)

properties(fields ++ dynamics)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ trait GrepIndexService {
ObjectField("domainObject", enabled = Some(false))
)

val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true)
properties(fields).dynamicTemplates(dynamics)
val dynamics = languageValuesMapping("title", keepRaw = true)
properties(fields ++ dynamics)
}

def indexDocuments(numShards: Option[Int], grepBundle: Option[GrepBundle]): Try[ReindexResult] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ package no.ndla.searchapi.service.search

import com.sksamuel.elastic4s.ElasticDsl.*
import com.sksamuel.elastic4s.analysis.*
import com.sksamuel.elastic4s.fields.{ElasticField, NestedField}
import com.sksamuel.elastic4s.fields.{ElasticField, NestedField, ObjectField}
import com.sksamuel.elastic4s.requests.indexes.IndexRequest
import com.sksamuel.elastic4s.requests.mappings.dynamictemplate.DynamicTemplateRequest
import com.typesafe.scalalogging.StrictLogging
Expand All @@ -32,6 +32,32 @@ trait IndexService {

trait BulkIndexingService extends BaseIndexService {

/** Builds explicit (non-dynamic) text field mappings for a language-keyed value.
  *
  * One text field named `<name>.<languageTag>` is produced for every entry in
  * `SearchLanguage.languageAnalyzers`, analyzed with that entry's analyzer. Each of these
  * fields carries the `trigram`, `decompounded` and `exact` text subfields, plus a `raw`
  * keyword subfield when requested.
  *
  * @param name
  *   Prefix of the generated field names (the part before the language tag).
  * @param keepRaw
  *   When true, a `raw` keyword subfield is appended to the subfields of every language field.
  * @return
  *   The generated fields, one per language analyzer.
  */
protected def languageValuesMapping(name: String, keepRaw: Boolean = false): Seq[ElasticField] = {
  val baseSubfields = List(
    textField("trigram").analyzer("trigram"),
    textField("decompounded").searchAnalyzer("standard").analyzer("compound_analyzer"),
    textField("exact").analyzer("exact")
  )
  val allSubfields =
    if (keepRaw) baseSubfields :+ keywordField("raw")
    else baseSubfields

  // TODO: Not analyzed fields.
  //       Only languages with a dedicated analyzer get a mapping here. An earlier sketch derived
  //       the remaining languages from the part1 codes in `CodeLists.iso639Definitions` that are
  //       not among the analyzed language tags, but their mapping was never filled in.
  for (langAnalyzer <- SearchLanguage.languageAnalyzers) yield {
    val languageTag = langAnalyzer.languageTag.toString
    textField(s"$name.$languageTag")
      .analyzer(langAnalyzer.analyzer)
      .fields(allSubfields)
  }
}

/** Returns Sequence of DynamicTemplateRequest for a given field.
*
* @param fieldName
Expand Down Expand Up @@ -294,18 +320,19 @@ trait IndexService {

protected def getTaxonomyContextMapping(fieldName: String): NestedField = {
nestedField(fieldName).fields(
keywordField("publicId"),
keywordField("contextId"),
keywordField("path"),
keywordField("contextType"),
keywordField("rootId"),
keywordField("parentIds"),
keywordField("relevanceId"),
booleanField("isActive"),
booleanField("isPrimary"),
keywordField("url"),
nestedField("resourceTypes").fields(
keywordField("id")
List(
ObjectField("domainObject", enabled = Some(false)),
keywordField("publicId"),
keywordField("contextId"),
keywordField("path"),
keywordField("contextType"),
keywordField("rootId"),
keywordField("parentIds"),
keywordField("relevanceId"),
booleanField("isActive"),
booleanField("isPrimary"),
keywordField("url"),
keywordField("resourceTypeIds")
)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,21 +88,16 @@ trait LearningPathIndexService {
),
dateField("nextRevision.revisionDate") // This is needed for sorting, even if it is never used for learningpaths
)
val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("content") ++
generateLanguageSupportedDynamicTemplates("description") ++
generateLanguageSupportedDynamicTemplates("tags", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("relevance") ++
generateLanguageSupportedDynamicTemplates("breadcrumbs") ++
generateLanguageSupportedDynamicTemplates("name", keepRaw = true) ++
generateLanguageSupportedDynamicTemplates("context.root") ++
generateLanguageSupportedDynamicTemplates("context.relevance") ++
generateLanguageSupportedDynamicTemplates("context.resourceTypes.name") ++
generateLanguageSupportedDynamicTemplates("contexts.root") ++
generateLanguageSupportedDynamicTemplates("contexts.relevance") ++
generateLanguageSupportedDynamicTemplates("contexts.resourceTypes.name")
val dynamics =
languageValuesMapping("title", keepRaw = true) ++
languageValuesMapping("content") ++
languageValuesMapping("description") ++
languageValuesMapping("tags", keepRaw = true) ++
languageValuesMapping("relevance") ++
languageValuesMapping("breadcrumbs") ++
languageValuesMapping("name", keepRaw = true)

properties(fields).dynamicTemplates(dynamics)
properties(fields ++ dynamics)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ trait MultiSearchService {
}
}

def logShardErrors(response: RequestSuccess[SearchResponse]) = {
private def logShardErrors(response: RequestSuccess[SearchResponse]) = {
if (response.result.shards.failed > 0) {
response.body.map { body =>
io.circe.parser.parse(body).toTry match {
Expand Down Expand Up @@ -183,7 +183,11 @@ trait MultiSearchService {
val index = getSearchIndexes(settings).?
val searchToExecute = search(index)
.query(filteredSearch)
// TODO: .suggestions(suggestions(settings.query.underlying, searchLanguage, settings.fallback))
// TODO: This fails because `node` doesn't have a field indexed at "content.bla.bla".
// Even with a dynamic mapping template, that field does not exist until data is indexed.
// This also happens for other fields in other indexes, so maybe we need to reconsider using the dynamic mapping templates,
// since this might be a problem for other fields or other languages (especially ones where not every index has every language).
.suggestions(suggestions(settings.query.underlying, searchLanguage, settings.fallback))
.from(pagination.startAt)
.trackTotalHits(true)
.size(pagination.pageSize)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ trait NodeIndexService {
)

val dynamics =
generateLanguageSupportedDynamicTemplates("title")
languageValuesMapping("title") ++
languageValuesMapping("content")

properties(fields).dynamicTemplates(dynamics)
properties(fields ++ dynamics)
}

def indexDocuments(numShards: Option[Int], indexingBundle: IndexingBundle): Try[ReindexResult] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,16 +209,15 @@ trait SearchConverterService {
): List[SearchableTaxonomyContext] = {
taxonomyContexts.map(context =>
SearchableTaxonomyContext(
domainObject = context,
publicId = context.publicId,
contextId = context.contextId,
rootId = context.rootId,
root = context.root,
path = context.path,
breadcrumbs = context.breadcrumbs,
contextType = context.contextType.getOrElse(""),
relevanceId = context.relevanceId,
relevance = context.relevance,
resourceTypes = context.resourceTypes,
resourceTypeIds = context.resourceTypes.map(_.id),
parentIds = context.parentIds,
isPrimary = context.isPrimary,
isActive = context.isActive,
Expand Down Expand Up @@ -991,19 +990,21 @@ trait SearchConverterService {
context: SearchableTaxonomyContext,
language: String
): ApiTaxonomyContextDTO = {
val subjectName = findByLanguageOrBestEffort(context.root.languageValues, language).map(_.value).getOrElse("")
val subjectName =
findByLanguageOrBestEffort(context.domainObject.root.languageValues, language).map(_.value).getOrElse("")
val breadcrumbs = findByLanguageOrBestEffort(context.breadcrumbs.languageValues, language)
.map(_.value)
.getOrElse(Seq.empty)
.toList

val resourceTypes = context.resourceTypes.map(rt => {
val resourceTypes = context.domainObject.resourceTypes.map(rt => {
val name = findByLanguageOrBestEffort(rt.name.languageValues, language)
.getOrElse(LanguageValue(UnknownLanguage.toString, ""))
TaxonomyResourceTypeDTO(id = rt.id, name = name.value, language = name.language)
})

val relevance = findByLanguageOrBestEffort(context.relevance.languageValues, language).map(_.value).getOrElse("")
val relevance =
findByLanguageOrBestEffort(context.domainObject.relevance.languageValues, language).map(_.value).getOrElse("")

ApiTaxonomyContextDTO(
publicId = context.publicId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,16 @@ trait TaxonomyFiltering {
if (filterByNoResourceType) {
Some(
boolQuery().not(
nestedQuery("contexts.resourceTypes", existsQuery("contexts.resourceTypes"))
nestedQuery("contexts", existsQuery("contexts.resourceTypeIds"))
)
)
} else { None }
} else {
Some(
nestedQuery(
"contexts.resourceTypes",
"contexts",
boolQuery().should(
resourceTypes.map(resourceTypeId => termQuery("contexts.resourceTypes.id", resourceTypeId))
resourceTypes.map(resourceTypeId => termQuery("contexts.resourceTypeIds", resourceTypeId))
)
)
)
Expand Down
29 changes: 25 additions & 4 deletions search-api/src/test/scala/no/ndla/searchapi/TestData.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1773,7 +1773,6 @@ object TestData {
publicId = "urn:resource:101",
contextId = "contextId",
rootId = "urn:subject:1",
root = SearchableLanguageValues(Seq(LanguageValue("nb", "Matte"))),
path = "/subject:3/topic:1/topic:151/resource:101",
breadcrumbs = SearchableLanguageList(
Seq(
Expand All @@ -1782,12 +1781,34 @@ object TestData {
),
contextType = LearningResourceType.Article.toString,
relevanceId = "urn:relevance:core",
relevance = SearchableLanguageValues(Seq(LanguageValue("nb", "Kjernestoff"))),
resourceTypes = searchableResourceTypes,
resourceTypeIds = searchableResourceTypes.map(_.id),
parentIds = List("urn:topic:1"),
isPrimary = true,
isActive = true,
url = "/subject:3/topic:1/topic:151/resource:101"
url = "/subject:3/topic:1/topic:151/resource:101",
domainObject = TaxonomyContext(
publicId = "urn:resource:101",
rootId = "urn:subject:1",
root = SearchableLanguageValues(Seq(LanguageValue("nb", "Matte"))),
path = "/subject:3/topic:1/topic:151/resource:101",
breadcrumbs = SearchableLanguageList(
Seq(
LanguageValue("nb", Seq("Matte", "Østen for solen", "Vesten for månen"))
)
),
contextType = Some(LearningResourceType.Article.toString),
relevanceId = "urn:relevance:core",
relevance = SearchableLanguageValues(Seq(LanguageValue("nb", "Kjernestoff"))),
resourceTypes = resourceTypes.map(rt =>
SearchableTaxonomyResourceType(rt.id, SearchableLanguageValues(Seq(LanguageValue("nb", rt.name))))
),
parentIds = List("urn:topic:1"),
isPrimary = true,
contextId = Random.alphanumeric.take(12).mkString,
isVisible = true,
isActive = true,
url = "/subject:3/topic:1/topic:151/resource:101"
)
)

val searchableTaxonomyContexts: List[SearchableTaxonomyContext] = List(
Expand Down
Loading

0 comments on commit a4dd138

Please sign in to comment.