def merge_contexts(body, found_doc):
    """Fold the contexts stored on an existing document into ``body``.

    Args:
        body: The document about to be written. Its ``"contexts"`` list is
            created if missing or empty, seeded from the single
            ``"context"`` entry when one is supplied. Mutated in place.
        found_doc: The previously fetched document; its stored fields are
            read from ``found_doc["_source"]``.

    Returns:
        The same ``body`` object, whose ``"contexts"`` now also holds every
        stored context that was not already listed (order preserved, no
        duplicates added).
    """
    if not body.get("contexts"):
        # ``.get`` rather than indexing: a body sent without any context must
        # not raise KeyError, and an empty context is not worth recording.
        seed = body.get("context")
        body["contexts"] = [seed] if seed else []
    stored_contexts = (found_doc.get("_source") or {}).get("contexts") or []
    for context in stored_contexts:
        # Contexts are compared by equality (they may be unhashable dicts).
        if context not in body["contexts"]:
            body["contexts"].append(context)
    return body


def delete_context_from_found_doc(context, found_doc, doc_id):
    """Remove one context from a stored document without deleting the document.

    Args:
        context: The context to drop; every stored entry equal to it is
            removed.
        found_doc: The previously fetched document. The stored fields are
            taken from ``found_doc["_source"]`` when that key is present
            (the same shape ``merge_contexts`` reads); otherwise
            ``found_doc`` itself is treated as the stored fields.
        doc_id: Id of the document to update.

    Returns:
        The response of the Elasticsearch ``update`` call.

    NOTE(review): callers deciding whether to call this helper should look
    for ``"contexts"`` in the same place (under ``"_source"``) - confirm.
    """
    source = found_doc.get("_source") or found_doc
    remaining = [row for row in source.get("contexts", []) if context != row]
    es = Elasticsearch(app.config['ELASTICSEARCH_URL'])
    # Partial update: send only the field that changed instead of echoing the
    # whole fetched object back as the new document content.
    return es.update(
        id=doc_id,
        body={"doc": {"contexts": remaining}},
        index=app.config['ELASTICSEARCH_SIMILARITY']
    )
def drop_context_from_text_record(record, context):
    """Remove ``context`` from ``record["contexts"]`` and commit the change.

    Every entry equal to ``context`` is dropped; a record with no
    ``"contexts"`` entry ends up with an empty list. The record is added to
    the database session and committed; on a commit failure the session is
    rolled back and the original exception is re-raised.

    Returns:
        True once the commit has succeeded.

    NOTE(review): ``record`` is accessed like a mapping (``.get`` and item
    assignment) and is then handed to ``db.session.add`` - confirm that the
    objects callers pass support both.
    """
    kept = []
    for row in record.get("contexts", []):
        if context != row:
            kept.append(row)
    record["contexts"] = kept
    db.session.add(record)
    try:
        db.session.commit()
    except Exception as commit_error:
        # Leave the session usable for the caller before propagating.
        db.session.rollback()
        raise commit_error
    return True


def delete_text(doc_id, context, quiet):
    """Delete a text document, or one of its contexts, via ``delete_document``.

    All three arguments are forwarded unchanged and the result of
    ``delete_document`` is returned as-is.
    """
    outcome = delete_document(doc_id, context, quiet)
    return outcome
model_key != 'elasticsearch': model = SharedModel.get_client(model_key) vector = model.get_shared_model_response(body['content']) diff --git a/app/test/test_similarity.py b/app/test/test_similarity.py index 3051206d..015ebcc4 100644 --- a/app/test/test_similarity.py +++ b/app/test/test_similarity.py @@ -342,7 +342,7 @@ def test_elasticsearch_delete_200(self): with self.client: delete_response = self.client.delete( '/text/similarity/', - data=json.dumps({"doc_id": "abcdef", "quiet": True}), + data=json.dumps({"doc_id": "abcdef", "quiet": True, 'context': { 'dbid': 54 }}), content_type='application/json' ) result = json.loads(delete_response.data.decode()) @@ -361,7 +361,26 @@ def test_elasticsearch_delete_text(self): doc = [e for e in results["hits"]["hits"] if e["_source"]['content'] == term['text']][0] delete_response = self.client.delete( '/text/similarity/', - data=json.dumps({"doc_id": doc["_id"]}), + data=json.dumps({"doc_id": doc["_id"], 'context': { 'dbid': 54 }}), + content_type='application/json' + ) + result = json.loads(delete_response.data.decode()) + self.assertEqual('deleted', result['result']) + with self.client: + term = { 'doc_id': '123', 'text': 'how to slice a banana', 'model': 'elasticsearch', 'context': { 'dbid': 54 } } + post_response = self.client.post('/text/similarity/', data=json.dumps(term), content_type='application/json') + term = { 'doc_id': '123', 'text': 'how to slice a banana', 'model': 'elasticsearch', 'context': { 'dbid': 55 } } + post_response = self.client.post('/text/similarity/', data=json.dumps(term), content_type='application/json') + es = Elasticsearch(app.config['ELASTICSEARCH_URL']) + es.indices.refresh(index=app.config['ELASTICSEARCH_SIMILARITY']) + result = json.loads(post_response.data.decode()) + self.assertEqual(True, result['success']) + es = Elasticsearch(app.config['ELASTICSEARCH_URL']) + results = es.search(body={"query": {"match_all": {}}},index=app.config['ELASTICSEARCH_SIMILARITY']) + doc = [e for e in 
results["hits"]["hits"] if e["_source"]['content'] == term['text']][0] + delete_response = self.client.delete( + '/text/similarity/', + data=json.dumps({"doc_id": doc["_id"], 'context': { 'dbid': 54 }}), content_type='application/json' ) result = json.loads(delete_response.data.decode())