Skip to content

Commit

Permalink
Misc improvements and fixes for local setup (#245)
Browse files Browse the repository at this point in the history
* Misc improvements and fixes for local setup

* Fix ports for infinity and length check when embedding

* Update workflow conditions

* Update workflow

* Revert removing reranker and embedding service url as build args
  • Loading branch information
chiragjn authored Jun 24, 2024
1 parent 969d75e commit 1411d3c
Show file tree
Hide file tree
Showing 16 changed files with 236 additions and 166 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/pre-commit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# CI workflow: run the repository's pre-commit hooks on every push / PR to main.
# NOTE(review): the scraped source had lost all YAML indentation; this body
# restores conventional two-space nesting with the content otherwise unchanged.
name: pre-commit hooks check

on:
  push:
    branches:
      - "main"
  pull_request:
    branches:
      - "main"

# Cancel an in-flight run for the same workflow + ref when a newer one starts,
# so stale pushes do not waste runner time.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  check_pre_commit_hooks:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Single version today; matrix form keeps it easy to add more later.
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      # Install the backend requirements first so hooks that import project
      # dependencies (and `pip check`) see a consistent environment.
      - name: Install dependencies
        run: |
          pip install -U pip setuptools wheel
          pip install -r backend/requirements.txt
          pip check
      - name: Install pre-commit
        run: |
          pip install -U pre-commit
      - name: Check files with pre-commit
        run: |
          pre-commit run --all-files --show-diff-on-failure -v
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Cognita makes it really easy to customize and experiment everything about a RAG

## :whale: Using Docker compose (recommended - version 25+)

Cognita and all of it's services can be run using docker-compose. This is the recommended way to run Cognita locally. Install Docker and docker-compose for your system from: [Docker Compose](https://docs.docker.com/compose/install/)
Cognita and all of its services can be run using docker-compose. This is the recommended way to run Cognita locally. Install Docker and docker-compose for your system from: [Docker Compose](https://docs.docker.com/compose/install/)

You can run the following command to start the services:

Expand All @@ -90,7 +90,7 @@ docker-compose --env-file compose.env up

- The compose file uses `compose.env` file for environment variables. You can modify it as per your needs.
- The compose file will start the following services:
- `postgres` - Used to store metadata for collections and data sources.
- `cognita-db` - Postgres instance used to store metadata for collections and data sources.
- `qdrant-server` - Used to start local vector db server.
- `cognita-backend` - Used to start the FastAPI backend server for Cognita.
- `cognita-frontend` - Used to start the frontend for Cognita.
Expand Down
6 changes: 5 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@ RUN if [ "${ADD_PARSER}" = "1" ]; then python3 -m pip install --use-pep517 --no-
ARG ADD_VECTORDB=0
RUN if [ "${ADD_VECTORDB}" = "1" ]; then python3 -m pip install --use-pep517 --no-cache-dir -r /tmp/vectordb.requirements.txt; fi

# Install Prisma CLI
ARG ADD_PRISMA=0
RUN if [ "${ADD_PRISMA}" = "1" ]; then prisma version; fi

# TODO (chiragjn): These should be removed from here and directly added as environment variables
# Temporary addition until templates have been updated using build args as environment variables
ARG ADD_RERANKER_SVC_URL=""
ENV RERANKER_SVC_URL=${ADD_RERANKER_SVC_URL}

ARG ADD_EMBEDDING_SVC_URL=""
ENV EMBEDDING_SVC_URL=${ADD_EMBEDDING_SVC_URL}

Expand Down
165 changes: 85 additions & 80 deletions backend/modules/metadata_store/prismastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ async def aconnect(cls, **kwargs):
async def acreate_collection(self, collection: CreateCollection) -> Collection:
try:
existing_collection = await self.aget_collection_by_name(collection.name)
if existing_collection:
logger.error(f"Collection with name {collection.name} already exists")
raise HTTPException(
status_code=400,
detail=f"Collection with name {collection.name} already exists",
)
except Exception as e:
logger.error(f"Error:{e}")
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=e)

if existing_collection:
logger.error(f"Collection with name {collection.name} already exists")
raise HTTPException(
status_code=400,
detail=f"Collection with name {collection.name} already exists",
)

try:
logger.info(f"Creating collection: {collection.dict()}")
collection_data = collection.dict()
Expand Down Expand Up @@ -130,16 +131,17 @@ async def adelete_collection(self, collection_name: str, include_runs=False):
async def acreate_data_source(self, data_source: CreateDataSource) -> DataSource:
try:
existing_data_source = await self.aget_data_source_from_fqn(data_source.fqn)
if existing_data_source:
logger.error(f"Data source with fqn {data_source.fqn} already exists")
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source.fqn} already exists",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

if existing_data_source:
logger.error(f"Data source with fqn {data_source.fqn} already exists")
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source.fqn} already exists",
)

try:
data = data_source.dict()
data["metadata"] = json.dumps(data["metadata"])
Expand Down Expand Up @@ -175,32 +177,34 @@ async def aassociate_data_source_with_collection(
) -> Collection:
try:
existing_collection = await self.aget_collection_by_name(collection_name)
if not existing_collection:
logger.error(f"Collection with name {collection_name} does not exist")
raise HTTPException(
status_code=400,
detail=f"Collection with name {collection_name} does not exist",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

if not existing_collection:
logger.error(f"Collection with name {collection_name} does not exist")
raise HTTPException(
status_code=400,
detail=f"Collection with name {collection_name} does not exist",
)

try:
data_source = await self.aget_data_source_from_fqn(
data_source_association.data_source_fqn
)
if not data_source:
logger.error(
f"Data source with fqn {data_source_association.data_source_fqn} does not exist"
)
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_association.data_source_fqn} does not exist",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

if not data_source:
logger.error(
f"Data source with fqn {data_source_association.data_source_fqn} does not exist"
)
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_association.data_source_fqn} does not exist",
)

logger.info(f"Data source to associate: {data_source}")
try:
# Append datasource to existing collection
Expand Down Expand Up @@ -251,48 +255,50 @@ async def aunassociate_data_source_with_collection(
) -> Collection:
try:
collection = await self.aget_collection_by_name(collection_name)
if not collection:
logger.error(f"Collection with name {collection_name} does not exist")
raise HTTPException(
status_code=400,
detail=f"Collection with name {collection_name} does not exist",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

if not collection:
logger.error(f"Collection with name {collection_name} does not exist")
raise HTTPException(
status_code=400,
detail=f"Collection with name {collection_name} does not exist",
)

try:
data_source = await self.aget_data_source_from_fqn(data_source_fqn)
if not data_source:
logger.error(f"Data source with fqn {data_source_fqn} does not exist")
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} does not exist",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

try:
associated_data_sources = collection.associated_data_sources
if not associated_data_sources:
logger.error(
f"No associated data sources found for collection {collection_name}"
)
raise HTTPException(
status_code=400,
detail=f"No associated data sources found for collection {collection_name}",
)
if data_source_fqn not in associated_data_sources:
logger.error(
f"Data source with fqn {data_source_fqn} not associated with collection {collection_name}"
)
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} not associated with collection {collection_name}",
)
if not data_source:
logger.error(f"Data source with fqn {data_source_fqn} does not exist")
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} does not exist",
)

associated_data_sources.pop(data_source_fqn, None)
associated_data_sources = collection.associated_data_sources
if not associated_data_sources:
logger.error(
f"No associated data sources found for collection {collection_name}"
)
raise HTTPException(
status_code=400,
detail=f"No associated data sources found for collection {collection_name}",
)
if data_source_fqn not in associated_data_sources:
logger.error(
f"Data source with fqn {data_source_fqn} not associated with collection {collection_name}"
)
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} not associated with collection {collection_name}",
)

associated_data_sources.pop(data_source_fqn, None)
try:
updated_collection = await self.db.collection.update(
where={"name": collection_name},
data={"associated_data_sources": json.dumps(associated_data_sources)},
Expand Down Expand Up @@ -325,39 +331,38 @@ async def adelete_data_source(self, data_source_fqn: str):
# Check if data source exists if not raise an error
try:
data_source = await self.aget_data_source_from_fqn(data_source_fqn)
if not data_source:
logger.error(f"Data source with fqn {data_source_fqn} does not exist")
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} does not exist",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

if not data_source:
logger.error(f"Data source with fqn {data_source_fqn} does not exist")
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} does not exist",
)

# Check if data source is associated with any collection
try:
collections = await self.aget_collections()
for collection in collections:
associated_data_sources = collection.associated_data_sources
if (
associated_data_sources
and data_source_fqn in associated_data_sources
):
logger.error(
f"Data source with fqn {data_source_fqn} is already associated with "
f"collection {collection.name}"
)
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} is associated "
f"with collection {collection.name}. Delete the necessary collections "
f"or unassociate them from the collection(s) before deleting the data source",
)
except Exception as e:
logger.error(f"Error: {e}")
raise HTTPException(status_code=500, detail=f"Error: {e}")

for collection in collections:
associated_data_sources = collection.associated_data_sources
if associated_data_sources and data_source_fqn in associated_data_sources:
logger.error(
f"Data source with fqn {data_source_fqn} is already associated with "
f"collection {collection.name}"
)
raise HTTPException(
status_code=400,
detail=f"Data source with fqn {data_source_fqn} is associated "
f"with collection {collection.name}. Delete the necessary collections "
f"or unassociate them from the collection(s) before deleting the data source",
)

# Delete the data source
try:
logger.info(f"Data source to delete: {data_source}")
Expand Down
Loading

0 comments on commit 1411d3c

Please sign in to comment.