|
| 1 | +# Multi-User Basic Auth |
| 2 | + |
| 3 | +## Why Multi-user Auth? |
| 4 | + |
| 5 | +Multi-user authentication can be crucial for several reasons. Let's delve into this topic. |
| 6 | + |
| 7 | +**Security**—The primary concern is the security of your deployments. You need to control who can access your data and |
| 8 | +ensure they are authorized to do so. You may wonder, since Chroma offers basic and token-based authentication, why is |
| 9 | +multi-user authentication necessary? |
| 10 | + |
| 11 | +Without it, you would have to share your Chroma access credentials with your users or any app that depends on Chroma. |
| 12 | +Should you do that? The answer is a categorical NO. |
| 13 | + |
| 14 | +Another reason to consider multi-user authentication is to differentiate access to your data. However, the solution |
| 15 | +presented here doesn't provide this. It's a stepping stone towards our upcoming article on multi-tenancy and securing |
| 16 | +Chroma data. |
| 17 | + |
| 18 | +Last but not least is auditing. While we acknowledge this is not for everybody, there is increasing pressure to |
| 19 | +provide visibility into your app via auditable events. |
| 20 | + |
| 21 | +**Multi-user experiences**—Not all GenAI apps are intended to be private or individual. This is another reason to consider |
| 22 | +and implement multi-user authentication and authorization. |
| 23 | + |
| 24 | +## Dive right in. |
| 25 | + |
| 26 | +Let's get straight to the point and build multi-user authentication on top of basic auth. Here's our goal: |
| 27 | + |
| 28 | +- Develop a server-side credentials provider that can read multiple users from a `.htpasswd` file |
| 29 | +- Generate a multi-user `.htpasswd` file with several test users |
| 30 | +- Package our plugin with the Chroma base image and execute it using Docker Compose |
| 31 | + |
| 32 | +!!! note "Auth CIP" |
| 33 | + |
| 34 | + Chroma has [detailed info](https://github.com/chroma-core/chroma/blob/main/docs/CIP_2_Auth_Providers_Proposal.md) about how its |
| 35 | + authentication and authorization are implemented. Should you want to learn more go read the CIP (Chroma Improvement Proposal doc). |
| 36 | + |
| 37 | +### The Plugin |
| 38 | + |
| 39 | +```python |
| 40 | +import importlib |
| 41 | +import logging |
| 42 | +from typing import Dict, cast, TypeVar, Optional |
| 43 | + |
| 44 | +from chromadb.auth import ( |
| 45 | + ServerAuthCredentialsProvider, |
| 46 | + AbstractCredentials, |
| 47 | + SimpleUserIdentity, |
| 48 | +) |
| 49 | +from chromadb.auth.registry import register_provider |
| 50 | +from chromadb.config import System |
| 51 | +from chromadb.telemetry.opentelemetry import ( |
| 52 | + OpenTelemetryGranularity, |
| 53 | + trace_method, |
| 54 | + add_attributes_to_current_span, |
| 55 | +) |
| 56 | +from pydantic import SecretStr |
| 57 | +from overrides import override |
| 58 | + |
| 59 | +T = TypeVar("T") |
| 60 | + |
| 61 | +logger = logging.getLogger(__name__) |
| 62 | + |
| 63 | + |
@register_provider("multi_user_htpasswd_file")
class MultiUserHtpasswdFileServerAuthCredentialsProvider(ServerAuthCredentialsProvider):
    """Server-side credentials provider backed by a multi-user htpasswd file.

    The file named by the ``chroma_server_auth_credentials_file`` setting is
    read once, at construction time. Each entry must have the form
    ``<username>:<bcrypt passwd>``; blank lines and ``#`` comment lines are
    skipped.
    """

    _creds: Dict[str, SecretStr]  # maps username -> bcrypt password hash

    def __init__(self, system: System) -> None:
        """Import bcrypt and load every user from the configured htpasswd file.

        Raises:
            ValueError: If the ``bcrypt`` package cannot be imported, or if an
                entry in the credentials file is not
                ``<username>:<bcrypt passwd>``.
        """
        super().__init__(system)
        try:
            # Imported dynamically; a missing package is reported as ValueError.
            self.bc = importlib.import_module("bcrypt")
        except ImportError as e:
            raise ValueError(
                "The bcrypt python package is not installed. "
                "Please install it with `pip install bcrypt`"
            ) from e
        system.settings.require("chroma_server_auth_credentials_file")
        _file = str(system.settings.chroma_server_auth_credentials_file)
        self._creds = dict()
        # Passwords and hashes are encoded as UTF-8 when checked, so read the
        # file with the same encoding instead of the platform default.
        with open(_file, "r", encoding="utf-8") as f:
            for line in f:
                _entry = line.strip()
                # Tolerate blank lines (e.g. a trailing newline) and comments
                # rather than refusing to start.
                if not _entry or _entry.startswith("#"):
                    continue
                _raw_creds = _entry.split(":")
                # Reject malformed entries up front, including an empty
                # username or hash, instead of failing later in checkpw.
                if len(_raw_creds) != 2 or not all(_raw_creds):
                    raise ValueError(
                        "Invalid Htpasswd credentials found in "
                        f"[{_file}]. "
                        "Must be <username>:<bcrypt passwd>."
                    )
                self._creds[_raw_creds[0]] = SecretStr(_raw_creds[1])

    @trace_method(  # type: ignore
        "MultiUserHtpasswdFileServerAuthCredentialsProvider.validate_credentials",
        OpenTelemetryGranularity.ALL,
    )
    @override
    def validate_credentials(self, credentials: AbstractCredentials[T]) -> bool:
        """Check the supplied username/password against the loaded users.

        Args:
            credentials: Credentials whose ``get_credentials()`` is expected to
                return exactly the keys ``username`` and ``password``.

        Returns:
            ``True`` only when the username is present in the loaded file and
            bcrypt confirms the password against the stored hash; ``False``
            otherwise, including when the credentials have the wrong shape.
        """
        _creds = cast(Dict[str, SecretStr], credentials.get_credentials())

        if len(_creds) != 2 or "username" not in _creds or "password" not in _creds:
            _format_error = (
                "Returned credentials did not match expected format: "
                "dict[username:SecretStr, password: SecretStr]"
            )
            logger.error(_format_error)
            add_attributes_to_current_span(
                {
                    "auth_succeeded": False,
                    "auth_error": _format_error,
                }
            )
            return False  # early exit on wrong format

        _username = _creds["username"].get_secret_value()
        _user_pwd_hash = self._creds.get(_username)  # None for unknown users
        validation_response = _user_pwd_hash is not None and self.bc.checkpw(
            _creds["password"].get_secret_value().encode("utf-8"),
            _user_pwd_hash.get_secret_value().encode("utf-8"),
        )
        # Record the outcome on the current tracing span.
        add_attributes_to_current_span(
            {
                "auth_succeeded": validation_response,
                "auth_error": f"Failed to validate credentials for user {_username}"
                if not validation_response
                else "",
            }
        )
        return validation_response

    @override
    def get_user_identity(
        self, credentials: AbstractCredentials[T]
    ) -> Optional[SimpleUserIdentity]:
        """Build a ``SimpleUserIdentity`` from the username in ``credentials``."""
        _creds = cast(Dict[str, SecretStr], credentials.get_credentials())
        return SimpleUserIdentity(_creds["username"].get_secret_value())
| 137 | + |
| 138 | +``` |
| 139 | + |
| 140 | +In less than 80 lines of code, we have our plugin. Let's delve into and explain some of the key points of the code |
| 141 | +above: |
| 142 | + |
| 143 | +- `__init__` - Here, we dynamically import bcrypt, which we'll use to check user credentials. We also read the |
| 144 | + configured credentials file - `server.htpasswd` line by line, to retrieve each user (we assume each line contains a |
| 145 | + new user with its bcrypt hash). |
| 146 | +- `validate_credentials` - This is where the magic happens. We initially perform some lightweight validations on the |
| 147 | + credentials parsed by Chroma and passed to the plugin. Then, we attempt to retrieve the user and its hash from |
| 148 | + the `_creds` dictionary. The final step is to verify the hash. We've also added some attributes to monitor our |
| 149 | + authentication process in our observability layer (we have an upcoming article about this). |
| 150 | +- `get_user_identity` - Constructs a simple user identity, which the authorization plugin uses to verify permissions. |
| 151 | + Although not needed for now, each authentication plugin must implement this, as user identities are crucial for |
| 152 | + authorization. |
| 153 | + |
| 154 | +We'll store our plugin in `__init__.py` within the following directory |
| 155 | +structure - `chroma_auth/authn/basic/__init__.py` (refer to the repository for details). |
| 156 | + |
| 157 | +### Password file |
| 158 | + |
| 159 | +Now that we have our plugin, let’s create a password file with a few users: |
| 160 | + |
| 161 | +Initial user: |
| 162 | + |
| 163 | +```bash |
| 164 | +echo "password123" | htpasswd -iBc server.htpasswd admin |
| 165 | +``` |
| 166 | + |
| 167 | +The above will create (`-c` flag) a new `server.htpasswd` file with the initial user `admin`; the password will be read |
| 168 | +from stdin (`-i` flag) and saved as a bcrypt hash (`-B` flag). |
| 169 | + |
| 170 | +Let’s add another user: |
| 171 | + |
| 172 | +```bash |
| 173 | +echo "password123" | htpasswd -iB server.htpasswd user1 |
| 174 | +``` |
| 175 | + |
| 176 | +Now our `server.htpasswd` file will look like this: |
| 177 | + |
| 178 | +```bash |
| 179 | +admin:$2y$05$vkBK4b1Vk5O98jNHgr.uduTJsTOfM395sKEKe48EkJCVPH/MBIeHK |
| 180 | +user1:$2y$05$UQ0kC2x3T2XgeN4WU12BdekUwCJmLjJNhMaMtFNolYdj83OqiEpVu |
| 181 | +``` |
| 182 | + |
| 183 | +Moving on to docker setup. |
| 184 | + |
| 185 | +### Docker compose setup |
| 186 | + |
| 187 | +Let’s create a `Dockerfile` to bundle our plugin with the official Chroma image: |
| 188 | + |
| 189 | +```docker |
| 190 | +ARG CHROMA_VERSION=0.4.24 |
| 191 | +FROM ghcr.io/chroma-core/chroma:${CHROMA_VERSION} as base |
| 192 | +
|
| 193 | +COPY chroma_auth/ /chroma/chroma_auth |
| 194 | +``` |
| 195 | + |
| 196 | +This will pick up the official docker image for Chroma and will add our plugin directory structure so that we can use |
| 197 | +it. |
| 198 | + |
| 199 | +Now let’s create an `.env` file to load our plugin: |
| 200 | + |
| 201 | +```bash |
| 202 | +CHROMA_SERVER_AUTH_PROVIDER="chromadb.auth.basic.BasicAuthServerProvider" |
| 203 | +CHROMA_SERVER_AUTH_CREDENTIALS_FILE="server.htpasswd" |
| 204 | +CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER="chroma_auth.authn.basic.MultiUserHtpasswdFileServerAuthCredentialsProvider" |
| 205 | +``` |
| 206 | + |
| 207 | +And finally our `docker-compose.yaml`: |
| 208 | + |
| 209 | +```yaml |
| 210 | +version: '3.9' |
| 211 | + |
| 212 | +networks: |
| 213 | + net: |
| 214 | + driver: bridge |
| 215 | + |
| 216 | +services: |
| 217 | + server: |
| 218 | + image: chroma-server |
| 219 | + build: |
| 220 | + dockerfile: Dockerfile |
| 221 | + volumes: |
| 222 | + - ./chroma-data:/chroma/chroma |
| 223 | + - ./server.htpasswd:/chroma/server.htpasswd |
| 224 | + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" |
| 225 | + environment: |
| 226 | + - IS_PERSISTENT=TRUE |
| 227 | + - CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER} |
| 228 | + - CHROMA_SERVER_AUTH_CREDENTIALS_FILE=${CHROMA_SERVER_AUTH_CREDENTIALS_FILE} |
| 229 | + - CHROMA_SERVER_AUTH_CREDENTIALS=${CHROMA_SERVER_AUTH_CREDENTIALS} |
| 230 | + - CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER=${CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER} |
| 231 | + - CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER} |
| 232 | + - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} |
| 233 | + - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} |
| 234 | + - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} |
| 235 | + - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} |
| 236 | + - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} |
| 237 | + - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} |
| 238 | + restart: unless-stopped # possible values are: "no", "always", "on-failure", "unless-stopped" |
| 239 | + ports: |
| 240 | + - "8000:8000" |
| 241 | + healthcheck: |
| 242 | + # Adjust below to match your container port |
| 243 | + test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v1/heartbeat" ] |
| 244 | + interval: 30s |
| 245 | + timeout: 10s |
| 246 | + retries: 3 |
| 247 | + networks: |
| 248 | + - net |
| 249 | + |
| 250 | +``` |
| 251 | + |
| 252 | +### The test |
| 253 | + |
| 254 | +Let’s run our docker compose setup: |
| 255 | + |
| 256 | +```bash |
| 257 | +docker compose --env-file ./.env up --build |
| 258 | +``` |
| 259 | + |
| 260 | +You *should* see the following log message if the plugin was successfully loaded: |
| 261 | + |
| 262 | +```bash |
| 263 | +server-1 | DEBUG: [01-04-2024 14:10:13] Starting component MultiUserHtpasswdFileServerAuthCredentialsProvider |
| 264 | +server-1 | DEBUG: [01-04-2024 14:10:13] Starting component BasicAuthServerProvider |
| 265 | +server-1 | DEBUG: [01-04-2024 14:10:13] Starting component FastAPIChromaAuthMiddleware |
| 266 | + |
| 267 | +``` |
| 268 | + |
| 269 | +Once our container is up and running, let’s see if our multi-user auth works: |
| 270 | + |
| 271 | +```python |
| 272 | +import chromadb |
| 273 | +from chromadb.config import Settings |
| 274 | + |
| 275 | +client = chromadb.HttpClient( |
| 276 | + settings=Settings(chroma_client_auth_provider="chromadb.auth.basic.BasicAuthClientProvider",chroma_client_auth_credentials="admin:password123")) |
| 277 | +client.heartbeat() # this should work with or without authentication - it is a public endpoint |
| 278 | +client.get_or_create_collection("test_collection") # this is a protected endpoint and requires authentication |
| 279 | +client.list_collections() # this is a protected endpoint and requires authentication |
| 280 | +``` |
| 281 | + |
| 282 | +The above code should return the list of collections, which contains the single collection `test_collection` that we created. |
| 283 | + |
| 284 | +```bash |
| 285 | +(chromadb-multi-user-basic-auth-py3.11) [chromadb-multi-user-basic-auth]python 19:51:38 ☁ main ☂ ⚡ ✚ |
| 286 | +Python 3.11.7 (main, Dec 30 2023, 14:03:09) [Clang 15.0.0 (clang-1500.1.0.2.5)] on darwin |
| 287 | +Type "help", "copyright", "credits" or "license" for more information. |
| 288 | +>>> import chromadb |
| 289 | +>>> from chromadb.config import Settings |
| 290 | +>>> |
| 291 | +>>> client = chromadb.HttpClient( |
| 292 | +... settings=Settings(chroma_client_auth_provider="chromadb.auth.basic.BasicAuthClientProvider",chroma_client_auth_credentials="admin:password123")) |
| 293 | +>>> client.heartbeat() # this should work with or without authentication - it is a public endpoint |
| 294 | +1711990302270211007 |
| 295 | +>>> |
| 296 | +>>> client.list_collections() # this is a protected endpoint and requires authentication |
| 297 | +[] |
| 298 | +``` |
| 299 | + |
| 300 | +Great, now let’s test for our other user: |
| 301 | + |
| 302 | +```python |
| 303 | +client = chromadb.HttpClient( |
| 304 | + settings=Settings(chroma_client_auth_provider="chromadb.auth.basic.BasicAuthClientProvider",chroma_client_auth_credentials="user1:password123")) |
| 305 | +``` |
| 306 | + |
| 307 | +Works just as well (logs omitted for brevity). |
| 308 | + |
| 309 | +To ensure that our plugin works as expected, let’s also test with a user that is not in our `server.htpasswd` file: |
| 310 | + |
| 311 | +```python |
| 312 | +client = chromadb.HttpClient( |
| 313 | + settings=Settings(chroma_client_auth_provider="chromadb.auth.basic.BasicAuthClientProvider",chroma_client_auth_credentials="invalid_user:password123")) |
| 314 | +``` |
| 315 | + |
| 316 | +```bash |
| 317 | +Traceback (most recent call last): |
| 318 | + File "<stdin>", line 1, in <module> |
| 319 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/__init__.py", line 197, in HttpClient |
| 320 | + return ClientCreator(tenant=tenant, database=database, settings=settings) |
| 321 | + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 322 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/api/client.py", line 144, in __init__ |
| 323 | + self._validate_tenant_database(tenant=tenant, database=database) |
| 324 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/api/client.py", line 445, in _validate_tenant_database |
| 325 | + raise e |
| 326 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/api/client.py", line 438, in _validate_tenant_database |
| 327 | + self._admin_client.get_tenant(name=tenant) |
| 328 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/api/client.py", line 486, in get_tenant |
| 329 | + return self._server.get_tenant(name=name) |
| 330 | + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 331 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/telemetry/opentelemetry/__init__.py", line 127, in wrapper |
| 332 | + return f(*args, **kwargs) |
| 333 | + ^^^^^^^^^^^^^^^^^^ |
| 334 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/api/fastapi.py", line 200, in get_tenant |
| 335 | + raise_chroma_error(resp) |
| 336 | + File "/Users/tazarov/Library/Caches/pypoetry/virtualenvs/chromadb-multi-user-basic-auth-vIZuPNTE-py3.11/lib/python3.11/site-packages/chromadb/api/fastapi.py", line 649, in raise_chroma_error |
| 337 | + raise chroma_error |
| 338 | +chromadb.errors.AuthorizationError: Unauthorized |
| 339 | + |
| 340 | +``` |
| 341 | + |
| 342 | +As expected, we get an auth error when trying to connect to Chroma: client initialization validates the tenant and |
| 343 | +database, both of which are protected endpoints, and that raises the exception above. |
0 commit comments