@@ -89,21 +89,32 @@ client = chromadb.PersistentClient(path="my_local_data")
89
89
remote_client = chromadb.HttpClient()
90
90
91
91
collection = client.get_or_create_collection(" local_collection" )
92
- collection.add(ids = [" 1" ," 2" ],documents = [" hello world" ," hello ChromaDB" ],metadatas = [{" a" :1 },{" b" :2 }])
93
- remote_collection = remote_client.get_or_create_collection(" remote_collection" ,metadata = collection.metadata)
92
+ collection.add(
93
+ ids = [" 1" ," 2" ],
94
+ documents = [" hello world" ," hello ChromaDB" ],
95
+ metadatas = [{" a" :1 },{" b" :2 }])
96
+ remote_collection = remote_client.get_or_create_collection(" remote_collection" ,
97
+ metadata = collection.metadata)
94
98
existing_count = collection.count()
95
99
batch_size = 10
96
100
for i in range (0 , existing_count, batch_size):
97
- batch = collection.get(include = [" metadatas" , " documents" , " embeddings" ], limit = batch_size, offset = i)
98
- remote_collection.add(ids = batch[" ids" ], documents = batch[" documents" ], metadatas = batch[" metadatas" ],
99
- embeddings = batch[" embeddings" ])
101
+ batch = collection.get(
102
+ include = [" metadatas" , " documents" , " embeddings" ],
103
+ limit = batch_size,
104
+ offset = i)
105
+ remote_collection.add(
106
+ ids = batch[" ids" ],
107
+ documents = batch[" documents" ],
108
+ metadatas = batch[" metadatas" ],
109
+ embeddings = batch[" embeddings" ])
100
110
```
101
111
102
112
!!! note "Using ChromaDB Data Pipes"
103
113
There is a more efficient way to copy data between local and remote collections using the ChromaDB Data Pipes package.
104
114
```bash
105
115
pip install chromadb-data-pipes
106
- cdp export "file://path/to_local_data/local_collection" | cdp import "http://remote_chromadb:port/remote_collection " --create
116
+ cdp export "file://path/to_local_data/local_collection" | \
117
+ cdp import "http://remote_chromadb:port/remote_collection " --create
107
118
```
108
119
109
120
### Cloning a collection
0 commit comments