The script listed here for migrating data with the Python v4 client did not work for me: the cross-references were not migrated.
I used the following script instead, which migrates the objects together with their cross-references. If the migration is interrupted, the script prints the last UUID
transferred. That UUID can then be used as after_key to continue the migration.
from typing import List, Optional
from tqdm import tqdm
import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient
import os
import json
from dotenv import load_dotenv
# Load variables from a .env file (if any) before the environment is read
# below, so SOURCE_HOST / DEST_HOST can be configured there.
load_dotenv()

# Connection to the instance the objects are read from.
client_src = weaviate.connect_to_local(
    grpc_port=50051,
    port=8080,
    host=os.getenv("SOURCE_HOST", "localhost"),
)

# Connection to the instance the objects are written to.
client_tgt = weaviate.connect_to_local(
    grpc_port=50051,
    port=8080,
    host=os.getenv("DEST_HOST", "localhost"),
)
def migrate_data(
    collection_src: Collection,
    collection_tgt: Collection,
    after_key: Optional[str] = None,
) -> bool:
    """Copy the objects of ``collection_src`` into ``collection_tgt``.

    Objects are read in pages of 250 using cursor pagination and written
    through a dynamic batch. Each object keeps its UUID, its properties, its
    ``"default"`` vector and its ``hasMetadata`` cross-references.

    Args:
        collection_src: Collection the objects are read from.
        collection_tgt: Collection the objects are written to.
        after_key: UUID to resume after. ``None`` (the default) starts from
            the beginning. Pass the UUID printed by an interrupted run to
            continue from that point.

    Returns:
        ``True`` if the batch reported no failed objects, ``False`` otherwise.

    Raises:
        Exception: Any error raised while fetching or batching is re-raised
            unchanged, after the current cursor has been printed.
    """
    response = collection_src.aggregate.over_all(total_count=True)
    tgt_response = collection_tgt.aggregate.over_all(total_count=True)
    # The bar counts what is still missing in the target, so a resumed run
    # starts with a sensible total.
    remaining = response.total_count - tgt_response.total_count

    try:
        with collection_tgt.batch.dynamic() as batch:
            with tqdm(total=remaining) as pbar:
                while True:
                    objects = collection_src.query.fetch_objects(
                        limit=250,
                        after=after_key,
                        include_vector=True,
                        return_references=[
                            wvc.query.QueryReference(
                                link_on="hasMetadata",
                                return_properties=[],
                            )
                        ],
                    ).objects
                    if not objects:
                        break

                    for q in objects:
                        # ``references`` may be None rather than an empty
                        # mapping when an object has no cross-references.
                        refs = q.references or {}
                        if "hasMetadata" in refs:
                            references = {
                                "hasMetadata": [
                                    r.uuid for r in refs["hasMetadata"].objects
                                ]
                            }
                        else:
                            references = None

                        batch.add_object(
                            properties=q.properties,
                            vector=q.vector.get("default"),
                            uuid=q.uuid,
                            references=references,
                        )
                        pbar.update(1)

                    # Advance the cursor only once the whole page is queued.
                    after_key = objects[-1].uuid
    except Exception:
        # Print the cursor so the caller can resume from it via ``after_key``.
        print(after_key)
        raise

    # Batch imports do not raise for individual objects; rejected objects are
    # collected on the batch wrapper and must be checked explicitly.
    failed = collection_tgt.batch.failed_objects
    if failed:
        print(f"Number of failed imports: {len(failed)}")
        print(f"First failed object: {failed[0]}")
    return not failed
def print_last_uuid():
    """Print the UUID and creation time of the newest ``Document`` objects.

    Opens its own connection to the target instance (``DEST_HOST``, default
    ``localhost``), fetches the three objects of the ``Document`` collection
    with the highest ``_creationTimeUnix`` and prints, for each, its UUID
    followed by its creation time. The connection is closed before returning,
    including when the query fails.
    """
    from weaviate.collections.classes.grpc import Sort

    client_tgt = weaviate.connect_to_local(
        host=os.getenv("DEST_HOST", "localhost"),
        port=8080,
        grpc_port=50051,
    )
    try:
        article = client_tgt.collections.get("Document")
        response = article.query.fetch_objects(
            return_metadata=wvc.query.MetadataQuery(creation_time=True),
            # Descending order puts the most recently created objects first.
            sort=Sort.by_property(name="_creationTimeUnix", ascending=False),
            limit=3,
        )
        for r in response.objects:
            print(r.uuid)
            print(r.metadata.creation_time)
    finally:
        # Always release the connection opened above.
        client_tgt.close()
if __name__ == "__main__":
    # Migrate the "Document" collection from the source to the target
    # instance, then release both connections whether or not it succeeded.
    try:
        reviews_src = client_src.collections.get("Document")
        reviews_tgt = client_tgt.collections.get("Document")
        migrate_data(reviews_src, reviews_tgt)
        # Uncomment to list the most recently created objects in the target,
        # e.g. after an interrupted run:
        # print_last_uuid()
    finally:
        client_src.close()
        client_tgt.close()