Migrate with cross references

The script listed here for migrating data with the Python client v4 did not work for me: the cross-references were not migrated.

I used the following script to migrate the data together with its cross-references. If the migration is interrupted, the script prints the UUID of the last object in the last fully processed page; set after_key to that UUID to resume from that point.

from typing import List, Optional
from tqdm import tqdm
import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient
import os
import json

from dotenv import load_dotenv

load_dotenv()

def _connect(host_env_var: str):
    """Connect to the Weaviate instance whose host is named by ``host_env_var``.

    Falls back to ``localhost`` when the environment variable is unset; the
    HTTP port (8080) and gRPC port (50051) are fixed.
    """
    return weaviate.connect_to_local(
        host=os.getenv(host_env_var, "localhost"),
        port=8080,
        grpc_port=50051,
    )


# Source instance (objects are read from here) and target instance (objects
# are written here). Hosts come from SOURCE_HOST / DEST_HOST.
client_src = _connect("SOURCE_HOST")
client_tgt = _connect("DEST_HOST")

def migrate_data(
    collection_src: Collection,
    collection_tgt: Collection,
    after_key: Optional[str] = None,
    page_size: int = 250,
    reference_name: str = "hasMetadata",
):
    """Copy every object of ``collection_src`` into ``collection_tgt``.

    Objects are read page by page with the cursor (``after``) API and written
    through a dynamic batch. Each object keeps its UUID, its properties, its
    ``"default"`` vector and the targets of its ``reference_name``
    cross-reference.

    Args:
        collection_src: Collection to read from.
        collection_tgt: Collection to write to.
        after_key: UUID to resume after. Pass the value printed by a previous,
            interrupted run; ``None`` starts from the beginning.
        page_size: Number of objects fetched per page.
        reference_name: Name of the cross-reference property to migrate.

    Returns:
        ``True`` once the source has been read to the end.

    Raises:
        Exception: Whatever the fetch or the batch raised. The current cursor
            is printed first so the run can be resumed with ``after_key``.
    """
    src_total = collection_src.aggregate.over_all(total_count=True).total_count
    tgt_total = collection_tgt.aggregate.over_all(total_count=True).total_count

    try:
        # The progress-bar total is only an estimate of the remaining work:
        # objects in the source minus objects already present in the target.
        with collection_tgt.batch.dynamic() as batch, tqdm(
            total=src_total - tgt_total
        ) as pbar:
            while True:
                objects = collection_src.query.fetch_objects(
                    limit=page_size,
                    after=after_key,
                    include_vector=True,
                    return_references=[
                        wvc.query.QueryReference(
                            link_on=reference_name,
                            # Only the referenced UUIDs are needed.
                            return_properties=[],
                        )
                    ],
                ).objects

                if not objects:
                    break

                for obj in objects:
                    # Guard against objects that come back without any
                    # reference mapping at all.
                    refs = obj.references or {}
                    batch.add_object(
                        properties=obj.properties,
                        vector=obj.vector.get("default"),
                        uuid=obj.uuid,
                        references={
                            reference_name: [
                                target.uuid
                                for target in refs[reference_name].objects
                            ]
                        }
                        if reference_name in refs
                        else None,
                    )
                    pbar.update(1)

                # Advance the cursor only after the whole page was queued.
                after_key = str(objects[-1].uuid)

    except Exception:
        # Print the resume point before propagating the error unchanged.
        print(after_key)
        raise

    # Batch errors do not raise; surface them instead of losing objects silently.
    failed = collection_tgt.batch.failed_objects
    if failed:
        print(f"{len(failed)} object(s) failed to import")

    return True

def print_last_uuid():
    """Print the UUID and creation time of the newest target objects.

    Opens its own connection to the target instance (host from ``DEST_HOST``,
    default ``localhost``), fetches the three objects of the ``Document``
    collection with the highest ``_creationTimeUnix`` and prints each UUID
    followed by its creation time. The connection is closed before returning,
    even if the query fails.
    """
    import weaviate
    import weaviate.classes as wvc
    from weaviate.collections.classes.grpc import Sort

    client_tgt = weaviate.connect_to_local(
        host=os.getenv("DEST_HOST", "localhost"),
        port=8080,
        grpc_port=50051,
    )

    try:
        article = client_tgt.collections.get("Document")

        response = article.query.fetch_objects(
            return_metadata=wvc.query.MetadataQuery(creation_time=True),
            # Newest first.
            sort=Sort.by_property(name="_creationTimeUnix", ascending=False),
            limit=3,
        )

        for r in response.objects:
            print(r.uuid)
            print(r.metadata.creation_time)
    finally:
        # Release the connection opened above; it was previously leaked.
        client_tgt.close()

if __name__ == "__main__":
    # Migrate the "Document" collection from the source instance to the
    # target instance, then close both connections whether or not the
    # migration succeeded.
    try:
        reviews_src = client_src.collections.get("Document")
        reviews_tgt = client_tgt.collections.get("Document")

        migrate_data(reviews_src, reviews_tgt)
        # Uncomment to list the most recently created objects in the target:
        # print_last_uuid()
    finally:
        client_src.close()
        client_tgt.close()

Hi!

Sorry to hear that. We are working to improve that migration tool.

Indeed cross-references are tricky.

Hope to have an update on this soon.

Hi!
Any updates on the migration script?