Ref2vec-centroid on v4 client

How can I use ref2vec-centroid with the v4 Python client?

I am using the code below, but the returned vector is empty:

import asyncio

import weaviate
from weaviate.classes.config import Configure
from weaviate.client import WeaviateAsyncClient
from weaviate.collections.classes.grpc import QueryReference
from weaviate.collections.classes.config_vectorizers import Multi2VecField
from weaviate.collections.classes.config import Property, DataType, ReferenceProperty

from core.settings import get_settings

# Project settings object; main() reads WEAVIATE_HOST and WEAVIATE_PORT from it.
settings = get_settings()


async def init_collections(client: WeaviateAsyncClient):
    """Create the ``Short`` and ``UserInteractions`` collections if missing.

    Each collection is checked and created independently, so an instance
    that already holds only one of the two is completed rather than left
    half-initialised. ``Short`` is handled first because
    ``UserInteractions`` declares a reference to it.

    Args:
        client: Async Weaviate client used for the existence checks and
            the create calls.
    """
    if not await client.collections.exists('Short'):
        await client.collections.create(
            "Short",
            properties=[
                Property(
                    name='object_id',
                    data_type=DataType.INT,
                    description="Unique identifier for the short.",
                    skip_vectorization=True,
                ),
                Property(
                    name='description',
                    data_type=DataType.TEXT,
                    description="Short description of the content.",
                ),
                Property(
                    name='category',
                    data_type=DataType.TEXT,
                    description="Category of the short.",
                ),
            ],
            # NOTE(review): this collection only defines a *named* vector.
            # ref2vec-centroid on UserInteractions presumably needs the
            # referenced objects to carry a default (unnamed) vector, which
            # may be why the user vector comes back empty -- confirm against
            # the ref2vec-centroid module documentation.
            vectorizer_config=[
                # Set a named vector
                Configure.NamedVectors.multi2vec_bind(
                    name="shorts_vec",
                    text_fields=[
                        Multi2VecField(name='description', weight=0.6),
                        Multi2VecField(name='category', weight=0.4),
                    ]
                ),
            ],
        )

    if not await client.collections.exists('UserInteractions'):
        await client.collections.create(
            "UserInteractions",
            properties=[
                Property(
                    name='object_id',
                    data_type=DataType.INT,
                    description="Unique identifier for the user."
                ),
            ],
            references=[
                ReferenceProperty(
                    name='liked_shorts',
                    target_collection="Short",
                    description="Short which liked by current user",
                ),
            ],
            # The user vector is derived from the objects linked through
            # `liked_shorts`.
            vectorizer_config=Configure.Vectorizer.ref2vec_centroid(
                reference_properties=[
                    'liked_shorts'
                ]
            ),
        )


async def main():
    """Insert one short and one user, link them, and print the user object.

    Connects to the local Weaviate instance configured in ``settings``,
    makes sure the collections exist, inserts a ``Short`` and a
    ``UserInteractions`` object, adds a ``liked_shorts`` cross-reference
    between them, then fetches the user (vector and reference included)
    and prints the result. The client is always closed on exit.
    """
    # `uuid` is not among the module-level imports, so bring it into scope
    # here; without this the first insert below raises NameError.
    import uuid

    client: WeaviateAsyncClient = weaviate.use_async_with_local(
        host=settings.WEAVIATE_HOST,
        port=settings.WEAVIATE_PORT,
        skip_init_checks=True
    )
    await client.connect()
    try:
        await init_collections(client=client)

        short_collection = client.collections.get('Short')
        user_collection = client.collections.get('UserInteractions')

        short_uuid = await short_collection.data.insert(properties={
            'object_id': 1,
            'description': 'description1',
            'category': 'category1'
        }, uuid=uuid.uuid4())
        print(f'created short: {short_uuid}')

        user_uuid = await user_collection.data.insert(properties={'object_id': 1}, uuid=uuid.uuid4())
        print(f'created user: {user_uuid}')

        # creating cross-reference
        await user_collection.data.reference_add(
            from_uuid=user_uuid,
            from_property='liked_shorts',
            to=short_uuid
        )

        # Fetch the user back together with its vector and the linked short.
        resp = await user_collection.query.fetch_object_by_id(
            uuid=user_uuid,
            include_vector=True,
            return_references=QueryReference(
                link_on='liked_shorts',
                return_properties=["object_id"],
            ),
        )

        print(resp)

    finally:
        await client.close()


if __name__ == '__main__':
    asyncio.run(main())

Output:

created short: 09e06d77-a1f6-4274-b0ae-485f532322fd
created user: 0c6ff736-ef4a-45e9-9e34-93ff67a53dfe
ObjectSingleReturn(uuid=_WeaviateUUIDInt('0c6ff736-ef4a-45e9-9e34-93ff67a53dfe'), metadata=MetadataSingleObjectReturn(creation_time=datetime.datetime(2025, 1, 6, 15, 20, 12, 837000, tzinfo=datetime.timezone.utc), last_update_time=datetime.datetime(2025, 1, 6, 15, 20, 12, 840000, tzinfo=datetime.timezone.utc), is_consistent=None), properties={'object_id': 1}, references={'liked_shorts': <weaviate.collections.classes.internal._CrossReference object at 0x7c28da11e610>}, vector={}, collection='UserInteractions')

hi @Bohdan_Klishchov !!

Sorry for the delay here.

I have produced a recipe (soon to be published) and can share an example here:


# Set up the two collections. Any existing collection with the same name is
# dropped first, then created again.

# "Interaction": plain text objects vectorized with text2vec-openai.
client.collections.delete("Interaction")
interaction_properties = [
    wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT),
]
interaction = client.collections.create(
    "Interaction",
    properties=interaction_properties,
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(),
)

# "Person": vectorized with ref2vec-centroid over its "interactions"
# reference property, which points at the "Interaction" collection.
client.collections.delete("Person")
person_properties = [
    wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
]
person_references = [
    wvc.config.ReferenceProperty(name="interactions", target_collection="Interaction")
]
person = client.collections.create(
    "Person",
    properties=person_properties,
    references=person_references,
    vectorizer_config=wvc.config.Configure.Vectorizer.ref2vec_centroid(
        reference_properties=["interactions"]
    ),
)

Creating the data:

from weaviate.util import generate_uuid5

# Create the people. Each UUID is derived from a short key with
# generate_uuid5, so the same key can be used again below to address the
# object.
people = (
    ("bob", "Bob Smith"),
    ("oliver", "Oliver Doe"),
    ("mary", "Mary Jane"),
)
for person_key, full_name in people:
    person.data.insert({"name": full_name}, uuid=generate_uuid5(person_key))

# Create the interactions, keyed "interaction1" .. "interaction4".
interaction_texts = (
    "Is a vegetarian",
    "Like Indian and Asian food",
    "Is carnivore",
    "likes playing soccer",
)
for number, text in enumerate(interaction_texts, start=1):
    interaction.data.insert({"text": text}, uuid=generate_uuid5(f"interaction{number}"))

# Attach interactions to people:
#   bob    -> is a vegetarian
#   oliver -> likes Indian and Asian food
#   mary   -> is carnivore, plays soccer
links = (
    ("bob", "interaction1"),
    ("oliver", "interaction2"),
    ("mary", "interaction3"),
    ("mary", "interaction4"),
)
for person_key, interaction_key in links:
    person.data.reference_add(
        from_uuid=generate_uuid5(person_key),
        from_property="interactions",
        to=generate_uuid5(interaction_key),
    )

Printing some results:

def print_results_to(target_person):
    """Print the people returned by a near-object search around one person.

    Runs a ``near_object`` query on the ``person`` collection using the
    UUID derived from ``target_person``. For every returned object it
    prints the distance, the object's properties, and the ``text`` of each
    linked interaction.

    Args:
        target_person: Key passed to ``generate_uuid5`` to identify the
            person to search around (for example ``"bob"``).
    """
    linked_interactions = wvc.query.QueryReference(
        link_on="interactions", return_properties=["text"]
    )
    with_distance = wvc.query.MetadataQuery(distance=True)

    suggestion = person.query.near_object(
        near_object=generate_uuid5(target_person),
        return_references=linked_interactions,
        return_metadata=with_distance,
        include_vector=True,
    )

    for match in suggestion.objects:
        print(f"### {match.metadata.distance}")
        print(match.properties)
        texts = []
        for linked in match.references["interactions"].objects:
            texts.append(linked.properties.get("text"))
        print(texts, "\n")


print_results_to("bob")

This will output:

### 0.0
{'name': 'Bob Smith'}
['Is a vegetarian']

### 0.2569286823272705
{'name': 'Mary Jane'}
['Is carnivore', 'likes playing soccer']

### 0.37227851152420044
{'name': 'Oliver Doe'}
['Like Indian and Asian food']

Now we create a new customer and check the resulting similarity:

# Add a new customer who wants to do a barbecue.
customer_uuid = generate_uuid5("customer")
barbecue_uuid = generate_uuid5("interaction5")

# First the person ...
person.data.insert({"name": "New Customer"}, uuid=customer_uuid)
# ... then the interaction ...
interaction.data.insert({"text": "Want to do a barbecue"}, uuid=barbecue_uuid)
# ... and finally the reference that ties the two together.
person.data.reference_add(
    from_uuid=customer_uuid,
    from_property="interactions",
    to=barbecue_uuid,
)

And now, printing the results for this person:

# Run the same near-object search, this time around the new customer.
print_results_to("customer")

Will print the output:

### -7.152557373046875e-07
{'name': 'New Customer'}
['Want to do a barbecue']

### 0.3768976926803589
{'name': 'Mary Jane'}
['Is carnivore', 'likes playing soccer']

### 0.4221373200416565
{'name': 'Bob Smith'}
['Is a vegetarian']

### 0.42987650632858276
{'name': 'Oliver Doe'}
['Like Indian and Asian food']

As you can see, once a new person has at least one interaction, it gets a vector and starts to appear close to other people with similar interactions.

Let me know if that helps!

Thanks!

1 Like