Hi, we are trying to create a schema that supports multimodal search, where the user issues text queries but the search must run semantically across columns containing either text or vectors.
Below is the schema, where image_embeddings is a bring-your-own-vector column: we will generate the embeddings for an image ourselves and don't want Weaviate to create vectors for it, but it needs to be part of the multimodal search together with other fields such as filename, tags, and mime_type. Please provide the correct way to define the schema for this multimodal search with bring-your-own vectors.
# Create the "SemanticSchema" collection for multimodal search.
#
# Text properties are embedded by Weaviate through the multi2vec-clip module
# (one named vector per field, plus one combined vector). The image embedding
# is NOT produced by Weaviate: it is declared as a named vector with no
# vectorizer ("bring your own vector") and must be supplied by the caller in
# the `vector={"image_embeddings": [...]}` argument when inserting an object.
client.collections.create(
    name="SemanticSchema",
    properties=[
        wc.Property(name="lcid", data_type=wc.DataType.TEXT),
        wc.Property(name="checksum", data_type=wc.DataType.TEXT),
        wc.Property(name="filename", data_type=wc.DataType.TEXT),
        wc.Property(name="tags", data_type=wc.DataType.TEXT),
        wc.Property(name="mime_type", data_type=wc.DataType.TEXT),
        wc.Property(name="person_names", data_type=wc.DataType.TEXT_ARRAY),
        wc.Property(name="location", data_type=wc.DataType.TEXT),
        # There is deliberately no "image_embeddings" property: a NUMBER_ARRAY
        # property is plain data, not a searchable vector, and it cannot be
        # used as a multi2vec-clip image field (those must be BLOB properties
        # holding the base64-encoded image itself). The precomputed embedding
        # lives in the "image_embeddings" named vector declared below.
    ],
    # Named vectors: Weaviate-generated ones for text, caller-supplied for images.
    vectorizer_config=[
        wc.Configure.NamedVectors.multi2vec_clip(
            name="filename", text_fields=["filename"]
        ),
        wc.Configure.NamedVectors.multi2vec_clip(
            name="tags", text_fields=["tags"]
        ),
        wc.Configure.NamedVectors.multi2vec_clip(
            name="mime_type", text_fields=["mime_type"]
        ),
        wc.Configure.NamedVectors.multi2vec_clip(
            name="location", text_fields=["location"]
        ),
        # Combined text vector. The vector name is kept unchanged so existing
        # queries targeting it keep working, but it no longer lists an image
        # field (the original pointed at a NUMBER_ARRAY property, which the
        # module cannot vectorize). No per-field weights are configured.
        wc.Configure.NamedVectors.multi2vec_clip(
            name="image_filename_tags",
            text_fields=[
                wc.Multi2VecField(name="filename"),
                wc.Multi2VecField(name="tags"),
                wc.Multi2VecField(name="mime_type"),
                wc.Multi2VecField(name="location"),
            ],
        ),
        # Bring-your-own-vector slot: no vectorizer, so Weaviate stores and
        # indexes exactly the vector passed at import time.
        # NOTE(review): near_text cannot target this vector because it has no
        # vectorizer; embed the query text client-side and use near_vector.
        # That is only meaningful if the image embeddings come from a model
        # whose text encoder shares the same embedding space -- confirm.
        wc.Configure.NamedVectors.none(name="image_embeddings"),
    ],
    # Define the generative module
    # generative_config=wc.Configure.Generative.openai(),
    # Sharding: `desired_virtual_count` is intentionally not set. It is
    # derived by Weaviate as desired_count * virtual_per_physical (256 here),
    # and the previous explicit value of 128 contradicted that.
    sharding_config=wc.Configure.sharding(
        virtual_per_physical=128,
        desired_count=2,
    ),
    # NOTE(review): a replication factor of 2 presumably requires a cluster
    # with at least two nodes -- confirm against the deployment.
    replication_config=wc.Configure.replication(
        factor=2,
        async_enabled=True,
    ),
)