I have generated vector embeddings using the AWS Titan Embedding model and uploaded them with their properties into Weaviate. I’m trying to run queries on specific programs. I have a question about the best approach in the v4 version. Below is the code I used:
Python
# Step 2: Define the collection for the 'Document' class
from weaviate.classes.config import Configure, Property, DataType
from weaviate.classes.query import Filter, MetadataQuery
# Schema definition for the "Document_snap" collection. Every property is a
# plain text field; vectors are supplied by the caller at import time, so no
# vectorizer module is configured.
_text_fields = ("url", "title", "chunks", "program")  # "program" holds 'program1' / 'program2' / 'program3'
collection_snap = dict(
    [
        ("class", "Document_snap"),
        ("description", "A class to represent documents of SNAP"),
        ("vectorizer", "none"),  # embeddings are provided explicitly
        ("moduleConfig", {}),  # left empty: no vectorizer / generative module enabled
        ("properties", [{"name": field, "dataType": ["text"]} for field in _text_fields]),
    ]
)
del _text_fields  # helper only; keep the module namespace as it was
# Batch upload: add every row of df_combined to the "Document_snap" collection,
# passing the precomputed embedding as the object's vector.
with client.batch.dynamic() as batch:
    # Count rows with enumerate so the progress message is a true 1-based
    # counter even when the DataFrame index is not 0..n-1 (or not numeric).
    for position, (_, row) in enumerate(df_combined.iterrows(), start=1):
        print(f"Importing document: {position}")
        properties = {
            "url": row["url"],
            "title": row["title"],
            "chunks": row["chunks"],
            "program": row["program"],  # includes 'program1', 'program2', 'program3'
        }
        batch.add_object(
            collection="Document_snap",  # target collection name
            vector=row["embeddings"],
            properties=properties,
        )

# Inspect failures once the batch context has been left, and only claim
# success when nothing failed.
failed_objs_a = client.batch.failed_objects
if not failed_objs_a:
    print('batch import successful')
else:
    print('batch import finished with failed objects')
print(len(failed_objs_a))
# Maps the user's menu choice to the program value(s) to filter on:
# 'p1'..'p3' each map to one program name (a str), 'All' maps to the list
# of all three names.
User_request_program_options = {f'p{n}': f'program{n}' for n in (1, 2, 3)}
User_request_program_options['All'] = list(User_request_program_options.values())
Question 1: Is this the right way to run a query with a filter on a single program?
Python
# Query 1: vector search restricted to a single program.
User_program_request = User_request_program_options['p1']  # program1
document_collection = client.collections.get("Document_snap")

# Nearest-neighbour search on the query vector, keeping only objects whose
# "program" property equals the requested program.
response = document_collection.query.near_vector(
    near_vector=query_vector,
    filters=Filter.by_property("program").equal(User_program_request),
    limit=2,
    return_metadata=MetadataQuery(distance=True),
)
for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)

# Plain (non-vector) fetch with the same filter. This must go through the
# collection handle obtained above; the name `collection` is never defined.
query = document_collection.query.fetch_objects(
    filters=Filter.by_property("program").equal(User_program_request),
)
Title: Running Vector Query with Filter on Weaviate v4 - Multiple Programs Filter
Question 2: How about if the user’s choice is the ‘All’ option, which includes all three programs? Is this the right way to run the query?
Python
# Query 2: vector search across several programs (the 'All' option).
User_program_request = User_request_program_options['All']  # ['program1', 'program2', 'program3']
document_collection = client.collections.get("Document_snap")

response = document_collection.query.near_vector(
    near_vector=query_vector,
    # equal() compares against a single value. For a list of acceptable
    # values use contains_any(), which matches objects whose "program"
    # is any one of the listed programs.
    # NOTE(review): assumes each program name is a single token under the
    # property's tokenization - confirm for names containing spaces.
    filters=Filter.by_property("program").contains_any(User_program_request),
    limit=2,
    return_metadata=MetadataQuery(distance=True),
)
for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)