Hello, I’m new to python and I’ve been following the YT on v4 and going through the code to create my own vector db. I’ve been able to create a collection and populate it but having issues with getting aggreatage numbers after data has been populated.
Several questions:
-
when importing wvc is, which is the recommended route for aggregate querying:
a) import weaviate.classes.config as wvc
b) import weaviate.classes as wvc -
I’m unable to get aggregate metrics as shown here = #Aggregate | Weaviate - Vector Database
here is the error I’m getting:
Error message:
line 217, in _do
raise WeaviateQueryError(
weaviate.exceptions.WeaviateQueryError: Query call with protocol GQL Aggregate failed with message Error in GraphQL response: [
{
“locations”: [
{
“column”: 34,
“line”: 1
}
],
“message”: “Cannot query field "wordCount" on type "AggregateUkraine002".”,
“path”: null
}
], for the following query: {Aggregate{Ukraine002{meta{count}wordCount { count maximum mean median minimum mode sum }}}}.
sys:1: ResourceWarning: unclosed <ssl.SSLSocket fd=11, family=2, type=1, proto=0, laddr=(‘192.168.1.13’, 62602), raddr=(‘34.149.137.116’, 443)>
Here is the function I’ve used:
def _create_new_weaviate_ukraine001_collections(main_collection_name):
config_file_path = (os.path.join(“parameters.json”))
config = json.load(open(config_file_path))
print(" - - - - - - - - - - - - create_new_weaviate_ukraine_collections(): - - - - - - - - - - - - - - - - ")
config_weaviate_sandbox_cluster_url = config["weaviate_sandbox_cluster_url"]
config_weaviate_api_key =config["weaviate_api_key"]
config_open_ai_api = config["open_ai_api"]
config_cohere_test_api_key = config["cohere_test_api_key"]
config_huggingface_token = config["huggingface_token"]
#Access Weaviate_Client
#https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
weaviate_client = weaviate.connect_to_wcs(
cluster_url = config_weaviate_sandbox_cluster_url,
#auth_credentials=weaviate.auth.AuthApiKey(api_key=config_weaviate_api_key), #Weaviate instance API key
auth_credentials=weaviate.auth.AuthApiKey(config_weaviate_api_key), #Weaviate instance API key
headers={
"X-OpenAI-Api-Key": config_open_ai_api,
"X-Cohere-Api-Key": config_cohere_test_api_key,
"X-HuggingFace-Api-Key": config_huggingface_token,}
)
try:
#create review collection (YT 1: 050))
ukraine_collection = weaviate_client.collections.create(
name = main_collection_name,
description = 'Save the Children and Ukraine information',
properties=[
wvc.Property(name = "title", data_type=wvc.DataType.TEXT,skip_Vectorization=True),
wvc.Property(name = "author", data_type=wvc.DataType.TEXT, skip_Vectorization=True),
wvc.Property(name = "content", description = 'content informaiton', data_type=wvc.DataType.TEXT),
wvc.Property(name = "summary", data_type=wvc.DataType.TEXT),
wvc.Property(name = "filename", data_type=wvc.DataType.TEXT,skip_Vectorization=True),
wvc.Property(name = "load_date", data_type=wvc.DataType.DATE,skip_Vectorization=True),
wvc.Property(name = "uniqueid", data_type=wvc.DataType.TEXT, skip_Vectorization=True),
],
vectorizer_config=wvc.Configure.Vectorizer.text2vec_openai(),
generative_config = wvc.Configure.Generative.openai(),
vector_index_config=wvc.Configure.VectorIndex.hnsw(
distance_metric=wvc.VectorDistances.COSINE
),
)
print(" - - - - - - - - - - - - _add_data_existing_weaviate() - - - - - - - - - - - - - - - - ")
#Specify and ID = https://weaviate.io/developers/weaviate/manage-data/import#specify-an-id-value
uniqueid_rows = [{"uniqueid": f"id_{i+1}"} for i in range(5)] #not sure why it's only 5??
#Rate limit doc = https://weaviate.io/developers/weaviate/client-libraries/python
with weaviate_client.batch.rate_limit(requests_per_minute=300) as batch:
batch.add_object(
properties={'title': main_title},
collection = str(ukraine_collection),
)
batch.add_object(
properties={'summary': 'No_Summary_YET'},
collection = str(ukraine_collection),
)
batch.add_object(
properties={"load_date": '2024-05-24T16:00:00-08:00'}, #String w/RFC3339 formatt
collection = str(ukraine_collection),
)
batch.add_object(
properties={'filename': main_filename}, #String w/RFC3339 format
collection = str(ukraine_collection),
)
batch.add_object(
properties={'author': main_authors},
collection = str(ukraine_collection),
)
#not this is adding unique id based on defined collection!
#https://weaviate.io/developers/weaviate/manage-data/import#specify-an-id-value
with ukraine_collection.batch.dynamic() as batch:
for uniqueid_row in uniqueid_rows:
obj_uuid = generate_uuid5(uniqueid_row)
batch.add_object(
properties={'uniqueid': uniqueid_row},
uuid=obj_uuid,
)
#review the number of imported objects = https://towardsdatascience.com/getting-started-with-weaviate-a-beginners-guide-to-search-with-vector-databases-14bbb9285839
print(' - - '* 20)
#https://weaviate.io/developers/weaviate/api/graphql/aggregate
#https://weaviate.io/developers/weaviate/api/graphql/aggregate#overview
print(f"Total # of Imported Objects for Ukraine - - - ")
response_tot_collection_loaded = ukraine_collection.aggregate.over_all(
total_count=True,
return_metrics=wvc.query.Metrics("wordCount").integer(
count=True,
maximum=True,
mean=True,
median=True,
minimum=True,
mode=True,
sum_=True,
),
)
print(response_tot_collection_loaded.total_count)
print(response_tot_collection_loaded.properties)
print(' - - - '*6)
print()
#verify batch and end of batch run
if len(ukraine_collection.batch.failed_objects) > 0 or len(ukraine_collection.batch.failed_references) > 0:
print('error!! with batch run!')
pass
pass #
finally:
#Best practice to close connection with the v4 API.
weaviate_client.close() # Close client gracefully