"Cannot query field \"wordCount\" on type \"Aggregate

Hello, I’m new to Python and I’ve been following the YT videos on v4 and going through the code to create my own vector DB. I’ve been able to create a collection and populate it, but I’m having issues getting aggregate numbers after the data has been populated.

Several questions:

  1. When importing wvc, which is the recommended route for aggregate querying:
    a) import weaviate.classes.config as wvc
    b) import weaviate.classes as wvc

  2. I’m unable to get aggregate metrics as shown here = #Aggregate | Weaviate - Vector Database

here is the error I’m getting:

Error message:
line 217, in _do
raise WeaviateQueryError(
weaviate.exceptions.WeaviateQueryError: Query call with protocol GQL Aggregate failed with message Error in GraphQL response: [
{
“locations”: [
{
“column”: 34,
“line”: 1
}
],
“message”: “Cannot query field "wordCount" on type "AggregateUkraine002".”,
“path”: null
}
], for the following query: {Aggregate{Ukraine002{meta{count}wordCount { count maximum mean median minimum mode sum }}}}.
sys:1: ResourceWarning: unclosed <ssl.SSLSocket fd=11, family=2, type=1, proto=0, laddr=(‘192.168.1.13’, 62602), raddr=(‘34.149.137.116’, 443)>

Here is the function I’ve used:

def _create_new_weaviate_ukraine001_collections(main_collection_name):
    """Create a Weaviate collection, import a few objects, and print aggregates.

    Reads connection settings from ``parameters.json`` in the current working
    directory, connects to the configured Weaviate cluster, creates the
    collection, adds objects through two batch contexts, and prints the result
    of an aggregate query. The client is closed on exit, whether or not the
    body raised.

    Relies on names that are not defined in this function and must exist at
    module level: ``os``, ``json``, ``weaviate``, ``wvc``, ``generate_uuid5``,
    ``main_title``, ``main_filename`` and ``main_authors``.

    Args:
        main_collection_name: Name of the collection to create and fill.

    Raises:
        KeyError: If ``parameters.json`` lacks one of the expected keys.
        Exception: Errors raised by the Weaviate client are not caught here
            and propagate to the caller after the client has been closed.
    """
    config_file_path = os.path.join("parameters.json")
    # Use a context manager so the file handle is closed deterministically.
    with open(config_file_path, encoding="utf-8") as config_file:
        config = json.load(config_file)

    print(" - - - - - -  - - - - - - create_new_weaviate_ukraine_collections(): - - - - - - - - - - - - - - - -  ")
    config_weaviate_sandbox_cluster_url = config["weaviate_sandbox_cluster_url"]
    config_weaviate_api_key = config["weaviate_api_key"]
    config_open_ai_api = config["open_ai_api"]
    config_cohere_test_api_key = config["cohere_test_api_key"]
    config_huggingface_token = config["huggingface_token"]

    # Access Weaviate client.
    # https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
    weaviate_client = weaviate.connect_to_wcs(
        cluster_url=config_weaviate_sandbox_cluster_url,
        auth_credentials=weaviate.auth.AuthApiKey(config_weaviate_api_key),  # Weaviate instance API key
        headers={
            "X-OpenAI-Api-Key": config_open_ai_api,
            "X-Cohere-Api-Key": config_cohere_test_api_key,
            "X-HuggingFace-Api-Key": config_huggingface_token,
        },
    )

    try:
        # The keyword is `skip_vectorization` (lower case); the original
        # `skip_Vectorization` spelling does not match the Property field.
        ukraine_collection = weaviate_client.collections.create(
            name=main_collection_name,
            description='Save the Children and Ukraine information',
            properties=[
                wvc.Property(name="title", data_type=wvc.DataType.TEXT, skip_vectorization=True),
                wvc.Property(name="author", data_type=wvc.DataType.TEXT, skip_vectorization=True),
                wvc.Property(name="content", description='content informaiton', data_type=wvc.DataType.TEXT),
                wvc.Property(name="summary", data_type=wvc.DataType.TEXT),
                wvc.Property(name="filename", data_type=wvc.DataType.TEXT, skip_vectorization=True),
                wvc.Property(name="load_date", data_type=wvc.DataType.DATE, skip_vectorization=True),
                wvc.Property(name="uniqueid", data_type=wvc.DataType.TEXT, skip_vectorization=True),
            ],
            vectorizer_config=wvc.Configure.Vectorizer.text2vec_openai(),
            generative_config=wvc.Configure.Generative.openai(),
            vector_index_config=wvc.Configure.VectorIndex.hnsw(
                distance_metric=wvc.VectorDistances.COSINE
            ),
        )
        print(" - - - - - -  - - - - - - _add_data_existing_weaviate() - - - - - - - - - - - - - - - -  ")

        # Specify an ID: https://weaviate.io/developers/weaviate/manage-data/import#specify-an-id-value
        # range(5) yields exactly five placeholder rows: id_1 .. id_5.
        uniqueid_rows = [{"uniqueid": f"id_{i+1}"} for i in range(5)]

        # Rate limit doc: https://weaviate.io/developers/weaviate/client-libraries/python
        # NOTE(review): each add_object call below creates a separate object
        # holding a single property; confirm that is intended rather than one
        # object carrying all five properties.
        with weaviate_client.batch.rate_limit(requests_per_minute=300) as batch:
            # Client-level batches take the collection *name*; str() of the
            # collection object is not the name, so pass the name directly.
            batch.add_object(
                properties={'title': main_title},
                collection=main_collection_name,
            )
            batch.add_object(
                properties={'summary': 'No_Summary_YET'},
                collection=main_collection_name,
            )
            batch.add_object(
                properties={"load_date": '2024-05-24T16:00:00-08:00'},  # RFC 3339 formatted string
                collection=main_collection_name,
            )
            batch.add_object(
                properties={'filename': main_filename},
                collection=main_collection_name,
            )
            batch.add_object(
                properties={'author': main_authors},
                collection=main_collection_name,
            )

        # Collection-level batch: add the placeholder ids with deterministic UUIDs.
        # https://weaviate.io/developers/weaviate/manage-data/import#specify-an-id-value
        with ukraine_collection.batch.dynamic() as batch:
            for uniqueid_row in uniqueid_rows:
                obj_uuid = generate_uuid5(uniqueid_row)
                batch.add_object(
                    # Each row already has the {"uniqueid": "<id>"} shape the
                    # TEXT property expects; do not nest it in another dict.
                    properties=uniqueid_row,
                    uuid=obj_uuid,
                )

        # Verify the batch run right after it finishes, so import failures are
        # reported even if the aggregate query below raises.
        if ukraine_collection.batch.failed_objects or ukraine_collection.batch.failed_references:
            print('error!! with batch run!')

        # Review the number of imported objects:
        # https://towardsdatascience.com/getting-started-with-weaviate-a-beginners-guide-to-search-with-vector-databases-14bbb9285839
        print(' - - ' * 20)
        # https://weaviate.io/developers/weaviate/api/graphql/aggregate
        # https://weaviate.io/developers/weaviate/api/graphql/aggregate#overview
        print("Total # of Imported Objects for Ukraine - - - ")
        # NOTE(review): "wordCount" is not among the properties declared for
        # this collection above, and none of the declared properties is an
        # integer. Point Metrics(...) at a property that exists in the schema
        # and use the metric type matching its data type.
        response_tot_collection_loaded = ukraine_collection.aggregate.over_all(
            total_count=True,
            return_metrics=wvc.query.Metrics("wordCount").integer(
                count=True,
                maximum=True,
                mean=True,
                median=True,
                minimum=True,
                mode=True,
                sum_=True,
            ),
        )
        print(response_tot_collection_loaded.total_count)
        print(response_tot_collection_loaded.properties)
        print(' - - - ' * 6)
        print()
    finally:
        # Best practice to close the connection with the v4 API.
        weaviate_client.close()  # Close client gracefully

hi! Sorry for the delay here!

Welcome to our community, @Neil ! :hugs:

That query aggregates over the property wordCount, which your collection doesn’t seem to have.

Here is a number example:

# Delete the "Article" collection, then create it again with only a name
# (no explicit property schema is passed).
client.collections.delete("Article")
collection = client.collections.create("Article")

# Three sample objects; each carries a "number" value to aggregate over.
sample_rows = (
    {"text": "This is an example", "number": 1},
    {"text": "This is another example", "number": 2},
    {"text": "This is three examples", "number": 3},
)
for row in sample_rows:
    collection.data.insert(row)

# Request every integer metric for the "number" property.
number_metrics = wvc.query.Metrics("number").integer(
    sum_=True,
    mean=True,
    median=True,
    minimum=True,
    maximum=True,
    count=True,
    mode=True,
)
response = collection.aggregate.over_all(
    total_count=True,
    return_metrics=number_metrics,
)

print(response.total_count)
print(response.properties)

Note that the available aggregation metrics depend on the data type of the property you aggregate over.

Let me know if this helps!

Thanks!

Thanks! This has resolved my question! =)

1 Like