Error with parallel requests

Description

I am running several requests to weaviate like so:

# Module-level Weaviate client; created lazily by connect_client() and
# torn down by close_client(). All helpers below read this shared handle.
client = None


def connect_client():
    """Initialise the shared module-level Weaviate Cloud client.

    Reads the cluster URL, Weaviate API key, and OpenAI API key from the
    environment and stores the connected client in the module global.
    """
    global client
    cluster = os.getenv("WEAVIATE_URL")
    credentials = weaviate.auth.AuthApiKey(os.getenv("WEAVIATE_KEY"))
    extra_headers = {"X-OpenAI-Api-key": os.getenv("OPENAI_APIKEY")}
    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=cluster,
        auth_credentials=credentials,
        headers=extra_headers,
    )


def should_create_collection(collection_name: str):
    """Return True when the named collection is missing or holds no objects."""
    if not client.collections.exists(collection_name):
        return True
    # Collection exists; create only if it is currently empty.
    return len(client.collections.get(collection_name)) == 0


def close_client():
    """Close the shared module-level Weaviate client and release its connections."""
    client.close()

def create_collection(collection_name: str, override=True):
    """Create the collection schema in Weaviate.

    When ``override`` is true, any existing collection with the same name is
    deleted first so the schema is rebuilt from scratch.
    """
    if override:
        client.collections.delete(collection_name)

    schema_properties = [
        Property(name="prop1", data_type=DataType.TEXT),
        Property(name="prop2", data_type=DataType.INT),
    ]
    client.collections.create(
        collection_name,
        vectorizer_config=Configure.Vectorizer.text2vec_openai(),
        generative_config=Configure.Generative.openai(),
        properties=schema_properties,
    )

def store_new_doc(collection_name: str, pdf_file_path: str):
    """Extract text rows from a PDF and batch-insert them into the collection."""
    rows = extract_text_from_pdf(pdf_file_path)
    target = client.collections.get(collection_name)
    # Dynamic batching lets the client size/flush batches automatically.
    with target.batch.dynamic() as batch:
        for row in rows:
            batch.add_object(properties=row)


# Then used like so:
collection = client.collections.get(collection_name)
response_terms = collection.query.near_text(
    query=query_list,
    limit=10,
)
response_query = collection.query.near_text(
    query=query,
    limit=2,
    target_vector="prop1",
)

When running the above code in parallel I start getting errors like so:

 File "/home/myuser/dev/project/backend/.venv/lib/python3.10/site-packages/weaviate/collections/grpc/query.py", line 658, in __call
    raise WeaviateQueryError(e.details(), "GRPC search")  # pyright: ignore
weaviate.exceptions.WeaviateQueryError: Query call with protocol GRPC search failed with message Channel closed!.
/home/myuser/.pyenv/versions/3.10.6/lib/python3.10/contextlib.py:280: ResourceWarning: unclosed <ssl.SSLSocket fd=37, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.ww.yy.xx', 56726), raddr=('34.qq.rr.mm', 443)>
  def helper(*args, **kwds):
ResourceWarning: Enable tracemalloc to get the object allocation traceback
/home/myuser/.pyenv/versions/3.10.6/lib/python3.10/contextlib.py:280: ResourceWarning: unclosed <ssl.SSLSocket fd=27, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.ww.yy.xx', 56704), raddr=('34.qq.rr.mm', 443)>
  def helper(*args, **kwds):
ResourceWarning: Enable tracemalloc to get the object allocation traceback
/home/myuser/.pyenv/versions/3.10.6/lib/python3.10/contextlib.py:280: ResourceWarning: unclosed <ssl.SSLSocket fd=32, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.ww.yy.xx', 56722), raddr=('34.qq.rr.mm', 443)>

and also:

/python3.10/site-packages/weaviate/collections/grpc/query.py", line 658, in __call
    raise WeaviateQueryError(e.details(), "GRPC search")  # pyright: ignore
weaviate.exceptions.WeaviateQueryError: Query call with protocol GRPC search failed with message explorer: get class: vectorize params: vectorize params: vectorize params: vectorize keywords: remote client vectorize: connection to: OpenAI API failed with status: 400 error: '$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference..

Server Setup Information

  • Weaviate Server Version: Managed version
  • Deployment Method:
  • Multi Node? Number of Running Nodes:
  • Client Language and Version: python 3.10.6, weaviate-client = “^4.6.4”
  • Multitenancy?:

Any additional Information

Some requests to Weaviate do work; I suspect the failures happen when many requests run in parallel or close together in time.

Hi @Agam!

Can you share some reproducible code? Something I could replicate.

If you want to ingest more data efficiently, you shouldn't run ingestion in parallel yourself; instead, use a fixed batch size and adjust the number of concurrent requests the client makes.

Do you also face this error if running it single threaded?

Thanks!