Description
I am using Azure Durable Functions; each one creates a new instance of WeaviateDB but does close it down when it is finished with it.
This is how I create the WeaviateDB:
def get_weaviate_db(kb_id: str):
    """Build a WeaviateVectorDB for the given knowledge base.

    The HTTP host, gRPC host and API key are read from the
    WEAVIATE_HTTP_HOST, WEAVIATE_GRPC_HOST and WEAVIATE_API_KEY
    environment variables; both transports are requested as secure.

    Args:
        kb_id: Identifier of the knowledge base, forwarded to the constructor.

    Returns:
        A newly constructed WeaviateVectorDB instance.
    """
    connection_kwargs = {
        "http_host": os.getenv("WEAVIATE_HTTP_HOST"),
        "http_secure": True,
        "grpc_host": os.getenv("WEAVIATE_GRPC_HOST"),
        "grpc_secure": True,
        "weaviate_secret": os.getenv("WEAVIATE_API_KEY"),
    }
    return WeaviateVectorDB(kb_id=kb_id, **connection_kwargs)
class WeaviateVectorDB(VectorDB):
    """
    An implementation of the VectorDB interface for Weaviate using the Python v4 client.
    This class provides methods for adding, removing, and searching for vectorized data
    within a Weaviate instance.
    """

    def __init__(
        self,
        kb_id: str,
        http_host="localhost",
        http_port=8099,
        http_secure=False,
        grpc_host="localhost",
        grpc_port=50052,
        grpc_secure=False,
        weaviate_secret="secr3tk3y",
        init_timeout: int = 2,
        query_timeout: int = 45,
        insert_timeout: int = 120,
        use_embedded_weaviate: bool = False,
    ):
        """
        Initializes a WeaviateVectorDB instance and connects its client.

        Args:
            kb_id: Identifier of the knowledge base (required). It is stored on
                the instance; the collection opened here is the fixed name
                "dsrag_test", not one derived from kb_id.
            http_host: The hostname of the Weaviate server. In the
                non-embedded branch this is passed as the cluster URL.
            http_port: The HTTP port of the Weaviate server (stored only; not
                used when connecting in this constructor).
            http_secure: Whether to use HTTPS for the connection (stored only).
            grpc_host: The hostname of the Weaviate server for gRPC
                connections (stored only).
            grpc_port: The gRPC port of the Weaviate server (stored only).
            grpc_secure: Whether to use gRPCs for the connection (stored only).
            weaviate_secret: API key used to authenticate in the non-embedded
                branch.
            init_timeout: Timeout, in seconds, applied to client
                initialization checks.
            query_timeout: Timeout, in seconds, applied to query calls.
            insert_timeout: Timeout, in seconds, applied to insert calls.
            use_embedded_weaviate: If True, start an embedded Weaviate instance
                persisting to "./weaviate/data" instead of connecting to a
                remote cluster.

        Raises:
            Exception: Whatever the Weaviate client raises while connecting or
                fetching the collection is re-raised after the client has been
                closed.
        """
        # Imported locally so this block does not depend on the file-level
        # import list already exposing Timeout.
        from weaviate.config import Timeout

        # save all of these parameters as attributes so they're easily accessible for the to_dict method
        self.kb_id = kb_id
        self.http_host = http_host
        self.http_port = http_port
        self.http_secure = http_secure
        self.grpc_host = grpc_host
        self.grpc_port = grpc_port
        self.grpc_secure = grpc_secure
        self.weaviate_secret = weaviate_secret
        self.init_timeout = init_timeout
        self.query_timeout = query_timeout
        self.insert_timeout = insert_timeout
        self.use_embedded_weaviate = use_embedded_weaviate

        # The timeout arguments used to be stored but never handed to the
        # client, so the client's built-in defaults applied regardless of what
        # the caller asked for. Forward them explicitly.
        timeout = Timeout(
            init=self.init_timeout,
            query=self.query_timeout,
            insert=self.insert_timeout,
        )

        additional_headers = {}
        if use_embedded_weaviate:
            # NOTE(review): these two keys look like server environment
            # settings rather than HTTP headers - confirm whether they should
            # be supplied through EmbeddedOptions(additional_env_vars=...).
            additional_headers["ENABLE_MODULES"] = (
                "backup-filesystem,text2vec-openai,text2vec-cohere,text2vec-huggingface,ref2vec-centroid,generative-openai,qna-openai"
            )
            additional_headers["BACKUP_FILESYSTEM_PATH"] = "./weaviate/backups"
            self.client = weaviate.WeaviateClient(
                embedded_options=weaviate.embedded.EmbeddedOptions(
                    persistence_data_path="./weaviate/data",
                ),
                additional_headers=additional_headers,
                additional_config=AdditionalConfig(timeout=timeout),
            )
        else:
            self.client = weaviate.connect_to_wcs(
                cluster_url=self.http_host,
                auth_credentials=weaviate.auth.AuthApiKey(self.weaviate_secret),
                skip_init_checks=True,
                additional_config=AdditionalConfig(
                    connection=ConnectionConfig(
                        session_pool_connections=100,
                        session_pool_maxsize=500,
                        session_pool_max_retries=3,
                    ),
                    timeout=timeout,
                ),
            )

        self.collection_name = "dsrag_test"
        try:
            # Presumably connect_to_wcs already returns a connected client, so
            # this call matters mainly for the embedded branch - TODO confirm.
            self.client.connect()
            self.collection = self.client.collections.get(
                self.collection_name
            )
        except Exception:
            # Do not leave a half-initialised client holding connections open
            # when construction fails.
            self.client.close()
            raise
So when I create only 1 instance, it is slow but does work. When I have 10+ instances open and running at the same time, I get this error:
Query call with protocol GRPC search failed with message Deadline Exceeded
Server Setup Information
- Weaviate Server Version: weaviate-client~=4.6.5
- Deployment Method: Using Python V4 SDK
- Multi Node? Number of Running Nodes: Not sure what this means
- Client Language and Version: Python V4
- Multitenancy?: