When using the Python library weaviate-client==4.9.4 to connect to a Weaviate cluster running version 1.26.4, we observe a memory leak on each connection.
We used a memory profiler (memory_profiler) and the following code to test the connection:
import weaviate
from weaviate.classes.init import AdditionalConfig, Timeout
from memory_profiler import profile
# Endpoint hosts handed to get_weaviate_client() by test_connexion().
# NOTE(review): "xxx" looks like a redacted placeholder — substitute real hosts before running.
# Host used for the gRPC endpoint.
WEAVIATE_GRPC_HOST = "xxx"
# Host used for the HTTP endpoint.
WEAVIATE_HOST = "xxx"
@profile
def get_weaviate_client(weaviate_host: str, weaviate_grpc_host: str):
    """Create a Weaviate client for a custom TLS endpoint and check it is ready.

    Args:
        weaviate_host: Host name for the HTTP endpoint (port 443, secure).
        weaviate_grpc_host: Host name for the gRPC endpoint (port 443, secure).

    Returns:
        The client object produced by ``weaviate.connect_to_custom`` once
        ``is_ready()`` has returned a truthy value.

    Raises:
        RuntimeError: If ``is_ready()`` reports that the client is not ready.
            Any exception raised by ``connect()`` / ``is_ready()`` is
            propagated unchanged. In both cases the client is closed first.
    """
    weaviate_client = weaviate.connect_to_custom(
        http_host=weaviate_host,
        http_port=443,
        http_secure=True,
        grpc_host=weaviate_grpc_host,
        grpc_port=443,
        grpc_secure=True,
        skip_init_checks=False,
        additional_config=AdditionalConfig(
            timeout=Timeout(init=30, query=60, insert=120)
        ),
    )
    try:
        # NOTE(review): connect_to_custom presumably returns an already
        # connected client, which would make this call redundant — confirm
        # against the weaviate-client documentation before removing it.
        weaviate_client.connect()
        if not weaviate_client.is_ready():
            raise RuntimeError("Max retries reached: ")
    except Exception:
        # Do not hand a half-open client to nobody: release its resources
        # before the error reaches the caller.
        weaviate_client.close()
        raise
    return weaviate_client
@profile
def test_connexion():
    """Reproduce the memory growth: open a client, then cycle connect/close.

    The client obtained from ``get_weaviate_client`` is closed once, then
    reconnected and closed three more times before the reference is dropped.
    """
    weaviate_client = get_weaviate_client(WEAVIATE_HOST, WEAVIATE_GRPC_HOST)
    print('Client connected')
    weaviate_client.close()

    # Same call sequence as three hand-written connect()/close() pairs.
    for _ in range(3):
        weaviate_client.connect()
        weaviate_client.close()

    del weaviate_client
# Script entry point: run the connection repro only when executed directly.
if __name__ == "__main__":
    test_connexion()
This is the memory consumption I observe:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
8 71.1 MiB 71.1 MiB 1 @profile
9 def get_weaviate_client(weaviate_host: str, weaviate_grpc_host: str) :
10 84.6 MiB 13.5 MiB 2 weaviate_client = weaviate.connect_to_custom(
11 71.1 MiB 0.0 MiB 1 http_host=weaviate_host,
12 71.1 MiB 0.0 MiB 1 http_port=443,
13 71.1 MiB 0.0 MiB 1 http_secure=True,
14 71.1 MiB 0.0 MiB 1 grpc_host=weaviate_grpc_host,
15 71.1 MiB 0.0 MiB 1 grpc_port=443,
16 71.1 MiB 0.0 MiB 1 grpc_secure=True,
17 71.1 MiB 0.0 MiB 1 skip_init_checks=False,
18 71.1 MiB 0.0 MiB 2 additional_config= AdditionalConfig(
19 71.1 MiB 0.0 MiB 1 timeout_=Timeout(init=30, query=60, insert=120)
20 )
21 )
22 84.6 MiB 0.0 MiB 1 weaviate_client.connect()
23 84.6 MiB 0.0 MiB 1 if not weaviate_client.is_ready():
24 raise RuntimeError(f"Max retries reached: ")
25 84.6 MiB 0.0 MiB 1 return weaviate_client
Client connected
Line # Mem usage Increment Occurrences Line Contents
=============================================================
27 71.1 MiB 71.1 MiB 1 @profile
28 def test_connexion():
29 84.6 MiB 13.5 MiB 1 weaviate_client = get_weaviate_client(WEAVIATE_HOST, WEAVIATE_GRPC_HOST)
30 84.6 MiB 0.0 MiB 1 print('Client connected')
31 84.2 MiB -0.4 MiB 1 weaviate_client.close()
32
33 86.3 MiB 2.1 MiB 1 weaviate_client.connect()
34 86.5 MiB 0.2 MiB 1 weaviate_client.close()
35
36 87.0 MiB 0.5 MiB 1 weaviate_client.connect()
37 87.4 MiB 0.4 MiB 1 weaviate_client.close()
38
39 88.4 MiB 1.0 MiB 1 weaviate_client.connect()
40 88.7 MiB 0.2 MiB 1 weaviate_client.close()
41
42 88.7 MiB 0.0 MiB 1 del weaviate_client
So we can see that each weaviate_client.connect() consumes more memory than is released by the following weaviate_client.close().
Does anybody have an idea why this happens and how to fix it?
This matters because our service runs in an EKS cluster, and new instances are scaled up frequently according to a scaling policy based on memory consumption, but they never scale down.
Thanks a lot