I am facing the same issue.
When I had uploaded 12,000 images, each node was using about 430 MB of RAM.
After I added more photos (it is now at 25k), it started behaving strangely: each node began using about 6 GB of RAM. Since there was not much RAM left on the machine, the nodes restarted, started consuming more RAM again, and are now stuck in a restart loop. I am attaching images of the logs.
Each image is around 35-50 KB.
Link to the log images: weaviate issue images - Google Drive
The docker-compose.yml is below:
cat docker-compose.yml
# Three-node Weaviate 1.24.10 cluster.
# node 2 and node 3 join through node 1's gossip port (CLUSTER_JOIN -> weaviate-node-1:7100).
# NOTE(review): all three services carry the same placement constraint, so they
# share one host's memory; the GOMEMLIMIT values below add up across the nodes.
version: '3.3'
services:
  weaviate-node-1:
    init: true
    command:
      - --host
      - 0.0.0.0
      - --port
      - '8080'
      - --scheme
      - http
    image: cr.weaviate.io/semitechnologies/weaviate:1.24.10
    ports:
      - 8080:8080
      - 6060:6060
      - 50051:50051
      # presumably the Prometheus metrics endpoint (PROMETHEUS_MONITORING_ENABLED is set) - confirm
      - 7007:2112
    restart: on-failure:0
    volumes:
      - ./data-node-1:/var/lib/weaviate
    environment:
      ASYNC_INDEXING: 'true'
      PROMETHEUS_MONITORING_ENABLED: 'true'
      LOG_LEVEL: 'debug'
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      CLUSTER_HOSTNAME: 'node1'
      CLUSTER_GOSSIP_BIND_PORT: '7100'
      CLUSTER_DATA_BIND_PORT: '7101'
      IMAGE_INFERENCE_API: http://xxx.xx.xxx.25:8000
      DEFAULT_VECTORIZER_MODULE: img2vec-neural
      ENABLE_MODULES: img2vec-neural
      # NOTE(review): node 1 uses 3050MiB while nodes 2 and 3 use 3750MiB - confirm this is intended
      GOMEMLIMIT: '3050MiB'
      LIMIT_RESOURCES: 'true'
    networks:
      - default
    deploy:
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ubuntu-s-8vcpu-16gb-fra1-01
  weaviate-node-2:
    init: true
    command:
      - --host
      - 0.0.0.0
      - --port
      - '8080'
      - --scheme
      - http
    image: cr.weaviate.io/semitechnologies/weaviate:1.24.10
    ports:
      - 8081:8080
      - 6061:6060
      - 50052:50051
    restart: on-failure:0
    volumes:
      - ./data-node-2:/var/lib/weaviate
    environment:
      LOG_LEVEL: 'debug'
      QUERY_DEFAULTS_LIMIT: 25
      LIMIT_RESOURCES: 'true'
      GOMEMLIMIT: '3750MiB'
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      CLUSTER_HOSTNAME: 'node2'
      ASYNC_INDEXING: 'true'
      CLUSTER_GOSSIP_BIND_PORT: '7102'
      CLUSTER_DATA_BIND_PORT: '7103'
      CLUSTER_JOIN: 'weaviate-node-1:7100'
      IMAGE_INFERENCE_API: http://xxx.xx.xxx.25:8000
      DEFAULT_VECTORIZER_MODULE: img2vec-neural
      ENABLE_MODULES: img2vec-neural
    networks:
      - default
    deploy:
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ubuntu-s-8vcpu-16gb-fra1-01
  weaviate-node-3:
    init: true
    command:
      - --host
      - 0.0.0.0
      - --port
      - '8080'
      - --scheme
      - http
    image: cr.weaviate.io/semitechnologies/weaviate:1.24.10
    ports:
      - 8082:8080
      - 6062:6060
      - 50053:50051
    restart: on-failure:0
    volumes:
      - ./data-node-3:/var/lib/weaviate
    environment:
      LOG_LEVEL: 'debug'
      ASYNC_INDEXING: 'true'
      LIMIT_RESOURCES: 'true'
      GOMEMLIMIT: '3750MiB'
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      CLUSTER_HOSTNAME: 'node3'
      CLUSTER_GOSSIP_BIND_PORT: '7104'
      CLUSTER_DATA_BIND_PORT: '7105'
      CLUSTER_JOIN: 'weaviate-node-1:7100'
      IMAGE_INFERENCE_API: http://xxx.xx.xxx.25:8000
      DEFAULT_VECTORIZER_MODULE: img2vec-neural
      ENABLE_MODULES: img2vec-neural
    networks:
      - default
    deploy:
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == ubuntu-s-8vcpu-16gb-fra1-01
networks:
  default:
The schema is given below:
# Class definition for the image collection: objects are vectorized by the
# img2vec-neural module from the 'image' field and stored in a "flat" vector
# index with the "bq" option enabled.
schemaConfig = {
    'class': 'TestImg16',
    'vectorizer': 'img2vec-neural',
    "vectorIndexType": "flat",
    "vectorIndexConfig": {
        "bq": {
            "enabled": True,
            "rescoreLimit": 200,
            "cache": True,
        },
    },
    'moduleConfig': {
        'img2vec-neural': {
            'imageFields': [
                'image'
            ]
        }
    },
    'properties': [
        {
            "name": "sku_id",
            "dataType": ["string"]
        },
        {
            "name": "product_id",
            "dataType": ["string"]
        },
        {
            "name": "brand",
            "dataType": ["string"]
        },
        {
            "name": "some_i",
            "dataType": ["string"]
        },
        {
            "name": "color",
            "dataType": ["string"]
        },
        {
            "name": "parentCategory",
            "dataType": ["string"]
        },
        {
            # NOTE(review): "type" and "cardinality" do not look like standard
            # property keys, and the importer sends this field as an array of
            # strings - confirm whether the data type should be an array type.
            "name": "childCategories",
            "dataType": ["text"],
            "type": "class",
            "cardinality": "multiple"
        }
    ]
}

# Create the class. Any exception is reported with the "already defined"
# message, so the printed exception text is the only hint at the real cause.
try:
    a = client.schema.create_class(schemaConfig)
    print("Schema defined")
except Exception as e:
    print(str(e) + "Schema already defined, skipping…")
The objects are pushed in batches of 20 base64-encoded images, using the code given below:
public void pushToVectorDb(List<Map<String, Object>> dataObjs) {
if (dataObjs.size() > 0) {
ObjectsBatcher batcher = singleWeaviateClient.weaviateClientMethod().batch().objectsBatcher();
for (Map<String, Object> prop : dataObjs) {
batcher.withObject(WeaviateObject.builder()
.className(className)
.properties(prop)
.id(prop.get(“some_i”).toString())
.build());
}
Result<ObjectGetResponse> a = batcher
.withConsistencyLevel(ConsistencyLevel.ONE)
.run();
for (ObjectGetResponse b : a.getResult()) {
if (!(b.getResult().toString().contains
("SUCCESS"))) {
LOGGER.error("ERROR while bulk import -> " + b.getId());
LOGGER.error("ERROR " + b.getResult().toString());
} else {
LOGGER.info("Completed bulk import -> " + b.getId());
}
}
}
}
// Build the property map for one object and queue it for the next batch.
// "image" carries the base64-encoded image; "some_i" carries the UUID string.
Map<String, Object> properties = new HashMap<>();
properties.put("image", finalObject1.base64Image);
properties.put("sku_id", finalObject1.sku_id);
properties.put("product_id", finalObject1.product_id);
properties.put("brand", finalObject1.brand);
properties.put("some_i", finalObject1.uuid.toString());
properties.put("parentCategory", finalObject1.parent_category);
// NOTE(review): sent as a String[]; confirm the class schema declares an array type for this field.
properties.put("childCategories", childCategories.toArray(new String[0]));
properties.put("color", finalObject1.color);
dataObjs.add(properties);