Description
I am running a 3-node Weaviate cluster with the following Docker Compose file:
cat docker-compose.yml
# Three-node Weaviate 1.30.3 cluster running on a single Docker host.
# Every node exposes the same container ports (8080, 6060, 50051, 8300);
# each service publishes them on distinct host ports so they do not collide.
version: '3.8'

services:
  # Node 1 sets no CLUSTER_JOIN; nodes 2 and 3 join it at weaviate-node-1:7100.
  weaviate-node-1:
    image: cr.weaviate.io/semitechnologies/weaviate:1.30.3
    ports:
      - '8080:8080'
      - '6050:6060'
      - '50051:50051'
      - '8300:8300'
    restart: 'on-failure:0'
    volumes:
      - weaviate-node-1-data:/var/lib/weaviate
    environment:
      CLUSTER_HOSTNAME: 'node1'
      CLUSTER_GOSSIP_BIND_PORT: '7100'
      CLUSTER_DATA_BIND_PORT: '7101'
      RAFT_BOOTSTRAP_EXPECT: '3'
      RAFT_JOIN: 'node1,node2,node3'
      ENABLE_API_BASED_MODULES: 'false'
      ENABLE_MODULES: ''
      DISABLE_MODULES: 'text2vec-contextionary,text2vec-transformers,text2vec-openai,text2vec-huggingface'
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      QUERY_DEFAULTS_LIMIT: '1000'
      QUERY_MAXIMUM_RESULTS: '100000'
      LOG_LEVEL: 'debug'
      ASYNC_INDEXING: 'true'
      ENABLE_FULL_METRICS: 'true'
    deploy:
      resources:
        limits:
          cpus: '7.0'
          memory: '14G'
    networks:
      - weaviate-net

  # Node 2: joins the gossip cluster through node 1.
  weaviate-node-2:
    image: cr.weaviate.io/semitechnologies/weaviate:1.30.3
    ports:
      - '8081:8080'
      - '6051:6060'
      - '50052:50051'
      - '8301:8300'
    restart: 'on-failure:0'
    volumes:
      - weaviate-node-2-data:/var/lib/weaviate
    environment:
      CLUSTER_HOSTNAME: 'node2'
      CLUSTER_GOSSIP_BIND_PORT: '7102'
      CLUSTER_DATA_BIND_PORT: '7103'
      CLUSTER_JOIN: 'weaviate-node-1:7100'
      RAFT_BOOTSTRAP_EXPECT: '3'
      RAFT_JOIN: 'node1,node2,node3'
      ENABLE_API_BASED_MODULES: 'false'
      ENABLE_MODULES: ''
      DISABLE_MODULES: 'text2vec-contextionary,text2vec-transformers,text2vec-openai,text2vec-huggingface'
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      QUERY_DEFAULTS_LIMIT: '1000'
      QUERY_MAXIMUM_RESULTS: '100000'
      LOG_LEVEL: 'debug'
      ASYNC_INDEXING: 'true'
      ENABLE_FULL_METRICS: 'true'
    deploy:
      resources:
        limits:
          cpus: '7.0'
          memory: '14G'
    networks:
      - weaviate-net

  # Node 3: joins the gossip cluster through node 1.
  weaviate-node-3:
    image: cr.weaviate.io/semitechnologies/weaviate:1.30.3
    ports:
      - '8082:8080'
      - '6052:6060'
      - '50053:50051'
      - '8302:8300'
    restart: 'on-failure:0'
    volumes:
      - weaviate-node-3-data:/var/lib/weaviate
    environment:
      CLUSTER_HOSTNAME: 'node3'
      CLUSTER_GOSSIP_BIND_PORT: '7104'
      CLUSTER_DATA_BIND_PORT: '7105'
      CLUSTER_JOIN: 'weaviate-node-1:7100'
      RAFT_BOOTSTRAP_EXPECT: '3'
      RAFT_JOIN: 'node1,node2,node3'
      ENABLE_API_BASED_MODULES: 'false'
      ENABLE_MODULES: ''
      DISABLE_MODULES: 'text2vec-contextionary,text2vec-transformers,text2vec-openai,text2vec-huggingface'
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      QUERY_DEFAULTS_LIMIT: '1000'
      QUERY_MAXIMUM_RESULTS: '100000'
      LOG_LEVEL: 'debug'
      ASYNC_INDEXING: 'true'
      ENABLE_FULL_METRICS: 'true'
    deploy:
      resources:
        limits:
          cpus: '7.0'
          memory: '14G'
    networks:
      - weaviate-net

# Named volumes, each bind-mounted from a host directory under /mnt/weaviate.
volumes:
  weaviate-node-1-data:
    driver: local
    driver_opts:
      type: none
      device: /mnt/weaviate/pod1
      o: bind
  weaviate-node-2-data:
    driver: local
    driver_opts:
      type: none
      device: /mnt/weaviate/pod2
      o: bind
  weaviate-node-3-data:
    driver: local
    driver_opts:
      type: none
      device: /mnt/weaviate/pod3
      o: bind

# Shared bridge network that all three services attach to.
networks:
  weaviate-net:
    driver: bridge
The cluster had been running fine for the last three months, but last night the nodes suddenly started failing to join the cluster.
Server Setup Information
- Weaviate Server Version: weaviate:1.30.3
- Deployment Method: docker compose
- Multi Node? Number of Running Nodes: Yes, 3 nodes
- Client Language and Version: English
- Multitenancy?: none
Any additional Information
==================== Logs for weaviate-cluster-weaviate-node-1-1 ====================
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.4:8300","status":13,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","fields.time":117158505668,"level":"debug","msg":"failed to contact","server-id":"node3","time":"2025-08-29T05:55:11Z"}
{"action":"raft","backoff time":500000000,"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","error":"dial tcp: address 99999999: invalid port","level":"error","msg":"failed to heartbeat to","peer":"172.19.0.2:8300","time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","fields.time":117609801307,"level":"debug","msg":"failed to contact","server-id":"node3","time":"2025-08-29T05:55:11Z"}
{"action":"raft","backoff time":500000000,"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","error":"dial tcp: address 99999999: invalid port","level":"error","msg":"failed to heartbeat to","peer":"172.19.0.2:8300","time":"2025-08-29T05:55:12Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.19.0.4:8300","node2":"172.19.0.3:8300"},"time":"2025-08-29T05:55:12Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.4:8300","status":13,"time":"2025-08-29T05:55:12Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.3:8300","status":8,"time":"2025-08-29T05:55:12Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","fields.time":118093069224,"level":"debug","msg":"failed to contact","server-id":"node3","time":"2025-08-29T05:55:12Z"}
{"action":"raft","backoff time":500000000,"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","error":"dial tcp: address 99999999: invalid port","level":"error","msg":"failed to heartbeat to","peer":"172.19.0.2:8300","time":"2025-08-29T05:55:12Z"}
==================== Logs for weaviate-cluster-weaviate-node-2-1 ====================
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.3:8300","status":8,"time":"2025-08-29T05:55:09Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.19.0.4:8300","node2":"172.19.0.3:8300"},"time":"2025-08-29T05:55:10Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.4:8300","status":13,"time":"2025-08-29T05:55:10Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.3:8300","status":8,"time":"2025-08-29T05:55:10Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.19.0.4:8300","node2":"172.19.0.3:8300"},"time":"2025-08-29T05:55:11Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.4:8300","status":13,"time":"2025-08-29T05:55:11Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.3:8300","status":8,"time":"2025-08-29T05:55:11Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.19.0.4:8300","node2":"172.19.0.3:8300"},"time":"2025-08-29T05:55:12Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.4:8300","status":13,"time":"2025-08-29T05:55:12Z"}
{"build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"attempted to join and failed","remoteNode":"172.19.0.3:8300","status":8,"time":"2025-08-29T05:55:12Z"}
==================== Logs for weaviate-cluster-weaviate-node-3-1 ====================
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"debug","msg":"calculated votes needed","needed":2,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","from":"node3","level":"debug","msg":"pre-vote received","tally":0,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","from":"node3","level":"debug","msg":"pre-vote granted","tally":1,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","error":"dial tcp: address 99999999: invalid port","level":"error","msg":"failed to make requestVote RPC","target":{"Suffrage":0,"ID":"node2","Address":"172.19.0.4:8300"},"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","from":"node2","level":"debug","msg":"pre-vote received","tally":1,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","from":"node2","level":"debug","msg":"pre-vote denied","tally":1,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","error":"dial tcp: address 99999999: invalid port","level":"error","msg":"failed to make requestVote RPC","target":{"Suffrage":0,"ID":"node1","Address":"172.19.0.3:8300"},"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","from":"node1","level":"debug","msg":"pre-vote received","tally":1,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","from":"node1","level":"debug","msg":"pre-vote denied","tally":1,"term":77,"time":"2025-08-29T05:55:11Z"}
{"action":"raft","build_git_commit":"ca35d6b","build_go_version":"go1.24.3","build_image_tag":"v1.30.3","build_wv_version":"1.30.3","level":"info","msg":"pre-vote campaign failed, waiting for election timeout","refused":2,"tally":1,"term":77,"time":"2025-08-29T05:55:11Z","votesNeeded":2}
Please suggest steps to resolve this issue, as this is a production setup. I am hesitant to clear the Raft state: in the past, I've observed that clearing it by deleting the Raft directory on all nodes does lead to a stable cluster, but it risks losing data and its relationship with the schema.