Weaviate 1.24.6 crashing

Description

Our production Weaviate instance crashed seemingly at random, and I am not sure what caused it. We are running a docker-compose setup.

Server Setup Information

  • Weaviate Server Version: 1.24.6
  • Deployment Method: Docker
  • Multi Node? Number of Running Nodes: 2 nodes
  • Client Language and Version: http-graphql
  • Multitenancy: Yes
# Compose file format version.
version: '3.9'
services:
  # First Weaviate node; publishes host ports 8080 and 50051.
  weaviate-node-1:
    image: cr.weaviate.io/semitechnologies/weaviate:1.24.6
    command:
      - --host
      - 0.0.0.0
      - --port
      - '8080'
      - --scheme
      - http
    ports:
      - '8080:8080'
      - '50051:50051'
    volumes:
      - weaviate_01_data:/var/lib/weaviate
    networks:
      - dev
      - shared
    restart: on-failure:0
    environment:
      # General server settings.
      QUERY_DEFAULTS_LIMIT: '25'
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      DEFAULT_VECTORIZER_MODULE: 'none'
      ENABLE_MODULES: 'backup-s3,text2vec-cohere,text2vec-huggingface,text2vec-palm,text2vec-openai,generative-openai,generative-cohere,generative-palm,ref2vec-centroid,reranker-cohere,qna-openai'
      # Cluster settings; this node sets no CLUSTER_JOIN target.
      CLUSTER_HOSTNAME: 'node1'
      CLUSTER_GOSSIP_BIND_PORT: '7100'
      CLUSTER_DATA_BIND_PORT: '7101'
      # S3 backup module settings (credentials redacted as 'xx').
      BACKUP_S3_BUCKET: 'xx'
      BACKUP_S3_PATH: 'weaviate-backup/'
      AWS_ACCESS_KEY_ID: 'xx'
      AWS_SECRET_ACCESS_KEY: 'xx'
      AWS_REGION: 'us-east-1'
  # Second Weaviate node; joins the cluster through weaviate-node-1's gossip port.
  weaviate-node-2:
    networks:
        - dev
        - shared
    command:
    - --host
    - 0.0.0.0
    - --port
    - '8080'
    - --scheme
    - http
    image: cr.weaviate.io/semitechnologies/weaviate:1.24.6
    ports:
    # HOST:CONTAINER. The container serves REST on 8080 (see --port above) and
    # gRPC on 50051, so only the host side differs from node 1.
    - '8081:8080'
    - '50052:50051'
    volumes:
    - weaviate_02_data:/var/lib/weaviate
    restart: on-failure:0
    environment:
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      DEFAULT_VECTORIZER_MODULE: 'none'
      ENABLE_MODULES: 'backup-s3,text2vec-cohere,text2vec-huggingface,text2vec-palm,text2vec-openai,generative-openai,generative-cohere,generative-palm,ref2vec-centroid,reranker-cohere,qna-openai'
      CLUSTER_HOSTNAME: 'node2'
      CLUSTER_GOSSIP_BIND_PORT: '7102'
      CLUSTER_DATA_BIND_PORT: '7103'
      # Gossip address of the node to join (node 1's CLUSTER_GOSSIP_BIND_PORT).
      CLUSTER_JOIN: 'weaviate-node-1:7100'
      BACKUP_S3_BUCKET: 'xx'
      AWS_ACCESS_KEY_ID: 'xx'
      AWS_SECRET_ACCESS_KEY: 'xx'
      AWS_REGION: 'us-east-1'
      BACKUP_S3_PATH: 'weaviate-backup/'

networks:
  # Bridge network defined by this Compose file.
  dev:
    driver: bridge
  # Marked external: expected to exist already rather than be created here.
  shared:
    external: true

# Named volumes mounted at /var/lib/weaviate by the two Weaviate services.
volumes:
  weaviate_01_data: {}
  weaviate_02_data: {}

Any additional Information

{"action":"hnsw_commit_logger_combine_condensed_logs","id":"main","input_first":"/var/lib/weaviate/default/teamQWjneg5YbwZ1/main.hnsw.commitlog.d/1739359455.condensed","input_second":"/var/lib/weaviate/default/teamQWjneg5YbwZ1/main.hnsw.commitlog.d/1741147203.condensed","level":"info","msg":"successfully combined previously condensed commit log files","output":"/var/lib/weaviate/default/teamQWjneg5YbwZ1/main.hnsw.commitlog.d/1739359455","time":"2025-03-06T04:02:01Z"}

{"action":"hnsw_commit_logger_combine_condensed_logs","id":"main","input_first":"/var/lib/weaviate/default/teamLkzPdyP7bQro/main.hnsw.commitlog.d/1731038403.condensed","input_second":"/var/lib/weaviate/default/teamLkzPdyP7bQro/main.hnsw.commitlog.d/1741147204.condensed","level":"info","msg":"successfully combined previously condensed commit log files","output":"/var/lib/weaviate/default/teamLkzPdyP7bQro/main.hnsw.commitlog.d/1731038403","time":"2025-03-06T04:02:01Z"}

{"action":"hnsw_commit_logger_combine_condensed_logs","id":"main","input_first":"/var/lib/weaviate/default/team37N1aMAaWmpn/main.hnsw.commitlog.d/1721027622.condensed","input_second":"/var/lib/weaviate/default/team37N1aMAaWmpn/main.hnsw.commitlog.d/1741147203.condensed","level":"info","msg":"successfully combined previously condensed commit log files","output":"/var/lib/weaviate/default/team37N1aMAaWmpn/main.hnsw.commitlog.d/1721027622","time":"2025-03-06T04:02:01Z"}

{"action":"telemetry_push","level":"info","msg":"telemetry update","payload":"\u0026{MachineID:30e421f1-2b92-468d-b726-dd2c34a18fd2 Type:UPDATE Version:1.24.6 Modules:backup-s3,generative-cohere,generative-openai,generative-palm,qna-openai,ref2vec-centroid,reranker-cohere,text2vec-cohere,text2vec-huggingface,text2vec-openai,text2vec-palm NumObjects:643377 OS:linux Arch:amd64}","time":"2025-03-06T08:18:09Z"}

{"level":"info","msg":"Created shard default_teamxk8mepg2dMyJ in 2.62715ms","time":"2025-03-07T04:00:05Z"}

{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2025-03-07T04:00:05Z","took":83026}

{"class":"Default","level":"info","msg":"start uploading files","time":"2025-03-07T04:00:05Z"}

{"backup_id":"app-default-250307-040004","class":"Default","level":"info","msg":"release backup","time":"2025-03-07T04:02:09Z"}

{"class":"Default","level":"info","msg":"finish uploading files","time":"2025-03-07T04:02:09Z"}

{"level":"info","msg":"start uploading meta data","time":"2025-03-07T04:02:09Z"}

{"level":"info","msg":"finish uploading meta data","time":"2025-03-07T04:02:10Z"}

{"action":"create_backup","backup_id":"app-default-250307-040004","level":"info","msg":"backup completed successfully","time":"2025-03-07T04:02:10Z"}

{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2025-03-07T07:18:20Z"}

{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2025-03-07T07:18:20Z"}

{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2025-03-07T07:18:20Z"}

{"action":"broadcast_abort_transaction","error":"host \"172.19.0.3:7101\": send http request: Delete \"http://172.19.0.3:7101/schema/transactions/5ba28efc-2f25-43b6-88f0-e2c11a4f6a38\": dial tcp 172.19.0.3:7101: connect: connection refused","id":"5ba28efc-2f25-43b6-88f0-e2c11a4f6a38","level":"error","msg":"broadcast tx abort failed","time":"2025-03-07T07:18:20Z"}

{"action":"startup","error":"could not load or initialize schema: sync schema with other nodes in the cluster: read schema: open transaction: broadcast open transaction: host \"172.19.0.3:7101\": send http request: Post \"http://172.19.0.3:7101/schema/transactions/\": dial tcp 172.19.0.3:7101: connect: connection refused","level":"fatal","msg":"could not initialize schema manager","time":"2025-03-07T07:18:20Z"}

{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2025-03-07T07:18:21Z"}

{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2025-03-07T07:18:21Z"}

{"action":"memberlist_init","error":"1 error occurred:\n\t* Failed to join 172.19.0.3:7100: dial tcp 172.19.0.3:7100: connect: connection refused\n\n","level":"error","msg":"memberlist join not successful","remote_hostname":["weaviate-node-1:7100"],"time":"2025-03-07T07:18:21Z"}

{"action":"startup","error":"join cluster: 1 error occurred:\n\t* Failed to join 172.19.0.3:7100: dial tcp 172.19.0.3:7100: connect: connection refused\n\n","level":"error","msg":"could not init cluster state","time":"2025-03-07T07:18:21Z"}

{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2025-03-07T07:18:22Z"}

{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2025-03-07T07:18:22Z"}

{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2025-03-07T07:18:22Z"}

{"level":"warning","msg":"Multiple vector spaces are present, GraphQL Explore and REST API list objects endpoint module include params has been disabled as a result.","time":"2025-03-07T07:18:22Z"}

{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50051","time":"2025-03-07T07:18:22Z"}

{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://[::]:8080","time":"2025-03-07T07:18:22Z"}

{"action":"telemetry_push","level":"info","msg":"telemetry started","payload":"\u0026{MachineID:2c582ae5-789d-447c-9b25-1cc60e5b2bc7 Type:INIT Version:1.24.6 Modules:backup-s3,generative-cohere,generative-openai,generative-palm,qna-openai,ref2vec-centroid,reranker-cohere,text2vec-cohere,text2vec-huggingface,text2vec-openai,text2vec-palm NumObjects:0 OS:linux Arch:amd64}","time":"2025-03-07T07:18:22Z"}

{"action":"lsm_recover_from_active_wal","class":"Default","index":"default","level":"warning","msg":"active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...","path":"/var/lib/weaviate/default/team0wMvbmZOdYAl/lsm/objects/segment-1727236802809722618","shard":"team0wMvbmZOdYAl","time":"2025-03-07T07:18:23Z"}

{"action":"lsm_recover_from_active_wal","class":"Default","index":"default","level":"warning","msg":"active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...","path":"/var/lib/weaviate/default/team0wMvbmZOdYAl/lsm/property__id/segment-1727236802810307015","shard":"team0wMvbmZOdYAl","time":"2025-03-07T07:18:23Z"}

{"action":"lsm_recover_from_active_wal","class":"Default","index":"default","level":"warning","msg":"active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...","path":"/var/lib/weaviate/default/team0wMvbmZOdYAl/lsm/property_docId/segment-1727236802810364796","shard":"team0wMvbmZOdYAl","time":"2025-03-07T07:18:23Z"}

{"action":"lsm_recover_from_active_wal","class":"Default","index":"default","level":"warning","msg":"active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...","path":"/var/lib/weaviate/default/team0wMvbmZOdYAl/lsm/property_text/segment-1727236802810419938","shard":"team0wMvbmZOdYAl","time":"2025-03-07T07:18:23Z"}

{"action":"lsm_recover_from_active_wal","class":"Default","index":"default","level":"warning","msg":"active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...","path":"/var/lib/weaviate/default/team0wMvbmZOdYAl/lsm/property_correctionId/segment-1727236802810704006","shard":"team0wMvbmZOdYAl","time":"2025-03-07T07:18:23Z"}

{"action":"lsm_recover_from_active_wal","class":"Default","index":"default","level":"warning","msg":"active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...","path":"/var/lib/weaviate/default/team0wMvbmZOdYAl/lsm/property_text_searchable/segment-1727236802810544466","shard":"team0wMvbmZOdYAl","time":"2025-03-07T07:18:23Z"}

{"level":"info","msg":"Completed loading shard default_team0wMvbmZOdYAl in 10.048082ms","time":"2025-03-07T07:18:23Z"}

{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2025-03-07T07:18:23Z","took":83694}

{"action":"hnsw_commit_logger_combine_condensed_logs","id":"main","input_first":"/var/lib/weaviate/default/team0wMvbmZOdYAl/main.hnsw.commitlog.d/1727236802.condensed","input_second":"/var/lib/weaviate/default/team0wMvbmZOdYAl/main.hnsw.commitlog.d/1741233604.condensed","level":"info","msg":"successfully combined previously condensed commit log files","output":"/var/lib/weaviate/default/team0wMvbmZOdYAl/main.hnsw.commitlog.d/1727236802","time":"2025-03-07T07:18:23Z"}