But, where i search ENABLE_TOKENIZER_GSE in source code
I find ENABLE_TOKENIZER_GSE_CH, so i think it is just the chinese tokenizer
And then i find that:
gse is config for japeness
gse_ch just config for chinese
So i try:
But case error where:
client.schema.create_class(class_config)
weaviate.exceptions.UnexpectedStatusCodeException: Create class! Unexpected status code: 422, with response body: {'error': [{'message': "tokenization 'gse_ch' is not allowed for data type 'text'"}]}.
Then find in source code:
I add it and want to rebuild the docker:
modify a part of the code in Dockerfile:
# syntax=docker/dockerfile:1.6
# Dockerfile for development purposes.
# Read docs/development.md for more information
# vi: ft=dockerfile
###############################################################################
# Base build image
FROM golang:1.24-alpine3.22 AS build_base
ENV GO111MODULE=on
# new: add goproxy
ENV GOPROXY=https://goproxy.cn,direct
RUN apk add --no-cache bash ca-certificates git gcc g++ libc-dev
WORKDIR /workspace
COPY go.mod go.sum ./
RUN go mod download
###############################################################################
# This image builds the weaviate server
FROM build_base AS server_builder
RUN mkdir -p /runtime/go-ego
ARG TARGETARCH
ARG GIT_BRANCH="unknown"
ARG GIT_REVISION="unknown"
ARG BUILD_USER="unknown"
ARG BUILD_DATE="unknown"
ARG EXTRA_BUILD_ARGS=""
ARG CGO_ENABLED=1
ENV CGO_ENABLED=$CGO_ENABLED
COPY . .
# new:custom gse dict
COPY gse-dict/custom_words.txt /runtime/gse-dict/custom_words.txt
COPY gse-dict/stop_custom.txt /runtime/gse-dict/stop_custom.txt
RUN --mount=type=cache,id=gobuild-${TARGETARCH},target=/root/.cache/go-build,sharing=locked \
GOOS=linux GOARCH=${TARGETARCH} \
go build $EXTRA_BUILD_ARGS -trimpath \
-ldflags="-s -w -extldflags '-static' \
-X github.com/weaviate/weaviate/usecases/build.Branch=${GIT_BRANCH} \
-X github.com/weaviate/weaviate/usecases/build.Revision=${GIT_REVISION} \
-X github.com/weaviate/weaviate/usecases/build.BuildUser=${BUILD_USER} \
-X github.com/weaviate/weaviate/usecases/build.BuildDate=${BUILD_DATE}" \
-o /weaviate-server ./cmd/weaviate-server
RUN go_ego_dir=/go/pkg/mod/github.com/go-ego && \
if [ -d "$go_ego_dir" ]; then cp -a "$go_ego_dir/." /runtime/go-ego/; fi
###############################################################################
# This creates an image that can be used to fake an api for telemetry acceptance test purposes
FROM build_base AS telemetry_mock_api
COPY . .
ENTRYPOINT ["./tools/dev/telemetry_mock_api.sh"]
###############################################################################
# Weaviate (no differentiation between dev/test/prod - 12 factor!)
FROM alpine:3.22 AS weaviate
RUN apk add --no-cache bc ca-certificates openssl && mkdir ./modules
RUN mkdir -p /var/gse/dicts
# new:from server_builder copy to weaviate container
COPY --from=server_builder /runtime/gse-dict/custom_words.txt /var/gse/dicts/custom_words.txt
COPY --from=server_builder /runtime/gse-dict/stop_custom.txt /var/gse/dicts/stop_custom.txt
COPY --from=server_builder /weaviate-server /bin/weaviate
COPY --from=server_builder /runtime/go-ego/ /go/pkg/mod/github.com/go-ego/
ENTRYPOINT ["/bin/weaviate"]
CMD ["--host","0.0.0.0","--port","8080","--scheme","http"]
But run error:
Do client.schema.create_class(class_config) ok, but client.data_object.create()
data = {
"text": "商品甲乙丙丁的使用方法",
"doc_id": str(uuid.uuid4()),
"doc_hash": get_md5("商品甲乙丙丁的使用方法"),
"document_id": str(uuid.uuid4()),
"dataset_id": str(uuid.uuid4())
}
uuid1 = client.data_object.create(
data_object=data,
class_name=class_name,
vector=[random.uniform(-1, 1) for _ in range(384)]
)
print("插入成功_1,UUID:", uuid1)
case error:
requests.exceptions.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
docker logs:
[root@localhost ~]# docker logs root-weaviate-1
2025/09/26 02:09:38 Dict files path: [github.com/go-ego/gse@v0.80.3/data/dict/zh/t_1.txt github.com/go-ego/gse@v0.80.3/data/dict/zh/s_1.txt]
2025/09/26 02:09:38 Load the gse dictionary: "github.com/go-ego/gse@v0.80.3/data/dict/zh/t_1.txt"
2025/09/26 02:09:38 Could not load dictionaries: "github.com/go-ego/gse@v0.80.3/data/dict/zh/t_1.txt", open github.com/go-ego/gse@v0.80.3/data/dict/zh/t_1.txt: no such file or directory
2025/09/26 02:09:38 gse.New failed: open github.com/go-ego/gse@v0.80.3/data/dict/zh/t_1.txt: no such file or directory
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"warning","log_level_env":"","msg":"log level not recognized, defaulting to info","time":"2025-09-26T02:09:39Z"}
{"action":"startup","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"Feature flag LD integration disabled: could not locate WEAVIATE_LD_API_KEY env variable","time":"2025-09-26T02:09:39Z"}
{"action":"startup","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2025-09-26T02:09:39Z"}
{"action":"startup","auto_schema_enabled":true,"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"warning","msg":"Multiple vector spaces are present, GraphQL Explore and REST API list objects endpoint module include params has been disabled as a result.","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"module offload-s3 is enabled","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","flag_key":"collection-retrieval-strategy","level":"info","msg":"feature flag instantiated","time":"2025-09-26T02:09:39Z","tool":"feature_flag","value":"LeaderOnly"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"open cluster service","servers":{"node1":8300},"time":"2025-09-26T02:09:39Z"}
{"address":"172.18.0.2:8301","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"starting cloud rpc server ...","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"starting raft sub-system ...","time":"2025-09-26T02:09:39Z"}
{"address":"172.18.0.2:8300","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"tcp transport","tcpMaxPool":3,"tcpTimeout":10000000000,"time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"loading local db","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"local DB successfully loaded","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"schema manager loaded","n":0,"time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","metadata_only_voters":false,"msg":"construct a new raft node","name":"node1","time":"2025-09-26T02:09:39Z"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","index":10,"level":"info","msg":"initial configuration","servers":"[[{Suffrage:Voter ID:node1 Address:172.18.0.2:8300}]]","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","last_snapshot_index":0,"last_store_applied_index_on_start":8,"level":"info","msg":"raft node constructed","raft_applied_index":0,"raft_last_index":10,"time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","hasState":true,"level":"info","msg":"raft init","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.18.0.2:8300"},"time":"2025-09-26T02:09:39Z"}
{"action":"cluster_api_startup","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"cluster api server is ready to handle requests on :7947","time":"2025-09-26T02:09:39Z"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","follower":{},"leader-address":"","leader-id":"","level":"info","msg":"entering follower state","time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"attempted to join and failed","remoteNode":"172.18.0.2:8300","status":8,"time":"2025-09-26T02:09:39Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.18.0.2:8300"},"time":"2025-09-26T02:09:40Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"attempted to join and failed","remoteNode":"172.18.0.2:8300","status":8,"time":"2025-09-26T02:09:40Z"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","last-leader-addr":"","last-leader-id":"","level":"warning","msg":"heartbeat timeout reached, starting election","time":"2025-09-26T02:09:41Z"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"entering candidate state","node":{},"term":6,"time":"2025-09-26T02:09:41Z"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"pre-vote successful, starting election","refused":0,"tally":1,"term":6,"time":"2025-09-26T02:09:41Z","votesNeeded":1}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"election won","tally":1,"term":6,"time":"2025-09-26T02:09:41Z"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","leader":{},"level":"info","msg":"entering leader state","time":"2025-09-26T02:09:41Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","last_store_log_applied_index":8,"level":"info","log_index":8,"log_name":"LogCommand","log_type":0,"msg":"reloading local DB as RAFT and local DB are now caught up","time":"2025-09-26T02:09:41Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"reload local db: update schema ...","time":"2025-09-26T02:09:41Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","index":"Vector_index_618b3434_36e9_069f_7ff3_eb189bf56ca8_Node","level":"info","msg":"reload local index","time":"2025-09-26T02:09:41Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","index":"Vector_index_b50b2b0e_598c_43d6_b424_7207ac2ceb5d_Node","level":"info","msg":"reload local index","time":"2025-09-26T02:09:41Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"configured versions","server_version":"1.34.0-dev","time":"2025-09-26T02:09:41Z","version":"1.34.0-dev"}
{"action":"grpc_startup","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"grpc server listening at [::]:50051","time":"2025-09-26T02:09:41Z"}
{"address":"172.18.0.2:8300","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"current Leader","time":"2025-09-26T02:09:41Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"attempting to join","remoteNodes":{"node1":"172.18.0.2:8300"},"time":"2025-09-26T02:09:41Z"}
{"action":"restapi_management","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"Serving weaviate at http://[::]:8080","time":"2025-09-26T02:09:41Z","version":"1.34.0-dev"}
{"action":"raft","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","command":0,"level":"info","msg":"updating configuration","server-addr":"172.18.0.2:8300","server-id":"node1","servers":"[[{Suffrage:Voter ID:node1 Address:172.18.0.2:8300}]]","time":"2025-09-26T02:09:41Z"}
{"action":"restore_from_disk","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"snapshots disabled, loading from commit log","time":"2025-09-26T02:09:42Z"}
{"action":"restore_from_disk","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","duration":"68.335µs","level":"info","msg":"restored data from disk","time":"2025-09-26T02:09:42Z"}
{"action":"hnsw_prefill_cache_async","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"not waiting for vector cache prefill, running in background","time":"2025-09-26T02:09:42Z","wait_for_cache_prefill":false}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"Completed loading shard vector_index_618b3434_36e9_069f_7ff3_eb189bf56ca8_node_OpOHHIV9I79k in 1.343652ms","time":"2025-09-26T02:09:42Z"}
{"action":"hnsw_vector_cache_prefill","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2025-09-26T02:09:42Z","took":54450}
{"action":"restore_from_disk","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"snapshots disabled, loading from commit log","time":"2025-09-26T02:09:42Z"}
{"action":"restore_from_disk","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","duration":"194.373µs","level":"info","msg":"restored data from disk","time":"2025-09-26T02:09:42Z"}
{"action":"hnsw_prefill_cache_async","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"not waiting for vector cache prefill, running in background","time":"2025-09-26T02:09:42Z","wait_for_cache_prefill":false}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"Completed loading shard vector_index_b50b2b0e_598c_43d6_b424_7207ac2ceb5d_node_EN6I6QEfwct1 in 1.532502ms","time":"2025-09-26T02:09:42Z"}
{"action":"hnsw_vector_cache_prefill","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","count":3000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2025-09-26T02:09:42Z","took":238558}
{"action":"telemetry_push","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"telemetry started","payload":"\u0026{MachineID:3fc39066-83d3-4b97-8b4f-f727fc279f1b Type:INIT Version:1.34.0-dev ObjectsCount:0 OS:linux Arch:amd64 UsedModules:[] CollectionsCount:2}","time":"2025-09-26T02:09:43Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"Metadata FSM reported caught up, starting replication engine","time":"2025-09-26T02:09:56Z"}
{"action":"replication_engine","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","engine":{},"level":"info","msg":"starting replication engine","node":"node1","time":"2025-09-26T02:09:56Z"}
{"action":"replication_engine","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","consumer":{},"level":"info","msg":"starting replication engine consumer","node":"node1","time":"2025-09-26T02:09:56Z"}
{"action":"replication_engine","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","component":"replication_consumer","level":"info","max_workers":10,"msg":"starting replication operation consumer","node":"node1","op_timeout":86400000000000,"time":"2025-09-26T02:09:56Z"}
{"action":"replication_engine","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","level":"info","msg":"starting replication engine producer","node":"node1","producer":{},"time":"2025-09-26T02:09:56Z"}
{"action":"replication_engine","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","component":"replication_producer","level":"info","msg":"starting replication engine FSM producer","node":"node1","polling_interval":5000000000,"time":"2025-09-26T02:09:56Z"}
{"build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","error":"runtime error: invalid memory address or nil pointer dereference","level":"error","method":"POST","msg":"runtime error: invalid memory address or nil pointer dereference","path":{"Scheme":"","Opaque":"","User":null,"Host":"","Path":"/v1/objects","RawPath":"","OmitHost":false,"ForceQuery":false,"RawQuery":"","Fragment":"","RawFragment":""},"time":"2025-09-26T02:10:50Z"}
{"action":"requests_total","api":"rest","build_git_commit":"unknown","build_go_version":"go1.24.7","build_image_tag":"unknown","build_wv_version":"1.34.0-dev","class_name":"","error":"runtime error: invalid memory address or nil pointer dereference","level":"error","msg":"unexpected error","query_type":"","time":"2025-09-26T02:10:50Z"}
I think it may lose something when rebuild the images.
So now i want to find some help with rebuild the docker images. Thanks !