Hi,
I’m using WCS and have a schema that uses text2vec-huggingface with the model paraphrase-multilingual-mpnet-base-v2. When I run a nearText search, I get the following error:
explorer: get class: vectorize params: vectorize params: vectorize params: vectorize keywords: remote client vectorize: failed with status: 400 error: [Errno 2] No such file or directory: '/data/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence_xlnet_config.json'
I can’t figure out what is going wrong. Can anyone help me out?
The schema:
{
  "class": "Bp2_scrapedPageChunks_v10",
  "invertedIndexConfig": {
    "bm25": {
      "b": 0.75,
      "k1": 1.2
    },
    "cleanupIntervalSeconds": 60,
    "stopwords": {
      "additions": null,
      "preset": "en",
      "removals": null
    }
  },
  "moduleConfig": {
    "text2vec-contextionary": {
      "vectorizeClassName": false
    },
    "text2vec-huggingface": {
      "model": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
      "options": {
        "useCache": true,
        "useGPU": false,
        "waitForModel": true
      },
      "vectorizeClassName": false
    }
  },
  "properties": [
    {
      "dataType": [
        "text"
      ],
      "indexFilterable": true,
      "indexSearchable": true,
      "moduleConfig": {
        "text2vec-contextionary": {
          "vectorizePropertyName": false
        },
        "text2vec-huggingface": {
          "skip": false,
          "vectorizePropertyName": false
        }
      },
      "name": "content",
      "tokenization": "word"
    }
  ],
  "replicationConfig": {
    "factor": 1
  },
  "shardingConfig": {
    "virtualPerPhysical": 128,
    "desiredCount": 1,
    "actualCount": 1,
    "desiredVirtualCount": 128,
    "actualVirtualCount": 128,
    "key": "_id",
    "strategy": "hash",
    "function": "murmur3"
  },
  "vectorIndexConfig": {
    "skip": false,
    "cleanupIntervalSeconds": 300,
    "maxConnections": 64,
    "efConstruction": 128,
    "ef": -1,
    "dynamicEfMin": 100,
    "dynamicEfMax": 500,
    "dynamicEfFactor": 8,
    "vectorCacheMaxObjects": 1000000000000,
    "flatSearchCutoff": 40000,
    "distance": "cosine",
    "pq": {
      "enabled": false,
      "bitCompression": false,
      "segments": 0,
      "centroids": 256,
      "encoder": {
        "type": "kmeans",
        "distribution": "log-normal"
      }
    }
  },
  "vectorIndexType": "hnsw",
  "vectorizer": "text2vec-huggingface"
}