This magic happens in three places.
First, in the Weaviate docker-compose.yml file, you do not declare any vectorizer modules:
# Weaviate service with no vectorizer module enabled; vectors are expected
# to be supplied by the client at insert time.
services:
  weaviate:
    volumes:
      - weaviate_data:/var/lib/weaviate
    image: semitechnologies/weaviate:1.24.21
    ports:
      # host:container, quoted so the mapping is always read as a string
      - "8077:8080"
      - "50051:50051"
    restart: on-failure:0
    environment:
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
      PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
      # No default vectorizer: objects are not vectorized by a module
      DEFAULT_VECTORIZER_MODULE: "none"
      CLUSTER_HOSTNAME: "node1"

# Named volume backing PERSISTENCE_DATA_PATH inside the container
volumes:
  weaviate_data: {}
As you can see, DEFAULT_VECTORIZER_MODULE is set to "none".
The second place is where you define your collection:
# Short aliases for the enum members repeated across the property list.
TEXT = wvcc.DataType.TEXT
FIELD = wvcc.Tokenization.FIELD

# Inverted-index settings: index property lengths, no stopword preset,
# plus the project's own stopword additions.
inverted_index_settings = wvcc.Configure.inverted_index(
    index_property_length=True,
    stopwords_preset=None,
    stopwords_additions=isagog_stopwords,
)

# HNSW vector index using cosine distance.
hnsw_settings = wvcc.Configure.VectorIndex.hnsw(
    distance_metric=wvcc.VectorDistances.COSINE
)

# Properties without an explicit tokenization use the default,
# tokenization=wvcc.Tokenization.WORD.
article_properties = [
    # app generated publicationDay-slug
    wvcc.Property(name="app_id", data_type=TEXT, tokenization=FIELD),
    # search/filter
    wvcc.Property(name="author", data_type=TEXT),
    # search/filter
    wvcc.Property(name="category", data_type=TEXT),
    # search/filter
    wvcc.Property(name="excerpt", data_type=TEXT),
    # to be vectorized
    wvcc.Property(name="kicker", data_type=TEXT),
    # search/filter
    wvcc.Property(name="locmentions", data_type=TEXT),
    # search/filter
    wvcc.Property(name="orgmentions", data_type=TEXT),
    # search/filter
    wvcc.Property(name="permentions", data_type=TEXT),
    # search/filter
    wvcc.Property(name="publicationDay", data_type=TEXT, tokenization=FIELD),
    # search/filter
    wvcc.Property(name="tag", data_type=TEXT, tokenization=FIELD),
    # to be vectorized
    wvcc.Property(name="title", data_type=TEXT),
    # search/filter
    wvcc.Property(name="topic", data_type=TEXT, tokenization=FIELD),
]

# Create the collection with no vectorizer configured (vectorizer_config=None).
client.collections.create(
    schema_name,
    description="A class to store articles with a semantic kicker and searchable author.",
    vectorizer_config=None,
    inverted_index_config=inverted_index_settings,
    vector_index_config=hnsw_settings,
    properties=article_properties,
)
And again you can see vectorizer_config=None.
Third, when you insert your objects, you must insert both the data and its manually derived vector. Something along the following lines:
# Import every record together with its precomputed vector.
# Each item in `data` is expected to be a dict holding the object's properties
# plus a 'vector' entry; a missing 'vector' key raises KeyError.
try:
    with client.batch.dynamic() as batch:
        for item in data:
            vector = item['vector']
            # Build the properties from a filtered copy so the caller's record
            # keeps its 'vector' and the import can be re-run on the same data.
            properties = {key: value for key, value in item.items() if key != 'vector'}
            batch.add_object(properties=properties, collection=schema_name, vector=vector)
except Exception as exc:
    # Report the failure and re-raise so it is not silently swallowed.
    print(f"Batch import into {schema_name} failed: {exc}")
    raise
Hope this helps. Hasta la victoria!!!