I am using Python to enter JSON documents into Weaviate. It is a brand new instance, running locally in Docker, and I keep getting an error that no schema is present. I am not really sure why it is asking for a schema and I think this might be due to a misconfiguration on my part?
IN my test, I remove the collection prior to trying to insert anything so the Add Collection part of the method is being called.
Here is the code I am using to add the document:
def add_to_vectorstore(database_name: str, collection_name: str, json_object: json, state, property_field_names: list[str] = None,
property_value_types: list[weaviate_datatypes] = None,
property_field_values: list[str] = None, object_id: uuid = None) -> [int, uuid, list]:
"""
This method stores a JSON document in the collection provided
Parameters
----------
database_name : str
Name of the databaseShould be set to the user token retrieved from MongDB during login
collection_name: str
Name of the collection to which this attachment should be added
json_object: json
The document to be stored - must be valid JSON
state: TypeDict
Current state object
property_field_names: list[str]
The names of any metadata fields to add to the collection
property_value_types: list[str]
The value types of any metadata fields to add to the collection - use weaviate_valuetypes
property_field_values: list[str]
The values of any metadata fields to add to the collection
object_id: uuid
If this is an update then provide original UUID
Returns
-------
int
Total records added
uuid
Unique ID of the document added/updated
list
Current state object
"""
diff_count = 0
weaviate_client = weaviate.connect_to_local()
internal_name = database_name + '_' + collection_name
try:
if not weaviate_client.collections.exists(internal_name):
logging.debug("Creating the collection " + internal_name)
fields = [wvc.config.Property]
if property_field_names is not None and property_value_types is not None and len(property_field_names) == len(property_value_types):
index = 0
while index < len(property_field_names):
if property_value_types[index] is weaviate_datatypes.Int:
this_type = wvc.config.DataType.INT
elif property_value_types[index] is weaviate_datatypes.DateTime:
this_type = wvc.config.DataType.DATE
elif property_value_types[index] is weaviate_datatypes.Float:
this_type = wvc.config.DataType.NUMBER
elif property_value_types[index] is weaviate_datatypes.JSON:
this_type = wvc.config.DataType.TEXT
elif property_value_types[index] is weaviate_datatypes.BOOL:
this_type = wvc.config.DataType.BOOL
else:
this_type = wvc.config.DataType.TEXT
this_prop = wvc.config.Property(
name=property_field_names[index],
data_type=this_type,
vectorize_property_name=False,
)
fields.append(this_prop)
index += 1
user_collection = weaviate_client.collections.create(
name=internal_name,
vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_huggingface(),
properties=fields,
# Configure the vector index
vector_index_config=wvc.config.Configure.VectorIndex.hnsw( # Or `flat` or `dynamic`
distance_metric=wvc.config.VectorDistances.COSINE,
quantizer=wvc.config.Configure.VectorIndex.Quantizer.bq(),
),
# Configure the inverted index
inverted_index_config=wvc.config.Configure.inverted_index(
index_null_state=True,
index_property_length=True,
index_timestamps=True,
)
)
initial_count = 0
else:
logging.debug("Collection already exists")
user_collection = weaviate_client.collections.get(internal_name)
aggregation = user_collection.aggregate.over_all(total_count=True)
initial_count = aggregation.total_count
user_properties = []
if property_field_names is not None and property_field_values is not None and len(property_field_names) == len(property_field_values):
index = 0
while index < len(property_field_names):
user_properties.append({property_field_names[index]: property_field_values[index]})
index += 1
if object_id is None:
logging.debug("Adding document to Weaviate")
if len(user_properties) > 0:
object_id = user_collection.data.insert(properties=user_properties, vector=json_object)
else:
object_id = user_collection.data.insert(vector=json_object)
else:
logging.debug("Updating document in Weaviate")
if len(user_properties) > 0:
user_collection.data.replace(uuid=object_id, properties=user_properties, vector=json_object)
else:
user_collection.data.replace(uuid=object_id, vector=json_object)
aggregation = user_collection.aggregate.over_all(total_count=True)
final_count = aggregation.total_count
diff_count = final_count - initial_count
logging.debug("Started with " + str(initial_count) + " documents, now have " + str(final_count) + " documents")
except:
trace_back = traceback.format_exc()
logging.error("An unexpected error occurred attempting to add document to Weaviate collection: " + internal_name +
"\nHere is the document that failed: " + write_object_to_prompt(json_object) + " \nWith the error:\n " + trace_back)
finally:
weaviate_client.close()
return diff_count, object_id, state
Here is my YAML:
---
version: '3.4'
services:
weaviate:
image: cr.weaviate.io/semitechnologies/weaviate:1.25.5
restart: on-failure:0
ports:
- 8080:8080
- 50051:50051
environment:
QUERY_DEFAULTS_LIMIT: 20
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
PERSISTENCE_DATA_PATH: "./data"
DEFAULT_VECTORIZER_MODULE: text2vec-huggingface
ENABLE_MODULES: text2vec-cohere,text2vec-huggingface,text2vec-openai
TRANSFORMERS_INFERENCE_API: http://t2v-transformers:8080
CLUSTER_HOSTNAME: 'node1'
DISTANCE: 'cosine'
PERSISTENCE_HNSW_MAX_LOG_SIZE: '5000MiB'
t2v-transformers:
image: cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1
environment:
ENABLE_CUDA: 1 # set to 0 to disable
volumes:
weaviate_data:
...
Here is the error:
Input should be a valid dictionary or instance of Property [type=model_type, input_value=<class 'weaviate.collecti...lasses.config.Property'>, input_type=ModelMetaclass]
For further information visit https://errors.pydantic.dev/2.7/v/model_type
Can somebody please help me figure out what I am doing wrong?
Thank you!