Description
Hi there, I am trying to generate vector embeddings via Mistral AI, but I am running into a few issues. First, I was getting a 429 error on object insertion; I fixed that by limiting the number of requests per second. Now I am getting this:
errors: text too long for vectorization. Tokens for text: 10440, max tokens per batch: 8192, ApiKey absolute token limit: 1000000’
# Named vector "filecontent": built from the "filecontent" property using the
# "mistral-embed" model via the text2vec_mistral vectorizer configuration.
_filecontent_vector = Configure.NamedVectors.text2vec_mistral(
    name="filecontent",
    source_properties=["filecontent"],
    model="mistral-embed",
)

# Create the "Embeddings" collection with that single named vector.
client.collections.create(
    "Embeddings",
    vectorizer_config=[_filecontent_vector],
    # Additional parameters not shown
)
# Insert one object per source row into the "Embeddings" collection, then
# release the database cursor and connection.
#
# NOTE(review): this loop sends each row's full text as a single object. The
# vectorizer rejects texts above its per-batch token limit, so very long
# documents presumably need to be split into smaller chunks before insertion
# -- TODO confirm the chunking strategy.

# The collection handle does not depend on the row, so look it up once.
collection = client.collections.get("Embeddings")

try:
    for row in rows:
        original_name = row.OriginalName
        data_row = {
            "filename": original_name,
            "filecontent": row.FullText,
        }
        print(f"Passing file : {original_name}")

        with collection.batch.rate_limit(requests_per_minute=30) as batch:
            # Pass the UUID derived from the payload to the insert; the
            # original computed it but never used it. Presumably this keeps
            # re-runs from creating duplicate objects -- verify against the
            # Weaviate batch import documentation.
            batch.add_object(
                properties=data_row,
                uuid=generate_uuid5(data_row),
            )

        # Report anything the batch that just closed could not insert.
        if collection.batch.failed_objects:
            print(collection.batch.failed_objects)

        # Pause between rows to throttle inserts.
        time.sleep(30)
finally:
    # Close the database handles even if an insert raised part-way through.
    cursor.close()
    connection.close()