[Question] client.batch.failed_objects

import time
import weaviate
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import StorageContext, Settings
from llama_index.readers.file import PyMuPDFReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
import nest_asyncio
nest_asyncio.apply()  # Only needed in Jupyter notebooks
weaviate_client = weaviate.connect_to_local()
weaviate_client.connect()
Settings.llm = OpenAI(temperature=0, model="gpt-4o")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=512)
splitter = SentenceSplitter(chunk_size=512, chunk_overlap=100)
documents = SimpleDirectoryReader("./data1").load_data()
nodes = splitter.get_nodes_from_documents(documents)
print(nodes)
if weaviate_client.collections.exists("TextNode"):
    weaviate_client.collections.delete("TextNode")
schema = {
           "class": "TextNode",
           "properties": [
               {"name": "id_", "dataType": ["string"], },
               {"name": "embedding", "dataType": ["number[]"], },
               {"name": "file_path", "dataType": ["string"], },
               {"name": "file_name", "dataType": ["string"], },
               {"name": "file_type", "dataType": ["string"], },
               {"name": "file_size", "dataType": ["int"], },
               {"name": "creation_date", "dataType": ["string"], },
               {"name": "last_modified_date", "dataType": ["string"], },
               # {"name": "source", "dataType": ["string"], },
               {"name": "text", "dataType": ["text"], },
               {"name": "start_char_idx", "dataType": ["int"], },
               {"name": "end_char_idx", "dataType": ["int"], }
               # {"name": "metadata_str", "dataType": ["string"], },
               # {"name": "content", "dataType": ["text"], },
           ]
       }
weaviate_client.collections.create_from_dict(schema)
try:
    collection = weaviate_client.collections.get("TextNode")
    data_lines = []
    for node in nodes:
        embedding = Settings.embed_model.get_text_embedding(node.text)  # 生成嵌入
        node.embedding = embedding 
        properties = {
            "id": node.id_,
            "embedding": node.embedding,
            "file_path": node.metadata.get("file_path"),
            "file_name": node.metadata.get("file_name"),
            "file_type": node.metadata.get("file_type"),
            "file_size": node.metadata.get("file_size"),
            "creation_date": node.metadata.get("creation_date"),
            "last_modified_date": node.metadata.get("last_modified_date"),
            # "source": node.metadata.get("source"),
            "text": node.text,
            "start_char_idx": node.start_char_idx,
            "end_char_idx": node.end_char_idx,
            # "metadata_str": node.metadata_template,
            # "content": node.text,
        }
        data_lines.append(properties)
    print(data_lines)
    with collection.batch.dynamic() as batch:
        for data_line in data_lines:
            batch.add_object(properties=data_line)
    print("node insert completation!!!!!!!!!!!")
    vector_store = WeaviateVectorStore(weaviate_client=weaviate_client, index_name="TextNode")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_vector_store(vector_store)
    print(index.index_struct)
    print(index.storage_context)

    query_engine = index.as_query_engine()

    while True:
        question = input("User: ")
        if question.strip() == "":
            break
        start_time = time.time()
        response = query_engine.query(question)
        end_time = time.time()
        print(f"Time taken: {end_time - start_time} seconds")
        print(f"AI: {response}")
finally:
    weaviate_client.close()

Error message is:
{‘message’: ‘Failed to send 1 objects in a batch of 1. Please inspect client.batch.failed_objects or collection.batch.failed_objects for the failed objects.’}

How should I solve it?Thank you

Hi!

You need to inspect client.batch.failed_objects and check the error log in there.

It can be apikeys, timeout, or anything in between.

Can you check the content of that objects?

More info on error handling in python batch:

Thanks!

Thanks!