I'm trying to resolve the error below; here are my code and the full traceback.

import weaviate
from weaviate.classes.init import Auth
import google.generativeai as genai
from typing import List, Dict
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain.embeddings import HuggingFaceEmbeddings
from weaviate.classes import query as wvc
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

WEAVIATE_API_KEY = ""
WEAVIATE_URL = ""
gemini_api_key = ""
huggingface_api_key = ""

# Connect to Weaviate Cloud, passing the Hugging Face key so the
# text2vec-huggingface module can call the Inference API
headers = {
    "X-HuggingFace-Api-Key": huggingface_api_key,
}

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    headers=headers,
)


# Initialize Gemini
genai.configure(api_key=gemini_api_key)



print("Client is Ready?", client.is_ready())
from weaviate import classes as wvc

client.collections.delete("WikipediaLangChain")

from weaviate.classes.config import Configure

client.collections.create(
    "WikipediaLangChain",
    vectorizer_config=[
        Configure.NamedVectors.text2vec_huggingface(
            name="title_vector",
            source_properties=["title"],
            model="sentence-transformers/all-MiniLM-L6-v2",
        )
    ],

)

embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",  # Google's text embedding model
        google_api_key= gemini_api_key
    )


text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)



# import first article
loader = PyPDFLoader("brazil-wikipedia-article-text.pdf", extract_images=False)
docs = loader.load_and_split(text_splitter)
print(f"GOT {len(docs)} docs for Brazil")
db = WeaviateVectorStore.from_documents(docs, embeddings, client=client, index_name="WikipediaLangChain")


# import second article
loader = PyPDFLoader("netherlands-wikipedia-article-text.pdf", extract_images=False)
docs = loader.load_and_split(text_splitter)
print(f"GOT {len(docs)} docs for Netherlands")
db = WeaviateVectorStore.from_documents(docs, embeddings, client=client, index_name="WikipediaLangChain")

# Create vector store
vector_store = WeaviateVectorStore(
    client=client,
    index_name="WikipediaLangChain",
    text_key="text",
    embedding=embeddings,  
    attributes=["source"]
)

vector_store.add_documents(docs)
# let's first get our collection
collection = client.collections.get("WikipediaLangChain")

response = collection.aggregate.over_all(total_count=True)
print(response)

# Group by source
response = collection.aggregate.over_all(group_by="source")
for group in response.groups:
    print(group.grouped_by.value, group.total_count)

# View object properties
object = collection.query.fetch_objects(limit=1).objects[0]
print(object.properties.keys())
print(object.properties.get("source"))
print(object.properties.get("page"))
print(object.properties.get("text"))

# Query in French using Gemini
generateTask = "Quelle est la nourriture traditionnelle de ce pays?"  # "What is the traditional food of this country?"
source_file = "brazil-wikipedia-article-text.pdf"

model = ChatGoogleGenerativeAI(
    model="gemini-pro", 
    google_api_key= gemini_api_key
)

# lets do a RAG directly using only Weaviate



query = collection.generate.near_text(
    query="tradicional food",
  
    limit=10,
    grouped_task=generateTask
)
print(query.generated)

AioRpcError                               Traceback (most recent call last)
File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\collections\grpc\query.py:805, in _QueryGRPC.__call(self, request)
    804 assert self._connection.grpc_stub is not None
--> 805 res = await _Retry(4).with_exponential_backoff(
    806     0,
    807     f"Searching in collection {request.collection}",
    808     self._connection.grpc_stub.Search,
    809     request,
    810     metadata=self._connection.grpc_headers(),
    811     timeout=self._connection.timeout_config.query,
    812 )
    813 return cast(search_get_pb2.SearchReply, res)

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\collections\grpc\retry.py:31, in _Retry.with_exponential_backoff(self, count, error, f, *args, **kwargs)
     30 if e.code() != StatusCode.UNAVAILABLE:
---> 31     raise e
     32 logger.info(
     33     f"{error} received exception: {e}. Retrying with exponential backoff in {2**count} seconds"
     34 )

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\collections\grpc\retry.py:28, in _Retry.with_exponential_backoff(self, count, error, f, *args, **kwargs)
     27 try:
---> 28     return await f(*args, **kwargs)
     29 except AioRpcError as e:

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\grpc\aio\_call.py:327, in _UnaryResponseMixin.__await__(self)
    326 else:
--> 327     raise _create_rpc_error(
    328         self._cython_call._initial_metadata,
    329         self._cython_call._status,
    330     )
    331 else:

AioRpcError: <AioRpcError of RPC that terminated with:
    status = StatusCode.UNKNOWN
    details = "explorer: get class: concurrentTargetVectorSearch): explorer: get class: vector search: object vector search at index wikipedialangchain: shard wikipedialangchain_mj30ETuKNGfK: vector search: knn search: distance between entrypoint and query node: 768 vs 384: vector lengths don't match"
    debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"explorer: get class: concurrentTargetVectorSearch): explorer: get class: vector search: object vector search at index wikipedialangchain: shard wikipedialangchain_mj30ETuKNGfK: vector search: knn search: distance between entrypoint and query node: 768 vs 384: vector lengths don't match", grpc_status:2, created_time:"2025-01-08T06:29:39.4893321+00:00"}"

During handling of the above exception, another exception occurred:

WeaviateQueryError                        Traceback (most recent call last)
Cell In[59], line 5
      1 # lets do a RAG directly using only Weaviate
----> 5 query = collection.generate.near_text(
      6     query="tradicional food",
      7
      8     limit=10,
      9     grouped_task=generateTask
     10 )
     11 print(query.generated)

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\syncify.py:23, in convert.<locals>.sync_method(self, __new_name, *args, **kwargs)
     20 @wraps(method)  # type: ignore
     21 def sync_method(self, *args, __new_name=new_name, **kwargs):
     22     async_func = getattr(cls, __new_name)
---> 23     return _EventLoopSingleton.get_instance().run_until_complete(
     24         async_func, self, *args, **kwargs
     25     )

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\event_loop.py:42, in _EventLoop.run_until_complete(self, f, *args, **kwargs)
     40     raise WeaviateClosedClientError()
     41 fut = asyncio.run_coroutine_threadsafe(f(*args, **kwargs), self.loop)
---> 42 return fut.result()

File c:\Users\dhanu.conda\envs\idk_gpu\lib\concurrent\futures\_base.py:458, in Future.result(self, timeout)
    456     raise CancelledError()
    457 elif self._state == FINISHED:
--> 458     return self.__get_result()
    459 else:
    460     raise TimeoutError()

File c:\Users\dhanu.conda\envs\idk_gpu\lib\concurrent\futures\_base.py:403, in Future.__get_result(self)
    401 if self._exception:
    402     try:
--> 403         raise self._exception
    404     finally:
    405         # Break a reference cycle with the exception in self._exception
    406         self = None

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\collections\queries\near_text\generate.py:101, in _NearTextGenerateAsync.near_text(self, query, single_prompt, grouped_task, grouped_properties, certainty, distance, move_to, move_away, limit, offset, auto_limit, filters, group_by, rerank, target_vector, include_vector, return_metadata, return_properties, return_references)
     28 async def near_text(
     29     self,
     30     query: Union[List[str], str],
    (...)
     49     return_references: Optional[ReturnReferences[TReferences]] = None,
     50 ) -> GenerativeSearchReturnType[Properties, References, TProperties, TReferences]:
     51     """Perform retrieval-augmented generation (RaG) on the results of a by-image object search in this collection using the image-capable vectorization module and vector-based similarity search.
     52
     53     See the docs for a more detailed explanation.
    (...)
     99     If the request to the Weaviate server fails.
    100     """
--> 101 res = await self._query.near_text(
    102     near_text=query,
    103     certainty=certainty,
    104     distance=distance,
    105     move_to=move_to,
    106     move_away=move_away,
    107     limit=limit,
    108     offset=offset,
    109     autocut=auto_limit,
    110     filters=filters,
    111     group_by=_GroupBy.from_input(group_by),
    112     rerank=rerank,
    113     target_vector=target_vector,
    114     generative=_Generative(
    115         single=single_prompt,
    116         grouped=grouped_task,
    117         grouped_properties=grouped_properties,
    118     ),
    119     return_metadata=self._parse_return_metadata(return_metadata, include_vector),
    120     return_properties=self._parse_return_properties(return_properties),
    121     return_references=self._parse_return_references(return_references),
    122 )
    123 return self._result_to_generative_return(
    124     res,
    125     _QueryOptions.from_input(
    (...)
    135     return_references,
    136 )

File c:\Users\dhanu.conda\envs\idk_gpu\lib\site-packages\weaviate\collections\grpc\query.py:817, in _QueryGRPC.__call(self, request)
    815 if e.code().name == PERMISSION_DENIED:
    816     raise InsufficientPermissionsError(e)
--> 817 raise WeaviateQueryError(str(e), "GRPC search")  # pyright: ignore
    818 except WeaviateRetryError as e:
    819     raise WeaviateQueryError(str(e), "GRPC search")

WeaviateQueryError: Query call with protocol GRPC search failed with message <AioRpcError of RPC that terminated with:
    status = StatusCode.UNKNOWN
    details = "explorer: get class: concurrentTargetVectorSearch): explorer: get class: vector search: object vector search at index wikipedialangchain: shard wikipedialangchain_mj30ETuKNGfK: vector search: knn search: distance between entrypoint and query node: 768 vs 384: vector lengths don't match"
    debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"explorer: get class: concurrentTargetVectorSearch): explorer: get class: vector search: object vector search at index wikipedialangchain: shard wikipedialangchain_mj30ETuKNGfK: vector search: knn search: distance between entrypoint and query node: 768 vs 384: vector lengths don't match", grpc_status:2, created_time:"2025-01-08T06:29:39.4893321+00:00"}"


Hi @DhanushKumar_R!!

Welcome to our community :hugs:

Your error message indicates that there is a dimension mismatch.

So your stored vectors have one dimension length (768, from Google's embedding-001), while the near_text query vector is produced with a different length (384, from all-MiniLM-L6-v2), which is exactly the "768 vs 384: vector lengths don't match" in the error. A quick way to confirm this is sketched below:
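
Here is a minimal sketch for checking the stored dimension yourself (it assumes the client and collection from your script; vector names may differ on your side):

# Inspect the dimension of a vector that LangChain stored in the collection
collection = client.collections.get("WikipediaLangChain")
obj = collection.query.fetch_objects(limit=1, include_vector=True).objects[0]
print({name: len(vec) for name, vec in obj.vector.items()})  # likely 768 (Google embedding-001)

# near_text, on the other hand, embeds the query with the collection's configured
# text2vec-huggingface model (all-MiniLM-L6-v2), which outputs 384-dimensional vectors.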

I see you are using the recipe I have written: recipes/integrations/llm-frameworks/langchain/loading-data at main · weaviate/recipes · GitHub

Nice!! :slight_smile:

The root cause of your error is that you are defining one vectorizer for Weaviate while using a different one for LangChain here:

client.collections.create(
    "WikipediaLangChain",
    vectorizer_config=[
        Configure.NamedVectors.text2vec_huggingface(
            name="title_vector",
            source_properties=["title"],
            model="sentence-transformers/all-MiniLM-L6-v2",
        )
    ],

)

embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",  # Google's text embedding model
        google_api_key= gemini_api_key
    )

Those two must be configured to use the same embedding model, so that the stored vectors and the query vectors end up with the same number of dimensions.
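
For example, here is a minimal sketch that keeps the Hugging Face MiniLM model on both sides (it reuses the HuggingFaceEmbeddings class you already import; you could just as well switch the collection's vectorizer to a Google module and keep the Gemini embeddings instead):

from weaviate.classes.config import Configure
from langchain.embeddings import HuggingFaceEmbeddings

client.collections.create(
    "WikipediaLangChain",
    vectorizer_config=[
        Configure.NamedVectors.text2vec_huggingface(
            name="title_vector",
            source_properties=["title"],
            model="sentence-transformers/all-MiniLM-L6-v2",
        )
    ],
)

# Same model on the LangChain side, so stored vectors and near_text query vectors
# both end up with 384 dimensions.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

db = WeaviateVectorStore.from_documents(docs, embeddings, client=client, index_name="WikipediaLangChain")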

Let me know if this helps!

Thanks!