Description
I am trying out the Weaviate vector store through its LangChain integration, running the default Docker image, but I can't get the client to connect (my code and logs are provided below). I am now trying the collection method, but it seems quite complex and is new to me, so I wanted to adapt my existing code to Weaviate instead. I also have to use a PEFT fine-tuned model as the generator, and I can't find how to integrate that into the collection creation.
import json
from langchain_core.documents import Document
from langchain.embeddings import SentenceTransformerEmbeddings
import weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore
# Connect to a Weaviate instance on this machine. The host and ports below are
# the client's defaults, written out so the REST/gRPC endpoints being contacted
# are visible when debugging connection errors.
client = weaviate.connect_to_local(
    host="localhost",
    port=8080,
    grpc_port=50051,
)
def _build_chunk_metadata(lesson_id, chunk_id, lesson_title, chunk_value):
    """Return the flat metadata dict stored alongside one transcript chunk.

    Every entry of ``chunk_value['QA_pairs']`` is flattened into a pair of
    1-based keys, ``QA_pair_<n>_Question`` and ``QA_pair_<n>_Answer``, so the
    questions and answers travel with the chunk as plain metadata fields.
    """
    metadata = {
        'lesson_id': lesson_id,
        'chunk_id': chunk_id,
        'lesson_title': lesson_title,
        'keywords': chunk_value.get('keywords', []),
        'summary': chunk_value.get('Summary', ''),
    }
    for number, qa in enumerate(chunk_value.get('QA_pairs', []), start=1):
        metadata[f'QA_pair_{number}_Question'] = qa.get('Question', '')
        metadata[f'QA_pair_{number}_Answer'] = qa.get('Answer', '')
    return metadata


def load_and_process_data(file_path: str) -> "tuple[WeaviateVectorStore, WeaviateVectorStore]":
    """Load the lesson JSON file and index it into two Weaviate vector stores.

    The JSON is read as a two-level mapping; each inner dict value is treated
    as a lesson. Two kinds of documents are built from it:

    * one "title + description" document per lesson, and
    * one document per entry of the lesson's ``Transcript_Chunk`` mapping,
      whose text is the chunk's ``context`` and whose metadata carries the
      keywords, summary and flattened QA pairs.

    Args:
        file_path: Path to the JSON dataset.

    Returns:
        ``(db_title_description, db_chunks)``: the store holding the lesson
        title/description documents and the store holding the chunk documents.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    title_description_docs = []
    chunk_docs = []

    for outer_value in data.values():
        for lesson_id, lesson in outer_value.items():
            # Only dict entries describe lessons; skip anything else.
            if not isinstance(lesson, dict):
                continue

            title = lesson.get("TITLE", "")
            description = lesson.get("Description", "")

            # NOTE: `ids` is not a Document field, so it is no longer passed;
            # the identifier is kept in the metadata instead.
            title_description_docs.append(
                Document(
                    page_content=f"Title: {title} Description: {description}",
                    metadata={
                        'lesson_id': lesson_id,
                        'title': title,
                        'description': description,
                    },
                )
            )

            for chunk_key, chunk_value in lesson.get("Transcript_Chunk", {}).items():
                chunk_docs.append(
                    Document(
                        page_content=chunk_value.get('context', ''),
                        metadata=_build_chunk_metadata(
                            lesson_id, chunk_key, title, chunk_value
                        ),
                    )
                )

    # Create databases.
    # `persist_directory` was dropped: it is an option of local on-disk stores
    # and has no meaning here, where the data is written to the Weaviate
    # server reached through `client`.
    db_title_description = WeaviateVectorStore.from_documents(
        title_description_docs, embedding_function, client=client
    )
    db_chunks = WeaviateVectorStore.from_documents(
        chunk_docs, embedding_function, client=client
    )
    return db_title_description, db_chunks
# Path to the JSON dataset; passed to load_and_process_data, which returns the
# title/description store and the transcript-chunk store, in that order.
file_path = 'Data/RAG_Dataset.json'
db_title_description, db_chunks = load_and_process_data(file_path)
Server Setup Information
- Weaviate Server Version: 1.24.7
- Deployment Method: Docker
- Multi Node? Number of Running Nodes: 1
- Client Language and Version:
- Name: weaviate-client , Version: 4.5.5
- Name: langchain-weaviate , Version: 0.0.1.post1
Logs
---------------------------------------------------------------------------
UnexpectedStatusCodeError Traceback (most recent call last)
<ipython-input-15-dd7f7b1a24dc> in <cell line: 7>()
5 from langchain_weaviate.vectorstores import WeaviateVectorStore
6
----> 7 client = weaviate.connect_to_local()
8
9 def load_and_process_data(file_path):
7 frames
/usr/local/lib/python3.10/dist-packages/weaviate/connect/helpers.py in connect_to_local(host, port, grpc_port, headers, additional_config, skip_init_checks, auth_credentials)
155 auth_client_secret=auth_credentials,
156 )
--> 157 return __connect(client)
158
159
/usr/local/lib/python3.10/dist-packages/weaviate/connect/helpers.py in __connect(client)
343 except Exception as e:
344 client.close()
--> 345 raise e
/usr/local/lib/python3.10/dist-packages/weaviate/connect/helpers.py in __connect(client)
339 def __connect(client: WeaviateClient) -> WeaviateClient:
340 try:
--> 341 client.connect()
342 return client
343 except Exception as e:
/usr/local/lib/python3.10/dist-packages/weaviate/client.py in connect(self)
280 if self._connection.is_connected():
281 return
--> 282 self._connection.connect(self.__skip_init_checks)
283
284 def is_connected(self) -> bool:
/usr/local/lib/python3.10/dist-packages/weaviate/connect/v4.py in connect(self, skip_init_checks)
653
654 def connect(self, skip_init_checks: bool) -> None:
--> 655 super().connect(skip_init_checks)
656 # create GRPC channel. If Weaviate does not support GRPC then error now.
657 self._grpc_channel = self._connection_params._grpc_channel(
/usr/local/lib/python3.10/dist-packages/weaviate/connect/v4.py in connect(self, skip_init_checks)
139 # need this to get the version of weaviate for version checks
140 try:
--> 141 self._weaviate_version = _ServerVersion.from_string(self.get_meta()["version"])
142 except (WeaviateConnectionError, ReadError, RemoteProtocolError) as e:
143 raise WeaviateStartUpError(f"Could not connect to Weaviate:{e}.") from e
/usr/local/lib/python3.10/dist-packages/weaviate/connect/v4.py in get_meta(self)
577 """
578 response = self.get(path="/meta")
--> 579 res = _decode_json_response_dict(response, "Meta endpoint")
580 assert res is not None
581 return res
/usr/local/lib/python3.10/dist-packages/weaviate/util.py in _decode_json_response_dict(response, location)
927 raise ResponseCannotBeDecodedError(location, response)
928
--> 929 raise UnexpectedStatusCodeError(location, response)
930
931
UnexpectedStatusCodeError: Meta endpoint! Unexpected status code: 404, with response body: None.
UnexpectedStatusCodeError: Meta endpoint! Unexpected status code: 404, with response body: None.