thank you so much ,here are my code
import os
import io
import boto3
import weaviate
from pypdf import PdfReader
from typing import List, Dict
from openai import AzureOpenAI
from weaviate.auth import AuthApiKey
from langchain_text_splitters import RecursiveCharacterTextSplitter
def get_weaviate_client():
“”“Connects to Weaviate”“”
return weaviate.connect_to_wcs(
cluster_url=WEAVIATE_CONFIG[“url”],
auth_credentials=AuthApiKey(WEAVIATE_CONFIG[“api_key”])
)
def initialize_weaviate_schema(client):
“”“Creates the Weaviate schema if it does not exist.”“”
if not client.collections.exists(WEAVIATE_CONFIG[“class_name”]):
client.collections.create(
name=WEAVIATE_CONFIG[“class_name”],
vectorizer_config=weaviate.classes.config.Configure.Vectorizer.none(),
properties=[
weaviate.classes.config.Property(name=“source”, data_type=weaviate.classes.config.DataType.TEXT),
weaviate.classes.config.Property(name=“text”, data_type=weaviate.classes.config.DataType.TEXT),
weaviate.classes.config.Property(name=“page”, data_type=weaviate.classes.config.DataType.INT),
weaviate.classes.config.Property(name=“chunk_index”, data_type=weaviate.classes.config.DataType.INT)
]
)
def get_azure_embeddings(client: AzureOpenAI, text: str) → List[float]:
“”“Generates embeddings using Azure OpenAI”“”
response = client.embeddings.create(
input=text,
model=AZURE_CONFIG[“embedding_deployment”]
)
return response.data[0].embedding
def process_pdf(content: bytes, source_key: str, weaviate_client, aoai_client):
“”“Processes a PDF file, extracts text, and stores it in Weaviate.”“”
pdf_reader = PdfReader(io.BytesIO(content))
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
collection = weaviate_client.collections.get(WEAVIATE_CONFIG[“class_name”])
for page_num, page in enumerate(pdf_reader.pages, start=1):
text = page.extract_text()
if text:
chunks = text_splitter.split_text(text)
for chunk_idx, chunk in enumerate(chunks):
try:
vector = get_azure_embeddings(aoai_client, chunk)
collection.data.insert(
properties={
"source": source_key,
"text": chunk,
"page": page_num,
"chunk_index": chunk_idx
},
vector=vector
)
except Exception as e:
print(f"Error inserting chunk: {str(e)}")
s3 = boto3.client(‘s3’, **AWS_CREDENTIALS)
w_client = get_weaviate_client()
aoai_client = AzureOpenAI(
api_key=AZURE_CONFIG[“api_key”],
api_version=AZURE_CONFIG[“api_version”],
azure_endpoint=AZURE_CONFIG[“endpoint”]
)
initialize_weaviate_schema(w_client)
paginator = s3.get_paginator(‘list_objects_v2’)
pages = paginator.paginate(Bucket=AWS_BUCKET_INFO[“bucket_name”], Prefix=AWS_BUCKET_INFO[“prefix”])
for page in pages:
for obj in page.get(‘Contents’, ):
if obj[‘Key’].lower().endswith(‘.pdf’):
try:
response = s3.get_object(Bucket=AWS_BUCKET_INFO[“bucket_name”], Key=obj[‘Key’])
content = response[‘Body’].read()
process_pdf(content, obj[‘Key’], w_client, aoai_client)
print(f"Processed {obj[‘Key’]} successfully")
except Exception as e:
print(f"Error processing {obj[‘Key’]}: {str(e)}")
def retrieve_and_generate(query: str, weaviate_client, aoai_client):
“”“Retrieves relevant documents and generates a response using RAG.”“”
# Generate embedding for the query using Azure OpenAI
vector = get_azure_embeddings(aoai_client, query)
# Perform vector search in Weaviate
results = (
weaviate_client.query
.get(WEAVIATE_CONFIG["class_name"], ["text"])
.with_near_vector({"vector": vector})
.with_limit(5)
.do()
)
# Extract relevant text from retrieved documents
if 'data' in results and 'Get' in results['data'] and WEAVIATE_CONFIG["class_name"] in results['data']['Get']:
context = "\n".join([obj['text'] for obj in results['data']['Get'][WEAVIATE_CONFIG["class_name"]]])
else:
context = "No relevant context found."
# Construct a RAG prompt
prompt = f"""
You are an AI assistant. Use the following retrieved context to answer the question.
Context: {context}
Question: {query}
"""
# Generate response using Azure OpenAI
response = aoai_client.completions.create(
model=AZURE_CONFIG["model"],
prompt=prompt,
max_tokens=300
)
return response.choices[0].text.strip()
user_query = “What are the main credit card policies?”
response = retrieve_and_generate(user_query, w_client, aoai_client)
print(“\nGenerated Response:\n”, response)
AttributeError: ‘WeaviateClient’ object has no attribute ‘query’
help me to sort this issue