The main target of making the code was to pass the documents in 1 dataframe through the questions in another dataframe and find the top3 documents for each question along with their scores.
Now I am only getting the questions in the output.
So if anyone has any ideas please comment here.
Create a Weaviate client
client = weaviate.Client(
url=“https://#####.weaviate.network”,
auth_client_secret=weaviate.AuthApiKey(api_key=“############################”),
additional_headers={
“X-HuggingFace-Api-Key”: “##_##################################”
}
)
#client.schemas.delete_all()
Iterate over questions and documents
for _,question_row in df2.iterrows(): # question_row in df2
# Create Question object
question = { # Questions
“title”: question_row[“Questions”]
}
uuid_question = generate_uuid5(question, “Question”)
client.batch.add_data_object(
data_object=question, # Questions
class_name=“Question”,
uuid=uuid_question, #Questions
)
# Iterate over documents for each question
for _,document_row in df1.iterrows():
# Create Document object
document = {
"title": document_row["preprocessed_text"]
}
uuid_document = generate_uuid5(document, "Document")
client.batch.add_data_object(
data_object=document,
class_name="Document",
uuid=uuid_document,
)
# Add reference from Question to Document
client.batch.add_reference(
from_object_uuid=uuid_question,
from_object_class_name="Question",
from_property_name="documents",
to_object_uuid=uuid_document,
to_object_class_name="Document",
)
Create objects and references in the batch
result = client.batch.create_objects()
Run queries for each question
for _, question_row in df2.iterrows():
question = question_row[“Questions”]
for _, document_row in df1.iterrows():
document = document_row[“preprocessed_text”]
query = """
{
Get {
Document(
where: {
text: {
vector: {
cosineSimilarity: {
vector: [%s]
certainty: 0.8
}
}
}
}
first: 3
order: [{distance: DESC}]
) {
edges {
node {
id
distance
properties {
title
# Include other desired properties here
}
}
}
}
}
}
""" % question
url = "https://testing-gto38b37.weaviate.network"
headers = {
"Content-Type": "application/json"
}
payload = {
"query": json.dumps(query)
}
response = requests.post(url, headers=headers, json=payload)
response_data = response.json()
results = response_data.get("data", {}).get("Get", {}).get("Question", [])
# Print the results for each question
print(f"Results for Question: {question}")
for result in results:
print("Document ID:", result.get("id"))
print("Distance:", result.get("distance"))
print()