Hi @tapish_22.11!
I'm not sure that having one PDF per tenant is optimal. If you separate the PDFs by a property instead (LangChain already does this, using the source
property), you can also create an agent that understands when a query is about a specific PDF and searches for it accordingly.
With that said, this is how you can build an agent that queries different tenants depending on the query:
# NOTE(review): `client` and `wvc` are used below but are not defined anywhere
# in this snippet -- presumably a connected Weaviate client and
# `weaviate.classes`; confirm before running.

# Start from a clean slate by dropping any previous "Test" collection. The
# call is made only for its side effect, so its result is not bound to a name
# (the original `collection = ...` binding was never used).
client.collections.delete("Test")

# Re-create "Test" with multi-tenancy enabled. Tenants are created and
# activated automatically, so T1/T2 do not have to be registered up front.
client.collections.create(
    "Test",
    multi_tenancy_config=wvc.config.Configure.multi_tenancy(
        enabled=True,
        auto_tenant_activation=True,
        auto_tenant_creation=True,
    ),
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(),
    generative_config=wvc.config.Configure.Generative.openai(),
)

# Import one object into each tenant.
with client.batch.dynamic() as batch:
    batch.add_object(
        collection="Test",
        tenant="T1",
        properties={"text": "object1"},
    )
    batch.add_object(
        collection="Test",
        tenant="T2",
        properties={"text": "object2"},
    )

# Report anything the batch could not import (checked once the batch context
# has exited).
if client.batch.failed_objects:
    print(f"Found {len(client.batch.failed_objects)} failed objects")
    print(client.batch.failed_objects)
import weaviate
from langchain.agents import Tool
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain_openai import OpenAIEmbeddings
# A single vector store over the "Test" collection, reading document text from
# the "text" property and embedding queries with OpenAI embeddings.
embeddings = OpenAIEmbeddings()
vectorstore = WeaviateVectorStore(
    client=client,
    index_name="Test",
    text_key="text",
    embedding=embeddings,
)
def query_collection1(query: str) -> str:
    """Search tenant "T1" for *query* and return the matching text.

    Runs a similarity search limited to the top 3 documents and joins their
    ``page_content`` values with newlines.
    """
    hits = vectorstore.similarity_search(query, k=3, tenant="T1")
    contents = (hit.page_content for hit in hits)
    return "\n".join(contents)
def query_collection2(query: str) -> str:
    """Search tenant "T2" for *query* and return the matching text.

    Runs a similarity search limited to the top 3 documents and joins their
    ``page_content`` values with newlines.
    """
    hits = vectorstore.similarity_search(query, k=3, tenant="T2")
    contents = (hit.page_content for hit in hits)
    return "\n".join(contents)
# One tool per tenant; the description is what tells the agent which tool
# matches a given question.
tools = [
    Tool(name=tool_name, func=tool_func, description=tool_description)
    for tool_name, tool_func, tool_description in (
        (
            "QueryCollection1",
            query_collection1,
            "Useful for when you need to query T1 for information.",
        ),
        (
            "QueryCollection2",
            query_collection2,
            "Useful for when you need to query T2 for information.",
        ),
    )
]
# Defining the agent
from langchain.agents import ZeroShotAgent, AgentExecutor
from langchain import LLMChain
from langchain_openai import OpenAI
# Initialize the LLM; temperature 0 keeps tool selection as repeatable as
# possible.
llm = OpenAI(temperature=0)

# Build the agent prompt: `prefix` goes before the tool list and `suffix`
# after it, carrying the question and the agent's scratchpad.
prefix = """Answer the following questions as best you can. You have access to the following tools:"""
suffix = """Begin! Remember to answer the question, and use the correct tool.
Question: {input}
{agent_scratchpad}"""
prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "agent_scratchpad"],
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

# The agent is told which tools it may call by name. The original passed
# `tools=`/`verbose=` here, which are not ZeroShotAgent fields; verbose
# logging is enabled on the executor instead.
agent = ZeroShotAgent(
    llm_chain=llm_chain,
    allowed_tools=[tool.name for tool in tools],
)
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,  # print the Thought/Action/Observation trace for debugging
)
# Run a query. `invoke` is used instead of the deprecated `run`; it returns a
# dict, and the agent's final answer is stored under the "output" key.
query1 = "Tell me about the documents T2"
response1 = agent_chain.invoke({"input": query1})["output"]
print(response1)
And this should be the output:
Entering new AgentExecutor chain…
Thought: I should query T2 for information about the documents.
Action: QueryCollection2
Action Input: "documents"
Observation: object2
Thought: I should query T2 for the specific information about the documents.
Action: QueryCollection2
Action Input: "specific information"
Observation: object2
Thought: I now know the final answer
Final Answer: The documents in T2 contain specific information.
Finished chain.
The documents in T2 contain specific information.
Let me know if that helps!
Thanks!