# Connect to Weaviate
# The OpenAI key is sent to Weaviate as a request header, so it must be a
# non-empty string at this point.
# NOTE(review): presumably `openai.api_key` is only populated when it was
# assigned explicitly; a chat completion can still succeed when the key is
# picked up some other way (e.g. a client object or environment variable) --
# confirm how the key is configured.
if not openai.api_key:
    raise ValueError(
        "openai.api_key is not set; assign it before connecting to Weaviate."
    )

client = weaviate.connect_to_wcs(
    cluster_url=weaviate_url,
    auth_credentials=weaviate.auth.AuthApiKey(weaviate_key),
    headers={
        "X-OpenAI-Api-Key": openai.api_key,  # Replace with your OpenAI key
    },
)

# Check if Weaviate is ready
# Fail fast instead of discarding the readiness result, so an unreachable
# cluster is reported here rather than by a later schema or query call.
if not client.is_ready():
    client.close()
    raise RuntimeError(
        "Weaviate is not ready; check weaviate_url and weaviate_key."
    )
# Configure Weaviate Schema for Two Collections
import weaviate.classes.config as wc
# Schema for Credit Card Documents
# Create the collection only when it is missing, so the script can be re-run
# against a cluster that already contains it.
if not client.collections.exists("CreditCardDocuments"):
    client.collections.create(
        name="CreditCardDocuments",
        vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(
            model="ada",
            model_version="002",
            type_="text",
        ),
        generative_config=wc.Configure.Generative.openai(
            model="gpt-4",
        ),
        properties=[
            wc.Property(name="type", data_type=wc.DataType.TEXT),
            # Identifier only: excluded from vectorization.
            wc.Property(
                name="element_id",
                data_type=wc.DataType.TEXT,
                skip_vectorization=True,
            ),
            wc.Property(name="text", data_type=wc.DataType.TEXT),
            # Numeric array property, excluded from vectorization.
            wc.Property(
                name="embeddings",
                data_type=wc.DataType.NUMBER_ARRAY,
                skip_vectorization=True,
            ),
        ],
    )
# Schema for HR Documents
# Create the collection only when it is missing, so the script can be re-run
# against a cluster that already contains it.
if not client.collections.exists("HRDocuments"):
    client.collections.create(
        name="HRDocuments",
        vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(
            model="ada",
            model_version="002",
            type_="text",
        ),
        generative_config=wc.Configure.Generative.openai(
            model="gpt-4",
        ),
        properties=[
            wc.Property(name="type", data_type=wc.DataType.TEXT),
            # Identifier only: excluded from vectorization.
            wc.Property(
                name="element_id",
                data_type=wc.DataType.TEXT,
                skip_vectorization=True,
            ),
            wc.Property(name="text", data_type=wc.DataType.TEXT),
            # Numeric array property, excluded from vectorization.
            wc.Property(
                name="embeddings",
                data_type=wc.DataType.NUMBER_ARRAY,
                skip_vectorization=True,
            ),
        ],
    )
# Define Writer for Weaviate
def get_writer(collection_name: str) -> Writer:
    """Build a Weaviate writer that targets one collection.

    Args:
        collection_name: Name passed as ``class_name`` in the Weaviate
            connector configuration.

    Returns:
        A ``WeaviateWriter`` configured with the module-level
        ``weaviate_url`` and ``weaviate_key`` and a default
        ``WeaviateWriteConfig``.
    """
    return WeaviateWriter(
        connector_config=SimpleWeaviateConfig(
            access_config=WeaviateAccessConfig(api_key=weaviate_key),
            host_url=weaviate_url,
            class_name=collection_name,
        ),
        write_config=WeaviateWriteConfig(),
    )
# Ingest Data into CreditCardDocuments
# Credentials are read from the environment so that no secret is committed
# in the source file. A missing variable raises KeyError before any work starts.
import os

credit_card_writer = get_writer("CreditCardDocuments")
credit_card_runner = S3Runner(
    processor_config=ProcessorConfig(
        verbose=True,
        output_dir="s3-output-credit-card",
        num_processes=40,
    ),
    read_config=ReadConfig(),
    partition_config=PartitionConfig(
        partition_by_api=True,
        api_key=os.environ["UNSTRUCTURED_API_KEY"],  # Your Unstructured API key
        # An empty endpoint cannot be right with partition_by_api=True; the
        # default is the URL the HR ingestion in this script uses.
        partition_endpoint=os.environ.get(
            "UNSTRUCTURED_API_URL", "https://api.unstructuredapp.io"
        ),
    ),
    connector_config=SimpleS3Config(
        access_config=S3AccessConfig(
            key=os.environ["AWS_ACCESS_KEY_ID"],  # Your AWS key
            secret=os.environ["AWS_SECRET_ACCESS_KEY"],  # Your AWS secret
        ),
        remote_url="s3://g/credit-card-documents",  # Replace with your S3 bucket path
    ),
    chunking_config=ChunkingConfig(
        chunk_elements=True,
        chunking_strategy="by_title",
        max_characters=8192,
        combine_text_under_n_chars=1000,
    ),
    embedding_config=EmbeddingConfig(
        # "azure" is not an embedding provider name known to the Unstructured
        # ingest EmbeddingConfig; the key passed here is the OpenAI key, so
        # the OpenAI provider is used.
        provider="langchain-openai",
        api_key=openai.api_key,
    ),
    writer=credit_card_writer,
    writer_kwargs={},
)
credit_card_runner.run()
# Ingest Data into HRDocuments
# Credentials are read from the environment so that no secret is committed
# in the source file. A missing variable raises KeyError before any work starts.
import os

hr_writer = get_writer("HRDocuments")
hr_runner = S3Runner(
    processor_config=ProcessorConfig(
        verbose=True,
        output_dir="s3-output-hr",
        num_processes=40,
    ),
    read_config=ReadConfig(),
    partition_config=PartitionConfig(
        partition_by_api=True,
        api_key=os.environ["UNSTRUCTURED_API_KEY"],  # Your Unstructured API key
        # The previous literal URL is kept as the default.
        partition_endpoint=os.environ.get(
            "UNSTRUCTURED_API_URL", "https://api.unstructuredapp.io"
        ),
    ),
    connector_config=SimpleS3Config(
        access_config=S3AccessConfig(
            key=os.environ["AWS_ACCESS_KEY_ID"],  # Your AWS key
            secret=os.environ["AWS_SECRET_ACCESS_KEY"],  # Your AWS secret
        ),
        remote_url="s3://gg/hr-documents",  # Replace with your S3 bucket path
    ),
    chunking_config=ChunkingConfig(
        chunk_elements=True,
        chunking_strategy="by_title",
        max_characters=8192,
        combine_text_under_n_chars=1000,
    ),
    embedding_config=EmbeddingConfig(
        # "azure" is not an embedding provider name known to the Unstructured
        # ingest EmbeddingConfig; the key passed here is the OpenAI key, so
        # the OpenAI provider is used.
        provider="langchain-openai",
        api_key=openai.api_key,
    ),
    writer=hr_writer,
    writer_kwargs={},
)
hr_runner.run()
# Search in CreditCardDocuments
# Run a hybrid query against the collection, asking only for the `text`
# property, then print each returned object's properties as indented JSON.
credit_card_documents = client.collections.get("CreditCardDocuments")
credit_card_question = "What is the annual fee for the premium credit card?"
credit_card_response = credit_card_documents.query.hybrid(
    query=credit_card_question,
    alpha=0.5,
    return_properties=["text"],
    auto_limit=2,
)

print("Credit Card Documents Search Results:")
for hit in credit_card_response.objects:
    formatted = json.dumps(hit.properties, indent=2)
    print(formatted)
# Search in HRDocuments
# Run a hybrid query against the collection, asking only for the `text`
# property, then print each returned object's properties as indented JSON.
try:
    hr_documents = client.collections.get("HRDocuments")
    hr_response = hr_documents.query.hybrid(
        query="What is the company's policy on remote work?",
        alpha=0.5,
        return_properties=["text"],
        auto_limit=2,
    )
    print("HR Documents Search Results:")
    for obj in hr_response.objects:
        print(json.dumps(obj.properties, indent=2))
finally:
    # No later statement in this script uses the client, so release its
    # connection here even when the query fails.
    client.close()
When I try the API key with a normal chat completion, it works, but it fails when I use it with Weaviate.