I want to insert data into weaviate cloud instance using textevec_cohere and allow weaviate to create the embeddings.
I’m using Llamaindex to get data from web and then insert it but I get an error stating that I have to define openai_key.
Below my code.
import weaviate
import os
from typing import List
from llama_index.core.schema import BaseNode, Document
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from llama_index.core.storage import StorageContext
from llama_index.core import VectorStoreIndex, Settings
import weaviate.classes.config as wc
from dotenv import load_dotenv
from weaviate.classes.init import Auth, AdditionalConfig, Timeout
from weaviate.exceptions import WeaviateBaseErrorget the data from the web
def AddData() → (List[BaseNode], List[Document]):
docs = SimpleWebPageReader(html_to_text=True).load_data(
[“LlamaIndex and Weaviate | Weaviate - Vector Database”]
)
print(f"Loaded {len(docs)} documents")
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(docs, show_progress=True)
for n in nodes:
print(n.get_content())
return nodes, docsclient = None
load_dotenv()try:
headers = { # "X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY"), "X-Cohere-Api-Key": os.getenv("COHERE_APIKEY") } nodes, documents = AddData() client = weaviate.connect_to_weaviate_cloud( cluster_url=os.getenv("WEAVIATE_URL"), auth_credentials=Auth.api_key(os.getenv("WEAVIATE_APIKEY")), additional_config=AdditionalConfig( timeout=Timeout(init=30, query=60, insert=30), # Values in seconds ), headers=headers, skip_init_checks=False ) # Necessary for Cohere? os.environ["COHERE_API_KEY"] = os.getenv("COHERE_APIKEY") if client.is_ready(): print(f"Weaviate is ready! Successfully connected to {client.get_meta()}") else: print("Failed to connect to Weaviate Cloud") exit(0) # get existing collections collections = client.collections.list_all() if len(collections) > 0: print(f"Found {len(collections)} collections:") [print(c) for c in collections] else: print("No collections found") # Check if BlogPosts collection exists if client.collections.get("BlogPosts").exists(): print("Collection 'BlogPosts' already exists") else: client.collections.create( name="BlogPosts", description="A collection of blog posts", properties=[ wc.Property(name="content", data_type=wc.DataType.TEXT, description="The content of the blog post"), ], # Define the vectorizer module vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), # Define the generative module generative_config=wc.Configure.Generative.cohere() ) vector_store = WeaviateVectorStore(weaviate_client=client, index_name="BlogPosts", text_key="content") storage_context = StorageContext.from_defaults(vector_store=vector_store) # we initiate our index index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
except WeaviateBaseError as e:
print(f"Failed to connect to Weaviate Cloud: {e.message}“)
exit(0)
except Exception as e:
print(f"An error occurred: {e}”)
exit(0)finally:
if client is not None:
client.close()
I get error:
Could not load OpenAI embedding model. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys
Consider using embed_model=‘local’.
Visit our documentation for more embedding options: Redirecting...