Description
Hi,
I am trying to use Weaviate with the Azure OpenAI service. I have a gpt-4o model deployed there.
I am connecting to the weaviate docker container like this:
self.client = weaviate.connect_to_custom(
            http_host=self.config.weaviate_host,
            http_port=self.config.weaviate_port,
            http_secure=False,
            grpc_host=self.config.weaviate_host,
            grpc_port=self.config.weaviate_grpc_port,
            grpc_secure=False,
            headers={
                "X-Azure-Api-Key": self.config.azure_openai_key,
                "X-Azure-Client-Value": self.resource_name
            }
        )
My docker compose is as follows:
services:
  weaviate:
    command:
    - --host
    - 0.0.0.0
    - --port
    - '8080'
    - --scheme
    - http
    image: cr.weaviate.io/semitechnologies/weaviate:1.27.8
    ports:
    - 8080:8080
    - 50051:50051
    volumes:
    - weaviate_data:/var/lib/weaviate
    restart: on-failure:0
    environment:
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      ENABLE_API_BASED_MODULES: 'true'
      CLUSTER_HOSTNAME: 'node1'
      ENABLE_MODULES: 'text2vec-azure-openai'
      AZURE_OPENAI_ENDPOINT: 'https://*****instance.openai.azure.com'
      AZURE_OPENAI_API_KEY: '****'
volumes:
  weaviate_data:
I am creating the collection like this:
def _create_collection(self, resource_name: str):
        """Create the Weaviate collection if it doesn't exist."""
        try:
            try:
                collection = self.client.collections.get(self.collection_name)
                logging.info(f"Using existing collection: {self.collection_name}")
            except weaviate.exceptions.WeaviateQueryError:
                # Collection doesn't exist, create it
                collection = self.client.collections.create(
                    name=self.collection_name,
                    vectorizer_config=weaviate.classes.config.Configure.Vectorizer.text2vec_azure_openai(
                        vectorizer="text2vec-azure-openai",
                        resource_name=resource_name,
                        deployment_id=self.config.azure_openai_deployment
                    ),
                    properties=[
                        weaviate.classes.properties.Property(
                            name="content",
                            data_type=weaviate.classes.datatypes.DataType.TEXT,
                            description="The chunk content",
                            vectorize=True
                        ),
                        weaviate.classes.properties.Property(
                            name="doc_id",
                            data_type=weaviate.classes.datatypes.DataType.TEXT,
                            description="Document identifier"
                        ),
                        weaviate.classes.properties.Property(
                            name="chunk_id",
                            data_type=weaviate.classes.datatypes.DataType.INT,
                            description="Chunk number within document"
                        ),
                        weaviate.classes.properties.Property(
                            name="source",
                            data_type=weaviate.classes.datatypes.DataType.TEXT,
                            description="Document source"
                        ),
                        weaviate.classes.properties.Property(
                            name="last_updated",
                            data_type=weaviate.classes.datatypes.DataType.DATE,
                            description="Last update timestamp"
                        ),
                        weaviate.classes.properties.Property(
                            name="content_hash",
                            data_type=weaviate.classes.datatypes.DataType.TEXT,
                            description="Hash of document content"
                        ),
                        weaviate.classes.properties.Property(
                            name="file_path",
                            data_type=weaviate.classes.datatypes.DataType.TEXT,
                            description="Original file path"
                        )
                    ]
                )
                logging.info(f"Created new collection: {self.collection_name}")
                
        except Exception as e:
            logging.error(f"Error creating collection: {str(e)}")
            raise
And ingesting documents:
def ingest_document(self, content: str, source: str, file_path: str = None) -> str:
        """Ingest a document into Weaviate."""
        try:
            doc_id = self._generate_doc_id(content, source)
            content_hash = hashlib.md5(content.encode()).hexdigest()
            
            # Get collection
            collection = self.client.collections.get(self.collection_name)
            
            # Delete existing chunks if document exists
            try:
                where_filter = {
                    "path": ["doc_id"],
                    "operator": "Equal",
                    "valueString": doc_id
                }
                collection.data.delete_many(where_filter)
            except Exception as e:
                logging.warning(f"Error deleting existing chunks: {str(e)}")
            # Create new chunks
            chunks = self._chunk_document(content)
            current_time = datetime.now(timezone.utc).isoformat()
            
            # Prepare objects for batch import
            objects = []
            for i, chunk in enumerate(chunks):
                properties = {
                    "content": chunk,
                    "doc_id": doc_id,
                    "chunk_id": i,
                    "source": source,
                    "last_updated": current_time,
                    "content_hash": content_hash
                }
                
                if file_path:
                    properties["file_path"] = file_path
                    
                objects.append(properties)
            # Import all chunks in a single batch
            if objects:
                collection.data.insert_many(objects)
            return doc_id
            
        except Exception as e:
            logging.error(f"Error ingesting document: {str(e)}")
            raise
And, yet, I am getting the following error:
2024-12-16 09:45:34,854 - ERROR - Error processing C:\Projects\Qualification Toolbox\backend\documents\technical qualification-v30-20241202_045512.pdf: Every object failed during insertion. Here is the set of all errors: API Key: no api key found neither in request header: X-Openai-Api-Key nor in environment variable under OPENAI_APIKEY
Processing existing documents: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 8/8 [00:00<00:00, 12.53it/s] 
2024-12-16 09:45:34,872 - ERROR - Error querying similar chunks: Query call with protocol GRPC search failed with message <AioRpcError of RPC that terminated with:
        status = StatusCode.UNKNOWN
        details = "explorer: get class: concurrentTargetVectorSearch): explorer: get class: vectorize search vector: vectorize params: vectorize params: vectorize keywords: remote client vectorize: API Key: no api key found neither in request header: X-Openai-Api-Key nor in environment variable under OPENAI_APIKEY"
        debug_error_string = "UNKNOWN:Error received from peer  {created_time:"2024-12-15T22:45:34.8592749+00:00", grpc_status:2, grpc_message:"explorer: get class: concurrentTargetVectorSearch): explorer: get class: vectorize search vector: vectorize params: vectorize params: vectorize keywords: remote client vectorize: API Key: no api key found neither in request header: X-Openai-Api-Key nor in environment variable under OPENAI_APIKEY"}"
>.
Why am I being asked to provide a OPENAI_APIKEY  ?
Server Setup Information
- Weaviate Server Version:
- Deployment Method: official docker image
- Multi Node? Number of Running Nodes:
- Client Language and Version: EN, weaviate-client==4.9.6
- Multitenancy?: No
 
    
  
  
        
    
    
  
  
    
    
