MUVERA ERROR

Hi,

I keep getting this error when I'm trying to query a collection I have that was built with MUVERA.

The collection was created through a custom embedder, specifically "Metric-AI/ColQwen2.5-7b-multilingual-v1.0".

The error:

ERROR:main.vector_dabases.weaviate_manager:Error performing search: Query call with protocol GRPC search failed with message explorer: get class: concurrentTargetVectorSearch): explorer: get class: vector search: object vector search at index multimodalmultivqaonbd: shard multimodalmultivqaonbd_alRcqENs64jA: panic occurred: runtime error: index out of range [0] with length 0.
Processing query queries: 100%|██████████████████████████████| 10/10

Code itself:

import stamina

import logging

import json

from typing import List, Set, Optional, Any, Dict, Union

import weaviate

from weaviate.classes.config import Configure

from weaviate.classes.query import MetadataQuery

from weaviate.classes.config import Property, DataType

from .base_database import BaseVectorDatabase, DatabaseConfig, Document, SearchResult, SearchResults, VectorDatabaseFactory

class WeaviateManager(BaseVectorDatabase):
    """Vector-database backend that stores one self-provided multi-vector per object in Weaviate.

    The collection is created with a single named multi-vector
    (``custom_multi_vector``) using MUVERA encoding and a single text
    property, ``filename``.

    NOTE(review): the methods are declared ``async`` but call the synchronous
    Weaviate client, so each call blocks the event loop while it runs.
    """

    # Name of the multi-vector; must be identical at collection creation,
    # insert time and query time.
    _VECTOR_NAME = "custom_multi_vector"

    def __init__(self, config: DatabaseConfig):
        """Store the configuration and set up an unconnected manager.

        Args:
            config: Connection settings. This class reads ``url``,
                ``api_key`` and ``collection_name`` from it.
        """
        # The methods below read ``self.config``; it is presumably set by
        # BaseVectorDatabase.__init__ (not visible here) — TODO confirm.
        super().__init__(config)
        self.client = None
        self.collection = None
        self._connected = False
        self.logger = logging.getLogger(__name__)

    def _sanitize_collection_name(self, name: str) -> str:
        """Convert an arbitrary name into a capitalised, separator-free collection name.

        ``-`` and ``.`` are treated like ``_``; the name is split on ``_``,
        every purely alphanumeric part is capitalised (first character upper,
        rest lower) and the parts are concatenated. Parts containing any
        other character are dropped.

        Args:
            name: The configured collection name.

        Returns:
            The sanitized name; may be empty if no part is alphanumeric.
        """
        parts = name.replace('-', '_').replace('.', '_').split('_')
        sanitized = ''.join(part.capitalize() for part in parts if part.isalnum())
        if sanitized and not sanitized[0].isupper():
            sanitized = sanitized.capitalize()
        return sanitized

    async def _ensure_connected(self) -> None:
        """Connect lazily if no successful connection has been made yet."""
        if not self._connected:
            await self.connect()

    def _get_collection(self):
        """Return the Weaviate collection handle for the configured (sanitized) name."""
        sanitized_name = self._sanitize_collection_name(self.config.collection_name)
        return self.client.collections.get(sanitized_name)

    async def connect(self) -> bool:
        """Open a connection to Weaviate Cloud.

        Returns:
            True if the client reports ready, False if it does not or if any
            exception occurs (the exception is logged, not raised).
        """
        try:
            # The original referenced a bare ``config`` here, which is not
            # defined in this scope; the API key lives on ``self.config``.
            auth_config = weaviate.auth.AuthApiKey(api_key=self.config.api_key)
            additional_config = weaviate.config.AdditionalConfig(
                timeout=weaviate.config.Timeout(
                    init=120,
                    query=120,
                    insert=120,
                )
            )
            self.client = weaviate.connect_to_weaviate_cloud(
                cluster_url=self.config.url if self.config.url.startswith("http") else f"https://{self.config.url}",
                auth_credentials=auth_config,
                additional_config=additional_config,
            )
            if self.client.is_ready():
                self._connected = True
                self.logger.info(f"Successfully connected to Weaviate at {self.config.url}")
                meta = self.client.get_meta()
                version = meta.get('version', 'unknown')
                self.logger.info(f"Weaviate server version: {version}")
                return True
            else:
                self.logger.error("Failed to connect to Weaviate")
                return False
        except Exception as e:
            self.logger.error(f"Error connecting to Weaviate: {str(e)}")
            return False

    async def disconnect(self) -> bool:
        """Close the client if one exists.

        Returns:
            True on success (including when there was no client to close),
            False if closing raised.
        """
        try:
            if self.client:
                self.client.close()
                self._connected = False
                self.logger.info("Disconnected from Weaviate")
            return True
        except Exception as e:
            self.logger.error(f"Error disconnecting from Weaviate: {str(e)}")
            return False

    async def initialize_collection(self) -> bool:
        """Create the collection, or attach to it if it already exists.

        Returns:
            True if the collection exists or was created, False on any error
            (the error is logged, not raised).
        """
        try:
            await self._ensure_connected()
            collection_name = self.config.collection_name
            sanitized_name = self._sanitize_collection_name(collection_name)
            self.logger.info(f"Using sanitized collection name: ‘{sanitized_name}’ (from ‘{collection_name}’)")
            if self.client.collections.exists(sanitized_name):
                self.logger.warning(f"Collection ‘{sanitized_name}’ already exists")
                self.collection = self.client.collections.get(sanitized_name)
                return True
            vector_config = [
                Configure.MultiVectors.self_provided(
                    name=self._VECTOR_NAME,
                    encoding=Configure.VectorIndex.MultiVector.Encoding.muvera(),
                ),
            ]
            properties = [
                Property(name="filename", data_type=DataType.TEXT),
            ]
            self.collection = self.client.collections.create(
                name=sanitized_name,
                vector_config=vector_config,
                properties=properties,
            )
            self.logger.info(f"Successfully created collection ‘{sanitized_name}’")
            return True
        except Exception as e:
            # Recompute the names for the log message: the failure may have
            # happened before they were assigned in the try block.
            collection_name = getattr(self.config, 'collection_name', 'unknown')
            sanitized_name = self._sanitize_collection_name(collection_name) if hasattr(self, '_sanitize_collection_name') else 'unknown'
            self.logger.error(f"Error creating collection ‘{sanitized_name}’ (from ‘{collection_name}’): {str(e)}")
            return False

    async def get_indexed_files(self) -> Set[str]:
        """Collect the non-empty ``filename`` property of every object in the collection.

        Returns:
            The set of filenames, or an empty set if anything fails
            (the error is logged, not raised).
        """
        try:
            await self._ensure_connected()
            collection = self._get_collection()
            indexed_files = set()
            # Vectors are not needed to list filenames, so skip fetching them.
            for obj in collection.iterator(include_vector=False):
                filename = obj.properties.get("filename", "")
                if filename:
                    indexed_files.add(filename)
            return indexed_files
        except Exception as e:
            self.logger.error(f"Error retrieving indexed files: {str(e)}")
            return set()

    @stamina.retry(on=Exception, attempts=3)
    async def _insert_with_retry(
        self,
        embedding: Union[List[float], List[List[float]]],
        metadata: Dict[str, Any],
    ):
        """Insert one object, letting exceptions propagate so the retry can act on them.

        Returns:
            Whatever ``collection.data.insert`` returns (the new object's UUID).
        """
        await self._ensure_connected()
        collection = self._get_collection()
        properties = {
            "filename": metadata.get("filename", ""),
        }
        return collection.data.insert(
            properties=properties,
            vector={self._VECTOR_NAME: embedding},
        )

    async def index_document(
        self,
        embedding: Union[List[float], List[List[float]]],
        metadata: Dict[str, Any]
    ) -> bool:
        """Insert one document's embedding together with its filename.

        The insert is attempted up to three times. The retry decorator sits
        on a helper that raises, because decorating this method directly
        (as the original did) can never retry: every exception is caught
        here and converted to ``False``.

        Args:
            embedding: The vector(s) stored under ``custom_multi_vector``.
            metadata: Only the ``filename`` key is read; a missing key is
                stored as an empty string.

        Returns:
            True if the insert succeeded, False after all attempts failed
            (the last error is logged, not raised).
        """
        try:
            uuid_obj = await self._insert_with_retry(embedding, metadata)
            self.logger.debug(f"Successfully indexed document with UUID: {uuid_obj}")
            return True
        except Exception as e:
            self.logger.error(f"Error indexing document: {str(e)}")
            return False

    async def search(
        self,
        query_embedding: Union[List[float], List[List[float]]],
        limit: int = 5,
        filter_conditions: Optional[Dict[str, Any]] = None
    ) -> SearchResults:
        """Run a near-vector search against ``custom_multi_vector``.

        Args:
            query_embedding: The query vector(s).
            limit: Maximum number of objects to return.
            filter_conditions: Accepted for interface compatibility; not
                applied by this implementation.

        Returns:
            One result per returned object, with ``score = 1 - distance``
            (or ``-1.0`` when no distance is available). An empty result set
            is returned on any error or for an empty query embedding.
        """
        try:
            # An empty query embedding cannot match anything; reject it here
            # rather than sending it to the server.
            if query_embedding is None or len(query_embedding) == 0:
                self.logger.error("Empty query embedding received")
                return SearchResults(results=[])

            await self._ensure_connected()
            collection = self._get_collection()
            response = collection.query.near_vector(
                near_vector=query_embedding,
                target_vector=self._VECTOR_NAME,
                limit=limit,
                return_metadata=MetadataQuery(distance=True),
            )
            results = []
            for obj in response.objects:
                full_metadata = obj.properties.copy()
                # Metadata, or the distance inside it, may be missing; fall
                # back to -1.0 instead of failing the whole search.
                distance = obj.metadata.distance if obj.metadata else None
                score = 1 - distance if distance is not None else -1.0
                result = SearchResult(
                    document_id=obj.properties.get("filename", ""),
                    score=score,
                    metadata=full_metadata,
                )
                results.append(result)
            return SearchResults(results=results)
        except Exception as e:
            self.logger.error(f"Error performing search: {str(e)}")
            return SearchResults(results=[])

    def __del__(self):
        """Best-effort close of the client when the manager is garbage-collected."""
        # ``client`` may not exist if __init__ failed before assigning it.
        client = getattr(self, "client", None)
        if client:
            try:
                client.close()
            except Exception:
                # Deliberately ignored: nothing useful can be done with an
                # error raised during finalization.
                pass

# Make this backend available through the factory under the key "weaviate".
VectorDatabaseFactory.register_database("weaviate", WeaviateManager)

Hey Emre!

Been there with MUVERA errors - they’re super frustrating because the error message doesn’t tell you what’s actually wrong. That “index out of range with length 0” is MUVERA trying to access an empty vector array.

Quick diagnosis first - add this before your search to see what’s happening:

# Check if your collection actually has data
collection = self.client.collections.get(sanitized_name)
count = collection.aggregate.over_all(total_count=True)
print(f"Collection has {count.total_count} objects")

# Check your query vector format
print(f"Query vector type: {type(query_embedding)}")
print(f"Query vector length: {len(query_embedding) if query_embedding else 0}")

Most likely fix:

Your ColQwen2.5 model produces multi-vectors, but your search query might be passing them in the wrong format. Try this modification in your search method:

# Before the near_vector call, validate the format:
if not query_embedding or len(query_embedding) == 0:
    self.logger.error("Empty query embedding received")
    return SearchResults(results=[])

# Make sure it's the right multi-vector format for MUVERA
response = collection.query.near_vector(
    near_vector=query_embedding,
    target_vector="custom_multi_vector",
    limit=limit,
    return_metadata=MetadataQuery(distance=True),
    include_vector=False  # This can help avoid return format issues
)

Common causes I’ve seen:

  1. Empty collection - indexing failed silently but collection exists

  2. Vector format mismatch - query vectors don’t match indexed format

  3. ColQwen2.5 output inconsistency - sometimes the model returns different shapes

Try querying with the exact same embedding you used during indexing first. If that works, the issue is in how you’re formatting query vectors from ColQwen2.5.

Let me know what the diagnostic prints show and we can narrow it down further!

1 Like

Hi @Emre_Kuru ,

I am Roberto from Applied Research team. The error you found is because MUVERA parameters are not available or lost for some reason (particularly random vectors used for space partitioning).

As @Chaitanya_Kulthe suggested, a possible cause is that you are trying to query an empty collection. Are you able to get information about the collection? Is there any data in it?

Dear Roberto,

I did have data in it, I'm sure. I no longer have access to the collection (I have since deleted the cluster), but at the time of the error I am 100% sure the collection had data in it.

And also I inspected a vector from one of the data points and it seemed that muvera was applied successfully.

But @Chaitanya_Kulthe was correct in that the data points had different shapes, specifically 1114 and 1331 for some reason, falling into two distinct categories.

I will soon try again with the discussed solutions, but any more suggestions would be appreciated :slight_smile:

Thank you both for the assistance
Emre Kuru

Roberto Esposito via Weaviate Community Forum <notifications@weaviate.discoursemail.com>, 11 Ağu 2025 Pzt, 18:38 tarihinde şunu yazdı:

1 Like