Description
Hi, I am very new to Weaviate and would really appreciate some help here. I am migrating a dataset from MongoDB to Weaviate. I have created collections and loaded my data, which is real estate sample data.
# Open the connection and get a handle on the "Property" collection.
client.connect()
properties = client.collections.get("Property")

# Fetch the first ten objects, returning only the fields listed below.
wanted_fields = ["zipCode", "city", "bedrooms", "fullBathrooms", "price"]
response = properties.query.fetch_objects(
    limit=10,
    return_properties=wanted_fields,
)

# Print the property dictionary of every returned object.
for listing in response.objects:
    print(listing.properties)
But whenever I add filters, each query takes around 2 seconds.
# Filter results - single filter
import weaviate
import weaviate.classes as wvc
import os
from weaviate.classes.query import MetadataQuery, Filter
try:
    client.connect()
    properties = client.collections.get("Property")

    # Single-property filter: only listings with exactly three bedrooms.
    response = properties.query.fetch_objects(
        filters=Filter.by_property("bedrooms").equal(3),
        limit=20,
        # Metadata fields are only populated when they are explicitly
        # requested; without this, creation_time below prints as None.
        return_metadata=MetadataQuery(creation_time=True),
    )

    for o in response.objects:
        print(o.properties)  # View the returned properties
        print(o.metadata.creation_time)  # View the returned creation time
finally:
    # Always release the connection, even if the query raised.
    client.close()
Server Setup Information
- Weaviate Server Version: ‘1.27.11’
- Deployment Method: Docker compose
- Multi Node? Number of Running Nodes: Single Node. The host is a VM on ESXI 8, on nvme disks
- Client Language and Version:
- Multitenancy?: NO
Any additional Information
Collection Setup:
# Short aliases keep the schema table below readable.
Prop = wvc.config.Property
DType = wvc.config.DataType

# Both the vectorizer and the generative module talk to the same Ollama host.
OLLAMA_ENDPOINT = "http://192.168.10.11:11434"

# Schema of the "Property" collection, grouped by purpose. Index flags that
# are not passed explicitly are left at the client/server defaults.
property_schema = [
    # --- Unique identifier first ---
    Prop(
        name="property_uuid",
        data_type=DType.TEXT,
        index_filterable=True,
        description="Unique identifier to link with images",
    ),

    # --- Source information ---
    Prop(name="sourceIds", data_type=DType.TEXT),
    Prop(name="sourceUrls", data_type=DType.TEXT),

    # --- Basic information (primary search fields) ---
    Prop(
        name="address",
        data_type=DType.TEXT,
        index_filterable=True,
        index_searchable=True,
    ),
    Prop(
        name="city",
        data_type=DType.TEXT,
        index_filterable=True,
        index_searchable=True,
    ),
    Prop(name="state", data_type=DType.TEXT, index_filterable=False),
    Prop(name="zipCode", data_type=DType.TEXT, index_filterable=True),
    Prop(name="propertyType", data_type=DType.TEXT, index_filterable=True),
    Prop(name="status", data_type=DType.TEXT, index_filterable=True),

    # --- Numeric features (commonly searched) ---
    # Only the range index is requested for price; index_filterable is not
    # passed, so the default applies.
    Prop(name="price", data_type=DType.NUMBER, index_range_filters=True),
    # This is a discretized version of price for faster filtering.
    Prop(
        name="priceRange",
        data_type=DType.TEXT,
        index_filterable=True,
        description="Discretized price range for faster filtering",
    ),
    # Same indexing choice as price: range index only.
    Prop(
        name="pricePerSqft",
        data_type=DType.NUMBER,
        index_range_filters=True,
    ),
    Prop(name="bedrooms", data_type=DType.NUMBER, index_filterable=True),
    Prop(name="fullBathrooms", data_type=DType.NUMBER, index_filterable=True),
    Prop(name="halfBathrooms", data_type=DType.NUMBER, index_filterable=True),
    Prop(
        name="totalSqft",
        data_type=DType.NUMBER,
        index_filterable=True,
        index_range_filters=True,
    ),
    Prop(name="lotSize", data_type=DType.NUMBER, index_filterable=True),
    Prop(
        name="yearBuilt",
        data_type=DType.NUMBER,
        index_filterable=True,
        index_range_filters=True,
    ),

    # --- Listing details ---
    Prop(name="daysOnMarket", data_type=DType.NUMBER, index_filterable=True),
    Prop(name="listDate", data_type=DType.TEXT, index_filterable=True),

    # --- Description and features (semantic search) ---
    Prop(name="description", data_type=DType.TEXT, index_searchable=True),
    Prop(
        name="keyFeatures",
        data_type=DType.TEXT_ARRAY,
        index_searchable=True,
    ),

    # --- Construction and structure ---
    Prop(
        name="constructionMaterial",
        data_type=DType.TEXT,
        index_filterable=False,
    ),
    Prop(name="structureType", data_type=DType.TEXT, index_filterable=False),
]

# Embeddings come from Ollama's nomic-embed-text model.
vectorizer = wvc.config.Configure.Vectorizer.text2vec_ollama(
    api_endpoint=OLLAMA_ENDPOINT,
    model="nomic-embed-text",
)

# Generative queries are served by the phi4 model on the same host.
generative = wvc.config.Configure.Generative.ollama(
    api_endpoint=OLLAMA_ENDPOINT,
    model="phi4",
)

# Create the collection with the schema and module configuration above.
property_collection = client.collections.create(
    name="Property",
    description="Comprehensive real estate property listings",
    properties=property_schema,
    vectorizer_config=vectorizer,
    generative_config=generative,
)
Again, I appreciate your help in debugging this. MongoDB is lightning fast, and I came to Weaviate because of its AI-friendly features.
Edit 1:
I tried testing raw GraphQL using the requests library and it’s lightning fast. Here is the query I tried with 4 filters:
import requests
import json
# The GraphQL endpoint expects a JSON request body.
headers = {
    'Content-Type': 'application/json'
}

# GraphQL query combining four filters with a logical And: exact zip code,
# exact bedroom and full-bathroom counts, and an upper bound on price.
# NOTE(review): recent Weaviate releases document `valueText` as the
# replacement for `valueString` -- confirm before changing the query.
query = """
{
Get {
Property(
limit: 20
where: {
operator: And
operands: [
{ path: ["zipCode"], operator: Equal, valueString: "22066" }
{ path: ["bedrooms"], operator: Equal, valueNumber: 4 }
{ path: ["fullBathrooms"], operator: Equal, valueNumber: 3 }
{ path: ["price"], operator: LessThan, valueNumber: 3000000 }
]
}
) {
zipCode
city
bedrooms
fullBathrooms
price
address
listDate
}
}
}
"""

# A Session can reuse the underlying connection if further requests are
# issued through it; the context manager closes it afterwards.
with requests.Session() as session:
    response = session.post(
        'http://192.168.10.11:8082/v1/graphql',
        headers=headers,
        json={'query': query},
    )

if response.status_code != 200:
    # Transport-level failure: show the status code and the raw body.
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    result = response.json()
    if 'errors' in result:
        # A 200 response whose payload still carries an 'errors' entry.
        print("GraphQL Errors:", result['errors'])
    else:
        properties = result['data']['Get']['Property']
        # Loop variable is `listing`, not `property`, so the builtin
        # `property` is no longer shadowed at module scope.
        for listing in properties:
            print(listing)