Filtering by IsNull on a cross-referenced object is not working

I have two classes Article and Metadata and configured "indexNullState": true in invertedIndexConfig on both of them.

When I filter from Article on {path: ["hasMetadata","Metadata","article_name"], valueBoolean: true, operator: IsNull} it yields empty results whereas if I filter it directly on the Metadata class it gives me some results.

1 Like

hey @vamsi,

This is a great question.

  1. Can you share both queries in full?
  2. What happens when you run the query with valueBoolean: false?

A hint: you can wrap a whole code example in ``` (one line before and one line after the code) to display it in a nice format.

Hey @sebawita ,

Here is an example with dummy data:

import weaviate
from weaviate.util import generate_uuid5

client = weaviate.Client("http://localhost:8080")

schema = {
  "classes": [{
    "class": "Article",
    "description": "An article",
      "invertedIndexConfig": {
        "indexNullState": True,
    },
    "properties": [
      {
        "dataType": [
          "text"
        ],
        "description": "Article text",
        "name": "article_text"
      },
      {
        "dataType": [
          "Metadata"
        ],
        "description": "Article metadata",
        "name": "hasMetadata"
      }
    ]
  },{
    "class": "Metadata",
    "description": "Article Metadata",
      "invertedIndexConfig": {
        "indexNullState": True,
    },
    "properties": [
      {
        "dataType": [
          "text"
        ],
        "description": "Title of the article",
        "name": "title"
      },
      {
        "dataType": [
          "text"
        ],
        "description": "The author of the article",
        "name": "author_name"
      }
    ]
  }]
}

client.schema.create(schema)

# Metadata - 1
data_object={
    'title': 'first article title'
}

uuid = client.data_object.create(
    data_object=data_object,
    class_name='Metadata',
    uuid=generate_uuid5('first article title',"Metadata"),
    vector=[0.12345] * 1536
)

print(uuid)  #19e253f5-c5ad-5031-b37e-1c3fb4d1e42c

# Metadata - 2
data_object={
    'author_name': 'second article author name'
}

uuid = client.data_object.create(
    data_object=data_object,
    class_name='Metadata',
    uuid=generate_uuid5('second article author name',"Metadata"),
    vector=[0.12345] * 1536
)

print(uuid)  #ad5646e3-756d-5290-b682-814d3f301bb2


#Article - 1
data_object={
    'article_text': 'first article'
}

uuid = client.data_object.create(
    data_object=data_object,
    class_name='Article',
    uuid=generate_uuid5('first article',"Article"),
    vector=[0.12345] * 1536
)

print(uuid)  #1cb50188-c31d-568c-9c4b-c778363bfd5c

# Article -2
data_object={
    'article_text': 'second article'
}

uuid = client.data_object.create(
    data_object=data_object,
    class_name='Article',
    uuid=generate_uuid5('second article',"Article"),
    vector=[0.12345] * 1536
)

print(uuid)  #314848d1-930a-5110-bfc5-dc7e4b51405f

# add cross references
# first article -> first metadata
client.data_object.reference.add(
    from_class_name="Article",
    from_uuid="1cb50188-c31d-568c-9c4b-c778363bfd5c",
    from_property_name="hasMetadata",
    to_class_name="Metadata",
    to_uuid="19e253f5-c5ad-5031-b37e-1c3fb4d1e42c",
)

#second article -> second metadata
client.data_object.reference.add(
    from_class_name="Article",
    from_uuid="314848d1-930a-5110-bfc5-dc7e4b51405f",
    from_property_name="hasMetadata",
    to_class_name="Metadata",
    to_uuid="ad5646e3-756d-5290-b682-814d3f301bb2",
)

Queries on Article class:

Article - fetch Article objects where Metadata title is null

query Get {
    Get {
        Article(
            where: {path: ["hasMetadata", "Metadata", "title"], valueBoolean: true, operator: IsNull}
        ) {
            article_text
            hasMetadata {
                ... on Metadata {
                    title
                }
            }
        }
    }
}

Response:

{
    "data": {
        "Get": {
            "Article": [
                {
                    "article_text": "first article",
                    "hasMetadata": [
                        {
                            "title": "first article title"
                        }
                    ]
                },
                {
                    "article_text": "second article",
                    "hasMetadata": [
                        {
                            "title": null
                        }
                    ]
                }
            ]
        }
    }
}

Article - fetch Article objects where Metadata title is not null

query Get {
    Get {
        Article(
            where: {path: ["hasMetadata", "Metadata", "title"], valueBoolean: false, operator: IsNull}
        ) {
            article_text
            hasMetadata {
                ... on Metadata {
                    title
                }
            }
        }
    }
}

Response:

{
    "data": {
        "Get": {
            "Article": []
        }
    }
}

Queries on Metadata class:

Metadata - fetch Metadata where title is null

query Get {
    Get {
        Metadata(where: {operator: IsNull, path: ["title"], valueBoolean: true}) {
            author_name
            title
        }
    }
}

Response:

{
    "data": {
        "Get": {
            "Metadata": [
                {
                    "author_name": "second article author name",
                    "title": null
                }
            ]
        }
    }
}

Metadata - fetch Metadata where title is not null

query Get {
    Get {
        Metadata(where: {operator: IsNull, path: ["title"], valueBoolean: false}) {
            author_name
            title
        }
    }
}

Response:

{
    "data": {
        "Get": {
            "Metadata": [
                {
                    "author_name": null,
                    "title": "first article title"
                }
            ]
        }
    }
}

hey @vamsi,
Thank you for sharing all the examples. :pray:
I talked with one of our engineers, and he suspects this might be a bug.
We are investigating the issue.

I will give you a shout once I hear more :+1:
Hopefully, we can find a fix and release an update :wink:

1 Like

Hi @vamsi, this was a bug and we found a fix for it, which will be included in the next minor release.

Here is the PR with the fix.

Thank you for helping us identify the issue. :slightly_smiling_face:

1 Like

Hi @sebawita, I have encountered the same problem on Weaviate 1.23.

hi @2020ashish

I have checked, and this has been fixed since 1.19. Here is a working Python notebook:

Note that you need to explicitly create your class with:

    "invertedIndexConfig": {
        "indexNullState": True
    },

as stated here:

Let me know if that works for you :slight_smile:

@DudaNogueira

Here is my code.
I was checking whether this works for "hasCategory":
where_filter = {
    "path": ["hasCategory"],
    "operator": "IsNull",
    # "valueBoolean": True,
    "valueBoolean": False
}
It shows anomalous (unexpected) behaviour.

Here is the query:

where_filter = {
    "path": ["hasCategory"],
    "operator": "IsNull",
    # "valueBoolean": True,
    "valueBoolean": False
}

response = (
    client.query
    .get("JeopardyQuestion", ["question", "answer", "hasCategory {... on JeopardyCategory{title field}}"])
    .with_where(where_filter)
    .with_limit(10)
    .do()
)

print(json.dumps(response, indent=2))

Response:

{
  "data": {
    "Get": {
      "JeopardyQuestion": []
    }
  }
}

When I changed the filter to:

where_filter = {
    "path": ["hasCategory"],
    "operator": "IsNull",
    "valueBoolean": True,
    # "valueBoolean": False
}

response = (
    client.query
    .get("JeopardyQuestion", ["question", "answer", "hasCategory {... on JeopardyCategory{title field}}"])
    .with_where(where_filter)
    .with_limit(10)
    .do()
)

print(json.dumps(response, indent=2))
Response:

{
  "data": {
    "Get": {
      "JeopardyQuestion": [
        {
          "answer": "the answer about one thing",
          "hasCategory": [
            {
              "field": null,
              "title": "Animals"
            }
          ],
          "question": "Some question about one thing"
        },
        {
          "answer": "the answer about other thing",
          "hasCategory": [
            {
              "field": "something",
              "title": "Places"
            }
          ],
          "question": "Some question about other thing"
        },
        {
          "answer": "the answer about Name",
          "hasCategory": null,
          "question": "Some question about Name"
        }
      ]
    }
  }
}

Hi!

this is how you can filter by “no cross reference”

where_filter = {
    "path": ["hasCategory"],
    "operator": "Equal",
    "valueInt": 0
}

this is the equivalent for pyv4:

import weaviate
import weaviate.classes as wcs

client = weaviate.connect_to_local()
collection = client.collections.get("JeopardyQuestion")

items = collection.query.fetch_objects(
    filters=wcs.Filter("hasCategory").equal(0)
)

with the latest beta you can do

Filter.by_ref_count("hasCategory").equal(0)
2 Likes