Collection reference turned into UUID and can't be linked

Description

I have a schema.json:

{
  "classes": [
    {
      "class": "Pack",
      "properties": [
        {
          "name": "pack_name",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "version",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "author",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "website",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "state",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "date",
          "dataType": [
            "date"
          ]
        }
      ]
    },
    {
      "class": "TextFile",
      "properties": [
        {
          "name": "path_in_pack",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "simhash",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "sequences",
          "dataType": [
            "string[]"
          ]
        },
        {
          "name": "belongs_to_pack",
          "dataType": [
            "Pack"
          ]
        },
        {
          "name": "md5",
          "dataType": [
            "string"
          ]
        }
      ],
      "vectorIndexConfig": {
        "distance": "hamming"
      }
    },
    {
      "class": "ImageFile",
      "properties": [
        {
          "name": "md5",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "path_in_pack",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "histogram",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "phash",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "lbp_features",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "belongs_to_pack",
          "dataType": [
            "Pack"
          ]
        }
      ]
    },
    {
      "class": "AudioFile",
      "properties": [
        {
          "name": "path_in_pack",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "belongs_to_pack",
          "dataType": [
            "Pack"
          ]
        },
        {
          "name": "md5",
          "dataType": [
            "string"
          ]
        }
      ]
    }
  ]
}

and I load the schema with

self.client = weaviate.Client(WEAVIATE_URL)
        try:
            self.load_schema('database/schema.gql')
        except Exception as e:
            print(e)

If I GET the schema:

            schema = self.client.schema.get()
{
    "classes": [
        {
            "class": "TextFile",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "multiTenancyConfig": {
                "autoTenantActivation": false,
                "autoTenantCreation": false,
                "enabled": false
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "path_in_pack",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text[]"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "sequences",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "md5",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "uuid"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "belongs_to_pack"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "file_path",
                    "tokenization": "word"
                },
                {
                    "dataType": [
                        "number[]"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "simhash"
                }
            ],
            "replicationConfig": {
                "factor": 1
            },
            "shardingConfig": {
                "actualCount": 1,
                "actualVirtualCount": 128,
                "desiredCount": 1,
                "desiredVirtualCount": 128,
                "function": "murmur3",
                "key": "_id",
                "strategy": "hash",
                "virtualPerPhysical": 128
            },
            "vectorIndexConfig": {
                "bq": {
                    "enabled": false
                },
                "cleanupIntervalSeconds": 300,
                "distance": "hamming",
                "dynamicEfFactor": 8,
                "dynamicEfMax": 500,
                "dynamicEfMin": 100,
                "ef": -1,
                "efConstruction": 128,
                "flatSearchCutoff": 40000,
                "maxConnections": 64,
                "pq": {
                    "bitCompression": false,
                    "centroids": 256,
                    "enabled": false,
                    "encoder": {
                        "distribution": "log-normal",
                        "type": "kmeans"
                    },
                    "segments": 0,
                    "trainingLimit": 100000
                },
                "skip": false,
                "vectorCacheMaxObjects": 1000000000000
            },
            "vectorIndexType": "hnsw",
            "vectorizer": "none"
        },
        {
            "class": "AudioFile",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "multiTenancyConfig": {
                "autoTenantActivation": false,
                "autoTenantCreation": false,
                "enabled": false
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "path_in_pack",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "md5",
                    "tokenization": "whitespace"
                }
            ],
            "replicationConfig": {
                "factor": 1
            },
            "shardingConfig": {
                "actualCount": 1,
                "actualVirtualCount": 128,
                "desiredCount": 1,
                "desiredVirtualCount": 128,
                "function": "murmur3",
                "key": "_id",
                "strategy": "hash",
                "virtualPerPhysical": 128
            },
            "vectorIndexConfig": {
                "bq": {
                    "enabled": false
                },
                "cleanupIntervalSeconds": 300,
                "distance": "cosine",
                "dynamicEfFactor": 8,
                "dynamicEfMax": 500,
                "dynamicEfMin": 100,
                "ef": -1,
                "efConstruction": 128,
                "flatSearchCutoff": 40000,
                "maxConnections": 64,
                "pq": {
                    "bitCompression": false,
                    "centroids": 256,
                    "enabled": false,
                    "encoder": {
                        "distribution": "log-normal",
                        "type": "kmeans"
                    },
                    "segments": 0,
                    "trainingLimit": 100000
                },
                "skip": false,
                "vectorCacheMaxObjects": 1000000000000
            },
            "vectorIndexType": "hnsw",
            "vectorizer": "none"
        },
        {
            "class": "ImageFile",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "multiTenancyConfig": {
                "autoTenantActivation": false,
                "autoTenantCreation": false,
                "enabled": false
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "md5",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "path_in_pack",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "file_path",
                    "tokenization": "word"
                },
                {
                    "dataType": [
                        "number[]"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "phash"
                },
                {
                    "dataType": [
                        "number[]"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "lbp_features"
                },
                {
                    "dataType": [
                        "uuid"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "belongs_to_pack"
                }
            ],
            "replicationConfig": {
                "factor": 1
            },
            "shardingConfig": {
                "actualCount": 1,
                "actualVirtualCount": 128,
                "desiredCount": 1,
                "desiredVirtualCount": 128,
                "function": "murmur3",
                "key": "_id",
                "strategy": "hash",
                "virtualPerPhysical": 128
            },
            "vectorIndexConfig": {
                "bq": {
                    "enabled": false
                },
                "cleanupIntervalSeconds": 300,
                "distance": "cosine",
                "dynamicEfFactor": 8,
                "dynamicEfMax": 500,
                "dynamicEfMin": 100,
                "ef": -1,
                "efConstruction": 128,
                "flatSearchCutoff": 40000,
                "maxConnections": 64,
                "pq": {
                    "bitCompression": false,
                    "centroids": 256,
                    "enabled": false,
                    "encoder": {
                        "distribution": "log-normal",
                        "type": "kmeans"
                    },
                    "segments": 0,
                    "trainingLimit": 100000
                },
                "skip": false,
                "vectorCacheMaxObjects": 1000000000000
            },
            "vectorIndexType": "hnsw",
            "vectorizer": "none"
        },
        {
            "class": "Pack",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "multiTenancyConfig": {
                "autoTenantActivation": false,
                "autoTenantCreation": false,
                "enabled": false
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "pack_name",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "version",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "author",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "website",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "text"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": true,
                    "name": "state",
                    "tokenization": "whitespace"
                },
                {
                    "dataType": [
                        "date"
                    ],
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "date"
                }
            ],
            "replicationConfig": {
                "factor": 1
            },
            "shardingConfig": {
                "actualCount": 1,
                "actualVirtualCount": 128,
                "desiredCount": 1,
                "desiredVirtualCount": 128,
                "function": "murmur3",
                "key": "_id",
                "strategy": "hash",
                "virtualPerPhysical": 128
            },
            "vectorIndexConfig": {
                "bq": {
                    "enabled": false
                },
                "cleanupIntervalSeconds": 300,
                "distance": "cosine",
                "dynamicEfFactor": 8,
                "dynamicEfMax": 500,
                "dynamicEfMin": 100,
                "ef": -1,
                "efConstruction": 128,
                "flatSearchCutoff": 40000,
                "maxConnections": 64,
                "pq": {
                    "bitCompression": false,
                    "centroids": 256,
                    "enabled": false,
                    "encoder": {
                        "distribution": "log-normal",
                        "type": "kmeans"
                    },
                    "segments": 0,
                    "trainingLimit": 100000
                },
                "skip": false,
                "vectorCacheMaxObjects": 1000000000000
            },
            "vectorIndexType": "hnsw",
            "vectorizer": "none"
        }
    ]
}

You can tell that the class references were turned into UUID properties:

{
                    "dataType": [
                        "uuid"
                    ],
                    "description": "This property was generated by Weaviate's auto-schema feature on Fri Sep 20 12:16:53 2024",
                    "indexFilterable": true,
                    "indexSearchable": false,
                    "name": "belongs_to_pack"
                }

But when I query the references, whether using the Python SDK (v3) or native GraphQL:

self.client.query.get("TextFile", ["path_in_pack",
# LinkTo(
#     link_on="belongs_to_pack",           # 要查询的字段
#     linked_class="Pack",                 # 引用的类
#     properties=["pack_name"]             # 查询该引用类中的字段
# )
"""
belongs_to_pack {
    name
}"""
]).with_near_vector(query).do()

It errors:

{'errors': [{'locations': [{'column': 17, 'line': 2}], 'message': 'Field "belongs_to_pack" of type "String" must not have a sub selection.', 'path': None}]}

How can I solve this, please?

Server Setup Information

  • Weaviate Server Version: {"action":"startup","build_git_commit":"447949c","build_go_version":"go1.22.7","build_image_tag":"1.25.17","build_wv_version":"1.25.17","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-09-17T17:20:52Z"}
  • Deployment Method: docker
  • Number of Running Nodes: 1
  • Client Language and Version: python weaviate==0.1.0 weaviate-client==4.8.1
  • Multitenancy?:

Any additional Information

Hi!

I was not able to create the collections using this schema.

I got this error because the data type "vector" is not valid:

        {
          "name": "simhash",
          "dataType": [
            "vector"
          ]
        },

this was the error:

UnexpectedStatusCodeError: Collection may not have been created properly.! Unexpected status code: 422, with response body: {'error': [{'message': "property 'simhash': invalid dataType: unknown primitive data type 'vector'"}]}.

I was able to comment out that property and create the class, and after that it worked properly.

I have used python v4 syntax (you are using python v3). Here is what I have done:

import weaviate
client = weaviate.connect_to_local()
print(weaviate.__version__, client.get_meta().get("version"))

schema = {
  "classes": [
    {
      "class": "Pack",
      "properties": [
        {
          "name": "pack_name",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "version",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "author",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "website",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "state",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "date",
          "dataType": [
            "date"
          ]
        }
      ]
    },
    {
      "class": "TextFile",
      "properties": [
        {
          "name": "path_in_pack",
          "dataType": [
            "string"
          ]
        },
        # {
        #   "name": "simhash",
        #   "dataType": [
        #     "vector"
        #   ]
        # },
        {
          "name": "sequences",
          "dataType": [
            "string[]"
          ]
        },
        {
          "name": "belongs_to_pack",
          "dataType": [
            "Pack"
          ]
        },
        {
          "name": "md5",
          "dataType": [
            "string"
          ]
        }
      ],
      "vectorIndexConfig": {
        "distance": "hamming"
      }
    },
    {
      "class": "ImageFile",
      "properties": [
        {
          "name": "md5",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "path_in_pack",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "histogram",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "phash",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "lbp_features",
          "dataType": [
            "vector"
          ]
        },
        {
          "name": "belongs_to_pack",
          "dataType": [
            "Pack"
          ]
        }
      ]
    },
    {
      "class": "AudioFile",
      "properties": [
        {
          "name": "path_in_pack",
          "dataType": [
            "string"
          ]
        },
        {
          "name": "belongs_to_pack",
          "dataType": [
            "Pack"
          ]
        },
        {
          "name": "md5",
          "dataType": [
            "string"
          ]
        }
      ]
    }
  ]
}

c = client.collections.create_from_dict(schema["classes"][0])
c = client.collections.create_from_dict(schema["classes"][1])

We have now created the first and second classes.

And the property that is a cross reference is correctly created:

collection = client.collections.get("TextFile")
for p in collection.config.get().references:
    print("----")
    print(p)

here is the output:

_ReferenceProperty(name='belongs_to_pack', description=None, target_collections=['Pack'])

Let me know if this helps!

Thanks!