The field is from the class User
with the following schema:
{'class': 'User',
'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
'cleanupIntervalSeconds': 60,
'indexNullState': True,
'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
'moduleConfig': {'text2vec-openai': {'model': 'ada',
'modelVersion': '002',
'type': 'text',
'vectorizeClassName': False}},
'properties': [{'dataType': ['int'],
'description': 'ID of the postgres record of the place',
'indexFilterable': True,
'indexSearchable': False,
'moduleConfig': {'text2vec-openai': {'skip': True,
'vectorizePropertyName': False}},
'name': 'postgresId'},
{'dataType': ['text'],
'description': 'Name of the user',
'indexFilterable': True,
'indexSearchable': True,
'moduleConfig': {'text2vec-openai': {'skip': False,
'vectorizePropertyName': False}},
'name': 'name',
'tokenization': 'word'},
{'dataType': ['text'],
'description': 'Username of the user',
'indexFilterable': True,
'indexSearchable': True,
'moduleConfig': {'text2vec-openai': {'skip': False,
'vectorizePropertyName': False}},
'name': 'username',
'tokenization': 'word'},
{'dataType': ['text'],
'description': 'Email of the place',
'indexFilterable': True,
'indexSearchable': True,
'moduleConfig': {'text2vec-openai': {'skip': True,
'vectorizePropertyName': False}},
'name': 'email',
'tokenization': 'word'},
{'dataType': ['text'],
'description': 'Role of the place',
'indexFilterable': True,
'indexSearchable': True,
'moduleConfig': {'text2vec-openai': {'skip': True,
'vectorizePropertyName': False}},
'name': 'role',
'tokenization': 'word'},
{'dataType': ['Document'],
'description': 'is author of documents',
'indexFilterable': True,
'indexSearchable': False,
'moduleConfig': {'text2vec-openai': {'skip': False,
'vectorizePropertyName': False}},
'name': 'isAuthorOf'},
{'dataType': ['Knowledge'],
'description': 'is mentioned in',
'indexFilterable': True,
'indexSearchable': False,
'moduleConfig': {'text2vec-openai': {'skip': False,
'vectorizePropertyName': False}},
'name': 'isMentionedIn'}],
'replicationConfig': {'factor': 1},
'shardingConfig': {'virtualPerPhysical': 128,
'desiredCount': 1,
'actualCount': 1,
'desiredVirtualCount': 128,
'actualVirtualCount': 128,
'key': '_id',
'strategy': 'hash',
'function': 'murmur3'},
'vectorIndexConfig': {'skip': False,
'cleanupIntervalSeconds': 300,
'maxConnections': 64,
'efConstruction': 128,
'ef': -1,
'dynamicEfMin': 100,
'dynamicEfMax': 500,
'dynamicEfFactor': 8,
'vectorCacheMaxObjects': 1000000000000,
'flatSearchCutoff': 40000,
'distance': 'cosine',
'pq': {'enabled': False,
'bitCompression': False,
'segments': 0,
'centroids': 256,
'encoder': {'type': 'kmeans', 'distribution': 'log-normal'}}},
'vectorIndexType': 'hnsw',
'vectorizer': 'text2vec-openai'}
The code I was using is this:
def create_document_for_learning(client, document_text, author_uuid):
    """Create a ``Document`` object and cross-link it with its author.

    The document UUID is computed by ``generate_uuid5`` from the document
    text and the author UUID, and that UUID is used to check whether the
    document is already stored. When it is not, the document and the two
    references ``Document.belongsToAuthor -> User`` and
    ``User.isAuthorOf -> Document`` are queued on ``client.batch``.

    Args:
        client: Client exposing ``batch`` (used as a context manager) and
            ``data_object.exists``.
        document_text: Text stored in the document's ``content`` property.
        author_uuid: UUID of the ``User`` object that authored the document.

    Returns:
        The UUID of the document, whether or not this call queued it.
    """
    # The identifier includes the author, but only 'content' is stored on the
    # object itself (see data_object below).
    document_uuid = generate_uuid5(
        {'content': document_text, 'author_uuid': author_uuid},
        'Document',
    )

    with client.batch as batch:
        # NOTE(review): the original snippet had lost its indentation; the two
        # references are assumed to belong to this branch, i.e. they are only
        # added together with a newly created document -- confirm.
        if not client.data_object.exists(uuid=document_uuid, class_name='Document'):
            batch.add_data_object(
                data_object={'content': document_text},
                class_name="Document",
                uuid=document_uuid,
            )
            # document - 'belongsToAuthor' - author reference
            batch.add_reference(
                from_object_uuid=document_uuid,
                from_object_class_name='Document',
                from_property_name='belongsToAuthor',
                to_object_uuid=author_uuid,
                to_object_class_name='User',
            )
            # author - 'isAuthorOf' - document reference
            batch.add_reference(
                from_object_uuid=author_uuid,
                from_object_class_name='User',
                from_property_name='isAuthorOf',
                to_object_uuid=document_uuid,
                to_object_class_name='Document',
            )

    return document_uuid