Hello,
I’m beginner in weaviate and I have a problem inserting the data correctly.
Here’s how I created the schema :
curl -X POST "path-to-schema" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer secr3tk3y" \
-d '{
"class": "Comment",
"vectorizer": "text2vec-transformers",
"description": "This class represents comments made by users",
"properties": [
{
"name": "user",
"dataType": ["string"],
"description": "The user who made the comment"
},
{
"name": "comment",
"dataType": ["text"],
"description": "The content of the comment"
}
]
}'
curl -X POST "path-to-schema" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer secr3tk3y" \
-d '{
"class": "Video",
"vectorizer": "text2vec-transformers",
"description": "This class represents videos which can have comments",
"properties": [
{
"name": "link",
"dataType": ["text"],
"description": "The URL of the video"
},
{
"name": "comments",
"dataType": ["Comment"],
"description": "Comments on the video"
}
]
}'
and here’s how I implemented the insertion python
class WeaviateVideoInserter:
def __init__(self, weaviate_url, api_key, model_name='bert-base-uncased'):
auth_config = weaviate.auth.AuthApiKey(api_key=api_key)
self.client = weaviate.Client(url=weaviate_url,auth_client_secret=auth_config)
local_model_path = "...\\huggingface" + "\\" + model_name
self.tokenizer = BertTokenizer.from_pretrained(local_model_path)
self.model = BertModel.from_pretrained(local_model_path)
def text_to_vector(self, text):
inputs = self.tokenizer(text, return_tensors='pt')
outputs = self.model(**inputs)
# Convert the tensor to a 1D list of floats
return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
def create_video_object(self, video_id, video_data):
youtube_link = f"https://www.youtube.com/watch?v={video_id}"
comments_with_vectors = []
for comment in video_data['comments']:
vector = self.text_to_vector(comment['comment'])
comment_object = {
'class': 'Comment',
'properties': {
'user': comment['user'],
'comment': comment['comment'],
'commentVector': vector
}
}
comments_with_vectors.append(comment_object)
# Check if 'title' is in video_data, if not, use an empty string or a default value
video_title = video_data.get('title', 'Default Video Title') # Replace 'Default Video Title' with a suitable default or an empty string
video_object = {
'class': 'Video',
'properties': {
'link': youtube_link,
'comments': comments_with_vectors,
'videoVector': self.text_to_vector(video_title)
}
}
return video_object
def insert_video_data(self, video_object):
# Enregistrement de video_object dans un fichier JSON
with open('video_object.json', 'w') as file:
json.dump(video_object, file, indent=4)
print("video_object enregistré dans video_object.json")
try:
self.client.data_object.create(video_object, video_object['class'])
print("Données insérées avec succès dans Weaviate")
except Exception as e:
print(f"Erreur lors de l'insertion dans Weaviate: {e}")
During debugging, I have the values in ‘link’ and ‘comments’ objects
This is the object before insertion into the database :
{
"class": "Video",
"properties": {
"link": "mylike.com/video/1234567890",
"comments": [
{
"class": "Comment",
"properties": {
"user": "user1",
"comment": "comment1",
"commentVector": [...]
}
},
{
"class": "Comment",
"properties": {
"user": "user2",
"comment": "comment2",
"commentVector": [...]
}
}
],
"videoVector": [...]
}
}
, but when I return to db after insertion, I only have videoVector value.
link and comments are null.
Here’s what’s look like in db :
{
"data": {
"Get": {
"Video": [
{
"comments": null,
"link": null,
"properties": {
"videoVector": [...]
}
},
{
"comments": null,
"link": null,
"properties": {
"videoVector": [...]
}
},
{
"comments": null,
"link": null,
"properties": {
"videoVector": [...]
}
}
]
}
}
}
Does anyone have any idea how I can insert the data correctly?
Thanks in advance