I am walking through the "101T Work with: Text Data" Weaviate Academy course and running into an issue in the data-import section. Here is my code so far — I have already created the Movies collection.
import weaviate
import weaviate.classes.config as wc #make use of classes such as property, datatype, configure - which are defined here
import pandas as pd
import requests
from datetime import datetime, timezone
from weaviate.util import generate_uuid5
from tqdm import tqdm
import os
import json
WCD_DEMO_URL = "demourl"
WCD_DEMO_ADMIN_KEY = "demokey"
OPENAI_APIKEY = "openapikey"

print(f"Your Weaviate client library version is: {weaviate.__version__}.")

# Third-party inference keys (here: OpenAI, used by the text2vec-openai and
# generative modules) must be forwarded to the cluster as request headers.
headers = {
    "X-OpenAI-Api-Key": OPENAI_APIKEY  # Replace with your OpenAI API key
}

# BUG FIX: the original code built `headers` but never passed it to the
# connection, so the cluster had no OpenAI key and vectorization during
# import would fail with an authentication/vectorizer error.
client = weaviate.connect_to_wcs(
    cluster_url=WCD_DEMO_URL,  # Replace with your WCD URL
    auth_credentials=weaviate.auth.AuthApiKey(WCD_DEMO_ADMIN_KEY),  # Replace with your WCD key
    headers=headers,  # Required so the text2vec-openai module can call OpenAI
)

# Learnings:
# - Weaviate can infer the schema automatically, but defining it explicitly
#   avoids surprises and gives better control.
# - If you do not specify vectors yourself, Weaviate uses the configured
#   vectorizer (here the text2vec-openai module) to generate embeddings.
# - To use the collection with a generative model (an LLM), you must also
#   configure a generative module on the collection.
assert client.is_live()

data_url = "https://raw.githubusercontent.com/weaviate-tutorials/edu-datasets/main/movies_data_1990_2024.json"
resp = requests.get(data_url)
df = pd.DataFrame(resp.json())

# Get the (already created) collection
movies = client.collections.get("Movie")
print(movies)
# Enter context manager
# NOTE: the pasted code had lost all block indentation (a SyntaxError as
# written); restored here with conventional 4-space indentation.
# rate_limit throttles sends to ~2400 requests/minute so the OpenAI
# vectorizer's rate limits are not exceeded.
with movies.batch.rate_limit(requests_per_minute=2400) as batch:
    # Loop through the rows of the movies DataFrame
    for i, movie in tqdm(df.iterrows(), total=len(df)):
        # Convert the JSON date string to a timezone-aware `datetime`
        # (Weaviate DATE properties require timezone information).
        release_date = datetime.strptime(movie["release_date"], "%Y-%m-%d").replace(
            tzinfo=timezone.utc
        )
        # genre_ids is stored as a JSON-encoded string; decode it to a list of ints
        genre_ids = json.loads(movie["genre_ids"])

        # Build the object payload matching the collection's defined properties
        movie_obj = {
            "title": movie["title"],
            "overview": movie["overview"],
            "vote_average": movie["vote_average"],
            "genre_ids": genre_ids,
            "release_date": release_date,
            "tmdb_id": movie["id"],
        }

        # Queue the object; a deterministic UUID (v5 of the TMDB id) makes the
        # import idempotent — re-running updates existing objects instead of
        # creating duplicates.
        batch.add_object(
            properties=movie_obj,
            uuid=generate_uuid5(movie["id"]),
            # references=reference_obj  # You can add references here
        )
        # The batcher sends accumulated batches automatically.

# After the context manager exits, check whether any objects failed to import
if len(movies.batch.failed_objects) > 0:
    print(f"Failed to import {len(movies.batch.failed_objects)} objects")

client.close()  # Close the connection & release resources
Here is my error: