This might be a newbie question.
I am trying keyword search, not semantic search. The keyword search itself seems to be working, but the scores are always 0.0.
Is this the correct behavior? Am I missing something?
example:
from datasets import load_dataset
# Shuffle the training split with a fixed seed, then keep the first 500 rows.
dataset = load_dataset('jeopardy', split='train').shuffle(seed=42)
sample_dataset = dataset.select(range(500))
# Project every sampled row down to the three columns used later on.
columns = ["category", "question", "answer"]
sample_data = [{col: row[col] for col in columns} for row in sample_dataset]
import weaviate
import weaviate.classes as wvc
import os
from google.colab import userdata
# Read the cluster URL and API key from the Colab secret store, then connect.
cluster_url = userdata.get('WEAVIATE_CLUSTER_URL')
api_key = weaviate.auth.AuthApiKey(userdata.get('WEAVIATE_API_KEY'))
client = weaviate.connect_to_wcs(
    cluster_url=cluster_url,
    auth_credentials=api_key,
)
# Create the "Question" collection; no properties are declared here.
# NOTE(review): presumably the schema is inferred from the inserted objects - confirm.
questions = client.collections.create(name="Question")
# Build one insert payload per sampled row, carrying the answer, question
# and category fields. (The original loop body had lost its indentation and
# enumerated an index that was never used.)
question_objs = [
    {
        "answer": d["answer"],
        "question": d["question"],
        "category": d["category"],
    }
    for d in sample_data
]
# Hand the whole list to the collection in a single insert_many call.
questions.data.insert_many(question_objs)
# Ask for the BM25 score and its explanation alongside each hit.
requested_metadata = wvc.query.MetadataQuery(score=True, explain_score=True)
searched_properties = ["question", "answer", "category"]
# Keyword (BM25) search for "america" over the three text properties.
response = questions.query.bm25(
    query="america",
    query_properties=searched_properties,
    return_metadata=requested_metadata,
    limit=10,
)
# Print score, score explanation and stored properties for every hit,
# separated by a blank line. (The loop body had lost its indentation in the
# original, which made the script unparseable.)
for r in response.objects:
    print(r.metadata.score)
    print(r.metadata.explain_score)
    print(r.properties)
    print()
result:
0.0
, BM25F_america_frequency:1, BM25F_america_propLength:2
{'answer': 'Jean Lafitte', 'question': "'A Natl. Historical Park & Preserve named for this pirate includes the site of the Battle of New Orleans'", 'category': 'HISTORIC AMERICA'}
0.0
, BM25F_america_frequency:1, BM25F_america_propLength:3
{'answer': 'Rockford', 'question': '\'The "files" on this large Illinois city include its historic leadership in screw production\'', 'category': 'AMERICA THE BEAUTIFUL'}
0.0
, BM25F_america_propLength:3, BM25F_america_frequency:1
{'answer': '"God Bless America"', 'question': '\'This Irving Berlin song has been called "The Nation\'s Unofficial Second National Anthem"\'', 'category': 'IRVING BERLIN'}
0.0
, BM25F_america_frequency:3, BM25F_america_propLength:17
{'answer': 'law', 'question': '\'"America! America! God mend thine every flaw/ Confirm thy soul in self-control, thy liberty in" this\'', 'category': 'AMERICA THE BEAUTIFUL'}
0.0
, BM25F_america_frequency:1, BM25F_america_propLength:11
{'answer': 'Milwaukee', 'question': '\'This city on Lake Michigan is "The Beer Capital of America"\'', 'category': 'AMERICAN CITIES'}
0.0
, BM25F_america_frequency:1, BM25F_america_propLength:16
{'answer': 'CNN Headline News', 'question': "'Every half hour since 1982, this CNN network has updated America on news, sports, business & entertainment'", 'category': 'CNN'}
Thank you.