"module 'text2vec-transformers': invalid combination of properties"

Hi,

I set up Weaviate with the following Docker Compose file:

version: "3.4"
services:
  weaviate:
    image: semitechnologies/weaviate:1.25.10
    ports:
      - "8088:8080"
      - "50051:50051"
    volumes:
      - ./data:/var/lib/weaviate
    restart: on-failure:0
    networks:
      - weaviate_default
    environment:
      TRANSFORMERS_INFERENCE_API: 'http://t2v-transformers:8080'
      RERANKER_INFERENCE_API: 'http://reranker-transformers:8080'
      QUERY_DEFAULTS_LIMIT: 25
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
      DEFAULT_VECTORIZER_MODULE: 'text2vec-transformers'
      ENABLE_MODULES: 'text2vec-transformers,reranker-transformers'
      CLUSTER_HOSTNAME: 'node1'
  t2v-transformers: 
    image: semitechnologies/transformers-inference:baai-bge-m3-onnx
    networks:
      - weaviate_default
    environment:
      ENABLE_CUDA: 0 # set to 1 to enable
  reranker-transformers:
    build:
      context: reranker-transformers-1.1.1
      dockerfile: Dockerfile
      args:
        HF_ENDPOINT: "https://hf-mirror.com"
        MODEL_NAME: "BAAI/bge-reranker-large"
    image: weaviate-reranker-transformers:latest
    networks:
      - weaviate_default
    environment:
      ENABLE_CUDA: '0'
networks:
  weaviate_default:
    driver: bridge

And I connect to it via the Weaviate Python client (version 4.7.1) with the following code:

import os

import weaviate
from weaviate.classes.init import AdditionalConfig, Timeout

client = weaviate.connect_to_custom(
    http_host=os.getenv('WEAVIATE_HOST'),
    http_port=int(os.getenv('WEAVIATE_HTTP_PORT').strip()),
    http_secure=False,
    grpc_host=os.getenv('WEAVIATE_HOST'),
    grpc_port=int(os.getenv('WEAVIATE_GRPC_PORT').strip()),
    grpc_secure=False,
    additional_config=AdditionalConfig(
        timeout=Timeout(init=30, query=60, insert=120)  # values in seconds
    ),
)

When I use the following code to create a collection, I get an error with status code 422 and response body {'error': [{'message': "module 'text2vec-transformers': invalid combination of properties"}]}.

import weaviate.classes as wvc

client.collections.create(
    name=index_name,
    properties=[
        wvc.config.Property(
            name='j_key',
            data_type=wvc.config.DataType.INT,
            index_filterable=True,
            index_searchable=False,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
    ],
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_transformers(
        vectorize_collection_name=False,
        inference_url='http://t2v-transformers:8080',
    ),
)

And client.get_meta() returns:

{
    "hostname": "http://[::]:8080",
    "modules": {
        "reranker-transformers": {
            "model": {
                "_name_or_path": "./models/model",
                "add_cross_attention": false,
                "architectures": [
                    "XLMRobertaForSequenceClassification"
                ],
                "attention_probs_dropout_prob": 0.1,
                "bad_words_ids": null,
                "begin_suppress_tokens": null,
                "bos_token_id": 0,
                "chunk_size_feed_forward": 0,
                "classifier_dropout": null,
                "cross_attention_hidden_size": null,
                "decoder_start_token_id": null,
                "diversity_penalty": 0,
                "do_sample": false,
                "early_stopping": false,
                "encoder_no_repeat_ngram_size": 0,
                "eos_token_id": 2,
                "exponential_decay_length_penalty": null,
                "finetuning_task": null,
                "forced_bos_token_id": null,
                "forced_eos_token_id": null,
                "hidden_act": "gelu",
                "hidden_dropout_prob": 0.1,
                "hidden_size": 1024,
                "id2label": {
                    "0": "LABEL_0"
                },
                "initializer_range": 0.02,
                "intermediate_size": 4096,
                "is_decoder": false,
                "is_encoder_decoder": false,
                "label2id": {
                    "LABEL_0": 0
                },
                "layer_norm_eps": 1e-05,
                "length_penalty": 1,
                "max_length": 20,
                "max_position_embeddings": 514,
                "min_length": 0,
                "model_type": "xlm-roberta",
                "no_repeat_ngram_size": 0,
                "num_attention_heads": 16,
                "num_beam_groups": 1,
                "num_beams": 1,
                "num_hidden_layers": 24,
                "num_return_sequences": 1,
                "output_attentions": false,
                "output_hidden_states": false,
                "output_past": true,
                "output_scores": false,
                "pad_token_id": 1,
                "position_embedding_type": "absolute",
                "prefix": null,
                "problem_type": null,
                "pruned_heads": {},
                "remove_invalid_values": false,
                "repetition_penalty": 1,
                "return_dict": true,
                "return_dict_in_generate": false,
                "sep_token_id": null,
                "suppress_tokens": null,
                "task_specific_params": null,
                "temperature": 1,
                "tf_legacy_loss": false,
                "tie_encoder_decoder": false,
                "tie_word_embeddings": true,
                "tokenizer_class": null,
                "top_k": 50,
                "top_p": 1,
                "torch_dtype": "float32",
                "torchscript": false,
                "transformers_version": "4.41.2",
                "type_vocab_size": 1,
                "typical_p": 1,
                "use_bfloat16": false,
                "use_cache": true,
                "vocab_size": 250002
            }
        },
        "text2vec-transformers": {
            "model": {
                "_name_or_path": "./models/model",
                "add_cross_attention": false,
                "architectures": [
                    "XLMRobertaModel"
                ],
                "attention_probs_dropout_prob": 0.1,
                "bad_words_ids": null,
                "begin_suppress_tokens": null,
                "bos_token_id": 0,
                "chunk_size_feed_forward": 0,
                "classifier_dropout": null,
                "cross_attention_hidden_size": null,
                "decoder_start_token_id": null,
                "diversity_penalty": 0,
                "do_sample": false,
                "early_stopping": false,
                "encoder_no_repeat_ngram_size": 0,
                "eos_token_id": 2,
                "exponential_decay_length_penalty": null,
                "finetuning_task": null,
                "forced_bos_token_id": null,
                "forced_eos_token_id": null,
                "hidden_act": "gelu",
                "hidden_dropout_prob": 0.1,
                "hidden_size": 1024,
                "id2label": {
                    "0": "LABEL_0",
                    "1": "LABEL_1"
                },
                "initializer_range": 0.02,
                "intermediate_size": 4096,
                "is_decoder": false,
                "is_encoder_decoder": false,
                "label2id": {
                    "LABEL_0": 0,
                    "LABEL_1": 1
                },
                "layer_norm_eps": 1e-05,
                "length_penalty": 1,
                "max_length": 20,
                "max_position_embeddings": 8194,
                "min_length": 0,
                "model_type": "xlm-roberta",
                "no_repeat_ngram_size": 0,
                "num_attention_heads": 16,
                "num_beam_groups": 1,
                "num_beams": 1,
                "num_hidden_layers": 24,
                "num_return_sequences": 1,
                "output_attentions": false,
                "output_hidden_states": false,
                "output_past": true,
                "output_scores": false,
                "pad_token_id": 1,
                "position_embedding_type": "absolute",
                "prefix": null,
                "problem_type": null,
                "pruned_heads": {},
                "remove_invalid_values": false,
                "repetition_penalty": 1,
                "return_dict": true,
                "return_dict_in_generate": false,
                "sep_token_id": null,
                "suppress_tokens": null,
                "task_specific_params": null,
                "temperature": 1,
                "tf_legacy_loss": false,
                "tie_encoder_decoder": false,
                "tie_word_embeddings": true,
                "tokenizer_class": null,
                "top_k": 50,
                "top_p": 1,
                "torch_dtype": "float32",
                "torchscript": false,
                "transformers_version": "4.39.3",
                "type_vocab_size": 1,
                "typical_p": 1,
                "use_bfloat16": false,
                "use_cache": true,
                "vocab_size": 250002
            }
        }
    },
    "version": "1.25.10"
}

Please help.

Thanks.

hi @vk_Cheung !!

Welcome to our community :hugs:

The problem here is that you have nothing to vectorize.

You don't vectorize the collection name or the property name, and the only property you have is an integer that is also marked to skip vectorization.

However, this seems to be a bug in the text2vec_transformers integration, as the very same collection works without any problem when configured with text2vec-openai.

As a workaround for now, try adding a second property that will be vectorized, or setting vectorize_collection_name=True, as in the sketch below.
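Something along these lines should do it (an untested sketch, reusing the wvc alias and index_name from your snippet; the only change from the failing call is vectorize_collection_name=True):

client.collections.create(
    name=index_name,
    properties=[
        wvc.config.Property(
            name='j_key',
            data_type=wvc.config.DataType.INT,
            index_filterable=True,
            index_searchable=False,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
    ],
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_transformers(
        # Vectorizing the collection name gives the module something to embed,
        # which should avoid the "invalid combination of properties" 422.
        vectorize_collection_name=True,
        inference_url='http://t2v-transformers:8080',
    ),
)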

I will escalate this to check if this is indeed a bug.

Thanks!

Thanks @DudaNogueira.

I changed the value of vectorizer_config, and it works for me:

weaviate_client.collections.create(
    name=index_name,
    properties=[
        wvc.config.Property(
            name='j_key',
            data_type=wvc.config.DataType.INT,
            index_filterable=True,
            index_searchable=False,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
    ],
    vectorizer_config=[
        wvc.config.Configure.NamedVectors.text2vec_transformers(
            name=field_name,
            source_properties=["text"],
        )
    ],
)
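One note for anyone landing here later: with named vectors, vector searches typically need a target_vector argument naming which vector to search. A minimal, untested sketch (the query string is illustrative; index_name and field_name are assumed to be the same variables as above):

collection = weaviate_client.collections.get(index_name)
response = collection.query.near_text(
    query="example search text",   # illustrative query text
    target_vector=field_name,      # must match the named vector defined above
    limit=5,
)
for obj in response.objects:
    print(obj.properties)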