fix: fix hybrid search example for pydantic v1 (#263)

qdrant · Jun 14, 2024 · e071c84 · e071c84
1 parent e1ecfe9
commit e071c84
Showing 1 changed file with 30 additions and 11 deletions.
diff --git a/docs/examples/Hybrid_Search.ipynb b/docs/examples/Hybrid_Search.ipynb
@@ -328,7 +328,9 @@
    ],
    "source": [
     "source_df = dataset.to_pandas()\n",
-    "df = source_df.drop_duplicates(subset=[\"product_text\", \"product_title\", \"product_bullet_point\", \"product_brand\"])\n",
+    "df = source_df.drop_duplicates(\n",
+    "    subset=[\"product_text\", \"product_title\", \"product_bullet_point\", \"product_brand\"]\n",
+    ")\n",
     "df = df.dropna(subset=[\"product_text\", \"product_title\", \"product_bullet_point\", \"product_brand\"])\n",
     "df.head()"
    ]
@@ -367,7 +369,9 @@
    },
    "outputs": [],
    "source": [
-    "df[\"combined_text\"] = df[\"product_title\"] + \"\\n\" + df[\"product_text\"] + \"\\n\" + df[\"product_bullet_point\"]"
+    "df[\"combined_text\"] = (\n",
+    "    df[\"product_title\"] + \"\\n\" + df[\"product_text\"] + \"\\n\" + df[\"product_bullet_point\"]\n",
+    ")"
    ]
   },
   {
@@ -489,7 +493,9 @@
     "    return list(sparse_model.embed(texts, batch_size=32))\n",
     "\n",
     "\n",
-    "sparse_embedding: List[SparseEmbedding] = make_sparse_embedding([\"Fastembed is a great library for text embeddings!\"])\n",
+    "sparse_embedding: List[SparseEmbedding] = make_sparse_embedding(\n",
+    "    [\"Fastembed is a great library for text embeddings!\"]\n",
+    ")\n",
     "sparse_embedding"
    ]
   },
@@ -628,7 +634,9 @@
     "        token_weight_dict[token] = weight\n",
     "\n",
     "    # Sort the dictionary by weights\n",
-    "    token_weight_dict = dict(sorted(token_weight_dict.items(), key=lambda item: item[1], reverse=True))\n",
+    "    token_weight_dict = dict(\n",
+    "        sorted(token_weight_dict.items(), key=lambda item: item[1], reverse=True)\n",
+    "    )\n",
     "    return token_weight_dict\n",
     "\n",
     "\n",
@@ -870,14 +878,21 @@
     "    dense_vectors = df[\"dense_embedding\"].tolist()\n",
     "    rows = df.to_dict(orient=\"records\")\n",
     "    points = []\n",
-    "    for idx, (text, sparse_vector, dense_vector) in enumerate(zip(product_texts, sparse_vectors, dense_vectors)):\n",
-    "        sparse_vector = SparseVector(indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist())\n",
+    "    for idx, (text, sparse_vector, dense_vector) in enumerate(\n",
+    "        zip(product_texts, sparse_vectors, dense_vectors)\n",
+    "    ):\n",
+    "        sparse_vector = SparseVector(\n",
+    "            indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist()\n",
+    "        )\n",
     "        point = PointStruct(\n",
     "            id=idx,\n",
-    "            payload={\"text\": text, \"product_id\": rows[idx][\"product_id\"]},  # Add any additional payload if necessary\n",
+    "            payload={\n",
+    "                \"text\": text,\n",
+    "                \"product_id\": rows[idx][\"product_id\"],\n",
+    "            },  # Add any additional payload if necessary\n",
     "            vector={\n",
     "                \"text-sparse\": sparse_vector,\n",
-    "                \"text-dense\": dense_vector,\n",
+    "                \"text-dense\": dense_vector.tolist(),\n",
     "            },\n",
     "        )\n",
     "        points.append(point)\n",
@@ -936,7 +951,7 @@
     "            SearchRequest(\n",
     "                vector=NamedVector(\n",
     "                    name=\"text-dense\",\n",
-    "                    vector=query_dense_vector[0],\n",
+    "                    vector=query_dense_vector[0].tolist(),\n",
     "                ),\n",
     "                limit=10,\n",
     "                with_payload=True,\n",
@@ -1133,8 +1148,12 @@
     }
    ],
    "source": [
-    "def find_point_by_id(client: QdrantClient, collection_name: str, rrf_rank_list: List[Tuple[int, float]]):\n",
-    "    return client.retrieve(collection_name=collection_name, ids=[item[0] for item in rrf_rank_list])\n",
+    "def find_point_by_id(\n",
+    "    client: QdrantClient, collection_name: str, rrf_rank_list: List[Tuple[int, float]]\n",
+    "):\n",
+    "    return client.retrieve(\n",
+    "        collection_name=collection_name, ids=[item[0] for item in rrf_rank_list]\n",
+    "    )\n",
     "\n",
     "\n",
     "find_point_by_id(client, collection_name, rrf_rank_list)"