Skip to content

Commit

Permalink
fix: fix hybrid search example for pydantic v1 (#263)
Browse files (browse the repository at this point in the history)
  • Loading branch information
joein committed Jun 14, 2024
1 parent e1ecfe9 commit e071c84
Showing 1 changed file with 30 additions and 11 deletions.
41 changes: 30 additions & 11 deletions docs/examples/Hybrid_Search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,9 @@
],
"source": [
"source_df = dataset.to_pandas()\n",
"df = source_df.drop_duplicates(subset=[\"product_text\", \"product_title\", \"product_bullet_point\", \"product_brand\"])\n",
"df = source_df.drop_duplicates(\n",
" subset=[\"product_text\", \"product_title\", \"product_bullet_point\", \"product_brand\"]\n",
")\n",
"df = df.dropna(subset=[\"product_text\", \"product_title\", \"product_bullet_point\", \"product_brand\"])\n",
"df.head()"
]
Expand Down Expand Up @@ -367,7 +369,9 @@
},
"outputs": [],
"source": [
"df[\"combined_text\"] = df[\"product_title\"] + \"\\n\" + df[\"product_text\"] + \"\\n\" + df[\"product_bullet_point\"]"
"df[\"combined_text\"] = (\n",
" df[\"product_title\"] + \"\\n\" + df[\"product_text\"] + \"\\n\" + df[\"product_bullet_point\"]\n",
")"
]
},
{
Expand Down Expand Up @@ -489,7 +493,9 @@
" return list(sparse_model.embed(texts, batch_size=32))\n",
"\n",
"\n",
"sparse_embedding: List[SparseEmbedding] = make_sparse_embedding([\"Fastembed is a great library for text embeddings!\"])\n",
"sparse_embedding: List[SparseEmbedding] = make_sparse_embedding(\n",
" [\"Fastembed is a great library for text embeddings!\"]\n",
")\n",
"sparse_embedding"
]
},
Expand Down Expand Up @@ -628,7 +634,9 @@
" token_weight_dict[token] = weight\n",
"\n",
" # Sort the dictionary by weights\n",
" token_weight_dict = dict(sorted(token_weight_dict.items(), key=lambda item: item[1], reverse=True))\n",
" token_weight_dict = dict(\n",
" sorted(token_weight_dict.items(), key=lambda item: item[1], reverse=True)\n",
" )\n",
" return token_weight_dict\n",
"\n",
"\n",
Expand Down Expand Up @@ -870,14 +878,21 @@
" dense_vectors = df[\"dense_embedding\"].tolist()\n",
" rows = df.to_dict(orient=\"records\")\n",
" points = []\n",
" for idx, (text, sparse_vector, dense_vector) in enumerate(zip(product_texts, sparse_vectors, dense_vectors)):\n",
" sparse_vector = SparseVector(indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist())\n",
" for idx, (text, sparse_vector, dense_vector) in enumerate(\n",
" zip(product_texts, sparse_vectors, dense_vectors)\n",
" ):\n",
" sparse_vector = SparseVector(\n",
" indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist()\n",
" )\n",
" point = PointStruct(\n",
" id=idx,\n",
" payload={\"text\": text, \"product_id\": rows[idx][\"product_id\"]}, # Add any additional payload if necessary\n",
" payload={\n",
" \"text\": text,\n",
" \"product_id\": rows[idx][\"product_id\"],\n",
" }, # Add any additional payload if necessary\n",
" vector={\n",
" \"text-sparse\": sparse_vector,\n",
" \"text-dense\": dense_vector,\n",
" \"text-dense\": dense_vector.tolist(),\n",
" },\n",
" )\n",
" points.append(point)\n",
Expand Down Expand Up @@ -936,7 +951,7 @@
" SearchRequest(\n",
" vector=NamedVector(\n",
" name=\"text-dense\",\n",
" vector=query_dense_vector[0],\n",
" vector=query_dense_vector[0].tolist(),\n",
" ),\n",
" limit=10,\n",
" with_payload=True,\n",
Expand Down Expand Up @@ -1133,8 +1148,12 @@
}
],
"source": [
"def find_point_by_id(client: QdrantClient, collection_name: str, rrf_rank_list: List[Tuple[int, float]]):\n",
" return client.retrieve(collection_name=collection_name, ids=[item[0] for item in rrf_rank_list])\n",
"def find_point_by_id(\n",
" client: QdrantClient, collection_name: str, rrf_rank_list: List[Tuple[int, float]]\n",
"):\n",
" return client.retrieve(\n",
" collection_name=collection_name, ids=[item[0] for item in rrf_rank_list]\n",
" )\n",
"\n",
"\n",
"find_point_by_id(client, collection_name, rrf_rank_list)"
Expand Down

0 comments on commit e071c84

Please sign in to comment.