Commit f33056f4 authored by John Wang

fix: qdrant original payload

parent aa10bf98
...@@ -49,7 +49,7 @@ class QdrantVectorIndex(BaseVectorIndex): ...@@ -49,7 +49,7 @@ class QdrantVectorIndex(BaseVectorIndex):
return self._dataset.index_struct_dict['vector_store']['collection_name'] return self._dataset.index_struct_dict['vector_store']['collection_name']
dataset_id = dataset.id dataset_id = dataset.id
return "Vector_index_" + dataset_id.replace("-", "_") return "Index_" + dataset_id.replace("-", "_")
def to_index_struct(self) -> dict: def to_index_struct(self) -> dict:
return { return {
...@@ -64,6 +64,7 @@ class QdrantVectorIndex(BaseVectorIndex): ...@@ -64,6 +64,7 @@ class QdrantVectorIndex(BaseVectorIndex):
self._embeddings, self._embeddings,
collection_name=self.get_index_name(self._dataset), collection_name=self.get_index_name(self._dataset),
ids=uuids, ids=uuids,
content_payload_key='text',
**self._client_config.to_qdrant_params() **self._client_config.to_qdrant_params()
) )
...@@ -81,7 +82,8 @@ class QdrantVectorIndex(BaseVectorIndex): ...@@ -81,7 +82,8 @@ class QdrantVectorIndex(BaseVectorIndex):
return QdrantVectorStore( return QdrantVectorStore(
client=client, client=client,
collection_name=self.get_index_name(self._dataset), collection_name=self.get_index_name(self._dataset),
embeddings=self._embeddings embeddings=self._embeddings,
content_payload_key='text'
) )
def _get_vector_store_class(self) -> type: def _get_vector_store_class(self) -> type:
...@@ -96,8 +98,17 @@ class QdrantVectorIndex(BaseVectorIndex): ...@@ -96,8 +98,17 @@ class QdrantVectorIndex(BaseVectorIndex):
vector_store.del_texts(models.Filter( vector_store.del_texts(models.Filter(
must=[ must=[
models.FieldCondition( models.FieldCondition(
key="metadata.document_id", key="doc_id" if self._is_origin() else "metadata.document_id",
match=models.MatchValue(value=document_id), match=models.MatchValue(value=document_id),
), ),
], ],
)) ))
def _is_origin(self) -> bool:
    """Report whether the dataset's stored collection name uses the 'Vector_' prefix.

    Reads ``self._dataset.index_struct_dict['vector_store']['collection_name']``.
    NOTE(review): a 'Vector_' prefix presumably marks a collection created
    under the original naming/payload layout -- confirm against the callers
    that pick a payload key based on this result.

    Returns:
        True if an index struct is stored and its collection name starts
        with 'Vector_'; False otherwise (including when no index struct
        is stored).

    Raises:
        KeyError: if the index struct lacks 'vector_store' or
            'collection_name'.
    """
    index_struct = self._dataset.index_struct_dict
    if not index_struct:
        return False

    collection_name: str = index_struct['vector_store']['collection_name']
    # str.strip('Vector_') removes any of the characters V/e/c/t/o/r/_ from
    # both ends rather than the literal prefix, so `not name.strip(...)` was
    # only True for names made up entirely of those characters. A real
    # prefix test is what the check needs.
    return collection_name.startswith('Vector_')
from typing import cast from typing import cast, Any
from langchain.schema import Document
from langchain.vectorstores import Qdrant from langchain.vectorstores import Qdrant
from qdrant_client.http.models import Filter, PointIdsList, FilterSelector from qdrant_client.http.models import Filter, PointIdsList, FilterSelector
from qdrant_client.local.qdrant_local import QdrantLocal from qdrant_client.local.qdrant_local import QdrantLocal
...@@ -44,6 +45,24 @@ class QdrantVectorStore(Qdrant): ...@@ -44,6 +45,24 @@ class QdrantVectorStore(Qdrant):
self.client.delete_collection(collection_name=self.collection_name) self.client.delete_collection(collection_name=self.collection_name)
@classmethod
def _document_from_scored_point(
    cls,
    scored_point: Any,
    content_payload_key: str,
    metadata_payload_key: str,
) -> Document:
    """Convert a scored point into a ``Document``.

    Args:
        scored_point: object exposing ``payload`` (a mapping with ``get``)
            and ``id``.
        content_payload_key: payload key whose value becomes ``page_content``.
        metadata_payload_key: payload key whose value becomes ``metadata``
            when the payload has no truthy 'doc_id' entry.

    Returns:
        A ``Document`` whose metadata is ``{'doc_id': scored_point.id}`` when
        the payload carries a truthy 'doc_id', otherwise the payload's
        metadata entry (or an empty dict when that entry is missing/falsy).
    """
    payload = scored_point.payload

    if payload.get('doc_id'):
        # Note: the point's own id is used here, not the payload's 'doc_id' value.
        # NOTE(review): presumably this branch handles an older payload layout -- confirm.
        metadata = {'doc_id': scored_point.id}
    else:
        metadata = payload.get(metadata_payload_key) or {}

    return Document(
        page_content=payload.get(content_payload_key),
        metadata=metadata,
    )
def _reload_if_needed(self): def _reload_if_needed(self):
if isinstance(self.client, QdrantLocal): if isinstance(self.client, QdrantLocal):
self.client = cast(QdrantLocal, self.client) self.client = cast(QdrantLocal, self.client)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment