Unverified Commit a43e80dd authored by Jyong's avatar Jyong Committed by GitHub

add qdrant migration (#1046)

Co-authored-by: 's avatarjyong <jyong@dify.ai>
parent ad5f27bc
...@@ -329,7 +329,7 @@ def create_qdrant_indexes(): ...@@ -329,7 +329,7 @@ def create_qdrant_indexes():
except Exception: except Exception:
provider = Provider( provider = Provider(
id='provider_id', id='provider_id',
tenant_id='tenant_id', tenant_id=dataset.tenant_id,
provider_name='openai', provider_name='openai',
provider_type=ProviderType.CUSTOM.value, provider_type=ProviderType.CUSTOM.value,
encrypted_config=json.dumps({'openai_api_key': 'TEST'}), encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
...@@ -369,6 +369,67 @@ def create_qdrant_indexes(): ...@@ -369,6 +369,67 @@ def create_qdrant_indexes():
click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(create_count), fg='green')) click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(create_count), fg='green'))
@click.command('update-qdrant-indexes', help='Update qdrant indexes.')
def update_qdrant_indexes():
click.echo(click.style('Start Update qdrant indexes.', fg='green'))
create_count = 0
page = 1
while True:
try:
datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
.order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
except NotFound:
break
page += 1
for dataset in datasets:
if dataset.index_struct_dict:
if dataset.index_struct_dict['type'] != 'qdrant':
try:
click.echo('Update dataset qdrant index: {}'.format(dataset.id))
try:
embedding_model = ModelFactory.get_embedding_model(
tenant_id=dataset.tenant_id,
model_provider_name=dataset.embedding_model_provider,
model_name=dataset.embedding_model
)
except Exception:
provider = Provider(
id='provider_id',
tenant_id=dataset.tenant_id,
provider_name='openai',
provider_type=ProviderType.CUSTOM.value,
encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
is_valid=True,
)
model_provider = OpenAIProvider(provider=provider)
embedding_model = OpenAIEmbedding(name="text-embedding-ada-002", model_provider=model_provider)
embeddings = CacheEmbedding(embedding_model)
from core.index.vector_index.qdrant_vector_index import QdrantVectorIndex, QdrantConfig
index = QdrantVectorIndex(
dataset=dataset,
config=QdrantConfig(
endpoint=current_app.config.get('QDRANT_URL'),
api_key=current_app.config.get('QDRANT_API_KEY'),
root_path=current_app.root_path
),
embeddings=embeddings
)
if index:
index.update_qdrant_dataset(dataset)
create_count += 1
else:
click.echo('passed.')
except Exception as e:
click.echo(
click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)), fg='red'))
continue
click.echo(click.style('Congratulations! Update {} dataset indexes.'.format(create_count), fg='green'))
def register_commands(app): def register_commands(app):
app.cli.add_command(reset_password) app.cli.add_command(reset_password)
app.cli.add_command(reset_email) app.cli.add_command(reset_email)
...@@ -378,3 +439,4 @@ def register_commands(app): ...@@ -378,3 +439,4 @@ def register_commands(app):
app.cli.add_command(sync_anthropic_hosted_providers) app.cli.add_command(sync_anthropic_hosted_providers)
app.cli.add_command(clean_unused_dataset_indexes) app.cli.add_command(clean_unused_dataset_indexes)
app.cli.add_command(create_qdrant_indexes) app.cli.add_command(create_qdrant_indexes)
app.cli.add_command(update_qdrant_indexes)
\ No newline at end of file
...@@ -219,3 +219,27 @@ class BaseVectorIndex(BaseIndex): ...@@ -219,3 +219,27 @@ class BaseVectorIndex(BaseIndex):
raise e raise e
logging.info(f"Dataset {dataset.id} recreate successfully.") logging.info(f"Dataset {dataset.id} recreate successfully.")
def update_qdrant_dataset(self, dataset: Dataset):
logging.info(f"update_qdrant_dataset {dataset.id}")
segment = db.session.query(DocumentSegment).filter(
DocumentSegment.dataset_id == dataset.id,
DocumentSegment.status == 'completed',
DocumentSegment.enabled == True
).first()
if segment:
try:
exist = self.text_exists(segment.index_node_id)
if exist:
index_struct = {
"type": 'qdrant',
"vector_store": {"class_prefix": dataset.index_struct_dict['vector_store']['class_prefix']}
}
dataset.index_struct = json.dumps(index_struct)
db.session.commit()
except Exception as e:
raise e
logging.info(f"Dataset {dataset.id} recreate successfully.")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment