Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
cf4b7667
Commit
cf4b7667
authored
May 24, 2023
by
John Wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: add sync index command
parent
7f50f41f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
88 additions
and
4 deletions
+88
-4
commands.py
api/commands.py
+84
-0
weaviate_vector_store_client.py
api/core/vector_store/weaviate_vector_store_client.py
+4
-4
No files found.
api/commands.py
View file @
cf4b7667
import
datetime
import
json
import
logging
import
random
import
string
import
click
from
llama_index.data_structs.node_v2
import
DocumentRelationship
,
Node
from
core.index.vector_index
import
VectorIndex
from
extensions.ext_redis
import
redis_client
from
libs.password
import
password_pattern
,
valid_password
,
hash_password
from
libs.helper
import
email
as
email_validate
from
extensions.ext_database
import
db
from
models.account
import
InvitationCode
from
models.dataset
import
Dataset
,
Document
,
DocumentSegment
from
models.model
import
Account
,
AppModelConfig
,
ApiToken
,
Site
,
App
,
RecommendedApp
import
secrets
import
base64
...
...
@@ -153,8 +158,87 @@ def generate_recommended_apps():
print
(
'Done!'
)
# Number of datasets fetched per page while walking the dataset table.
_SYNC_DATASET_PAGE_SIZE = 100
# Lifetime, in seconds, of the Redis marker recording that a document was synced.
_SYNCED_DOC_TTL_SECONDS = 3600


def _fetch_dataset_page(after=None):
    """Return the next page of datasets, oldest first.

    Uses keyset pagination on ``(created_at, id)``. Paging on ``created_at``
    alone would skip every dataset that shares its creation timestamp with
    the last row of the previous page; the ``id`` tie-breaker prevents that.

    :param after: last dataset of the previous page, or ``None`` for the first page.
    :return: list of at most ``_SYNC_DATASET_PAGE_SIZE`` ``Dataset`` rows.
    """
    query = db.session.query(Dataset)
    if after is not None:
        query = query.filter(
            (Dataset.created_at > after.created_at)
            | ((Dataset.created_at == after.created_at) & (Dataset.id > after.id))
        )
    return (
        query.order_by(Dataset.created_at.asc(), Dataset.id.asc())
        .limit(_SYNC_DATASET_PAGE_SIZE)
        .all()
    )


def _build_segment_nodes(document_id, segments):
    """Turn position-ordered segments into index nodes linked to their neighbours.

    Every node gets a SOURCE relationship to ``document_id``; consecutive
    nodes are additionally chained through PREVIOUS / NEXT relationships.

    :param document_id: id of the document the segments belong to.
    :param segments: ``DocumentSegment`` rows, already ordered by position.
    :return: list of ``Node`` objects, one per segment, in the same order.
    """
    nodes = []
    previous_node = None
    for segment in segments:
        relationships = {DocumentRelationship.SOURCE: document_id}
        if previous_node is not None:
            relationships[DocumentRelationship.PREVIOUS] = previous_node.doc_id
            # Back-patch the previous node now that its successor is known.
            previous_node.relationships[DocumentRelationship.NEXT] = segment.index_node_id

        node = Node(
            doc_id=segment.index_node_id,
            doc_hash=segment.index_node_hash,
            text=segment.content,
            extra_info=None,
            node_info=None,
            relationships=relationships,
        )
        nodes.append(node)
        previous_node = node
    return nodes


@click.command('sync-index', help='Sync vector objects to another vector store')
def sync_index_vector_objects():
    """Re-add the segments of every eligible document to its dataset's vector index.

    Walks all datasets page by page (oldest first). Only datasets whose
    ``indexing_technique`` is ``"high_quality"`` are processed, and within
    them only documents that are completed, enabled and not archived.
    A document that was synced successfully is remembered in Redis under
    ``synced_doc:<id>`` for ``_SYNCED_DOC_TTL_SECONDS`` so that a re-run
    within that window skips it. A failure on one document is logged and
    does not abort the run.
    """
    print('Syncing vector objects...')

    datasets = _fetch_dataset_page()
    while datasets:
        for dataset in datasets:
            if dataset.indexing_technique != "high_quality":
                continue

            vector_index = VectorIndex(dataset=dataset)
            print('Syncing dataset {}...'.format(dataset.id))

            documents = db.session.query(Document).filter(
                Document.dataset_id == dataset.id
            ).all()
            for document in documents:
                if document.indexing_status != 'completed' or document.archived or not document.enabled:
                    continue

                cache_key = 'synced_doc:{}'.format(document.id)
                if redis_client.get(cache_key) is not None:
                    print('Document {} has been synced before, skip.'.format(document.id))
                    continue

                segments = db.session.query(DocumentSegment).filter(
                    DocumentSegment.document_id == document.id,
                    DocumentSegment.enabled == True
                ).order_by(DocumentSegment.position.asc()).all()

                nodes = _build_segment_nodes(document.id, segments)

                try:
                    vector_index.add_nodes(
                        nodes=nodes,
                        duplicate_check=True
                    )
                    # Mark as synced only after the nodes were added successfully.
                    redis_client.setex(cache_key, _SYNCED_DOC_TTL_SECONDS, 1)
                except Exception:
                    # Best effort: log and move on to the next document.
                    logging.exception('failed to add nodes to vector index')

        # The page is non-empty here, so its last row is the pagination cursor.
        datasets = _fetch_dataset_page(after=datasets[-1])

    print('Done!')
def register_commands(app):
    """Add each CLI command of this module to ``app.cli``.

    :param app: application object exposing a ``cli`` attribute with ``add_command``.
    """
    # Registration order is kept as listed.
    commands = (
        reset_password,
        reset_email,
        generate_invitation_codes,
        generate_recommended_apps,
        sync_index_vector_objects,
    )
    for command in commands:
        app.cli.add_command(command)
api/core/vector_store/weaviate_vector_store_client.py
View file @
cf4b7667
...
...
@@ -110,10 +110,10 @@ class WeaviateWithSimilaritiesVectorStore(WeaviateVectorStore, EnhanceVectorStor
"class"
:
class_name
,
# <= note the capital "A".
"description"
:
f
"Class for {class_name}"
,
"properties"
:
properties
,
"vectorIndexConfig"
:
{
"efConstruction"
:
160
,
"maxConnections"
:
32
},
#
"vectorIndexConfig": {
#
"efConstruction": 160,
#
"maxConnections": 32
#
},
}
client
.
schema
.
create_class
(
class_obj
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment