Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
4be30876
Unverified
Commit
4be30876
authored
Feb 23, 2024
by
Jyong
Committed by
GitHub
Feb 23, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix/new RAG bugs (#2547)
Co-authored-by:
jyong
<
jyong@dify.ai
>
parent
49da8a23
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
9 additions
and
6 deletions
+9
-6
indexing_runner.py
api/core/indexing_runner.py
+1
-1
retrieval_service.py
api/core/rag/datasource/retrieval_service.py
+6
-2
dataset_multi_retriever_tool.py
...ls/tool/dataset_retriever/dataset_multi_retriever_tool.py
+1
-1
dataset_retriever_tool.py
...re/tools/tool/dataset_retriever/dataset_retriever_tool.py
+1
-1
clean_dataset_task.py
api/tasks/clean_dataset_task.py
+0
-1
No files found.
api/core/indexing_runner.py
View file @
4be30876
...
@@ -365,7 +365,7 @@ class IndexingRunner:
...
@@ -365,7 +365,7 @@ class IndexingRunner:
notion_info
=
{
notion_info
=
{
"notion_workspace_id"
:
data_source_info
[
'notion_workspace_id'
],
"notion_workspace_id"
:
data_source_info
[
'notion_workspace_id'
],
"notion_obj_id"
:
data_source_info
[
'notion_page_id'
],
"notion_obj_id"
:
data_source_info
[
'notion_page_id'
],
"notion_page_type"
:
data_source_info
[
'
notion_page_
type'
],
"notion_page_type"
:
data_source_info
[
'type'
],
"document"
:
dataset_document
"document"
:
dataset_document
},
},
document_model
=
dataset_document
.
doc_form
document_model
=
dataset_document
.
doc_form
...
...
api/core/rag/datasource/retrieval_service.py
View file @
4be30876
...
@@ -2,7 +2,6 @@ import threading
...
@@ -2,7 +2,6 @@ import threading
from
typing
import
Optional
from
typing
import
Optional
from
flask
import
Flask
,
current_app
from
flask
import
Flask
,
current_app
from
flask_login
import
current_user
from
core.rag.data_post_processor.data_post_processor
import
DataPostProcessor
from
core.rag.data_post_processor.data_post_processor
import
DataPostProcessor
from
core.rag.datasource.keyword.keyword_factory
import
Keyword
from
core.rag.datasource.keyword.keyword_factory
import
Keyword
...
@@ -27,6 +26,11 @@ class RetrievalService:
...
@@ -27,6 +26,11 @@ class RetrievalService:
@
classmethod
@
classmethod
def
retrieve
(
cls
,
retrival_method
:
str
,
dataset_id
:
str
,
query
:
str
,
def
retrieve
(
cls
,
retrival_method
:
str
,
dataset_id
:
str
,
query
:
str
,
top_k
:
int
,
score_threshold
:
Optional
[
float
]
=
.0
,
reranking_model
:
Optional
[
dict
]
=
None
):
top_k
:
int
,
score_threshold
:
Optional
[
float
]
=
.0
,
reranking_model
:
Optional
[
dict
]
=
None
):
dataset
=
db
.
session
.
query
(
Dataset
)
.
filter
(
Dataset
.
id
==
dataset_id
)
.
first
()
if
not
dataset
or
dataset
.
available_document_count
==
0
or
dataset
.
available_segment_count
==
0
:
return
[]
all_documents
=
[]
all_documents
=
[]
threads
=
[]
threads
=
[]
# retrieval_model source with keyword
# retrieval_model source with keyword
...
@@ -73,7 +77,7 @@ class RetrievalService:
...
@@ -73,7 +77,7 @@ class RetrievalService:
thread
.
join
()
thread
.
join
()
if
retrival_method
==
'hybrid_search'
:
if
retrival_method
==
'hybrid_search'
:
data_post_processor
=
DataPostProcessor
(
str
(
current_user
.
current_
tenant_id
),
reranking_model
,
False
)
data_post_processor
=
DataPostProcessor
(
str
(
dataset
.
tenant_id
),
reranking_model
,
False
)
all_documents
=
data_post_processor
.
invoke
(
all_documents
=
data_post_processor
.
invoke
(
query
=
query
,
query
=
query
,
documents
=
all_documents
,
documents
=
all_documents
,
...
...
api/core/tools/tool/dataset_retriever/dataset_multi_retriever_tool.py
View file @
4be30876
...
@@ -171,7 +171,7 @@ class DatasetMultiRetrieverTool(BaseTool):
...
@@ -171,7 +171,7 @@ class DatasetMultiRetrieverTool(BaseTool):
if
dataset
.
indexing_technique
==
"economy"
:
if
dataset
.
indexing_technique
==
"economy"
:
# use keyword table query
# use keyword table query
documents
=
RetrievalService
.
retrieve
(
retrival_method
=
retrieval_model
[
'search_method'
]
,
documents
=
RetrievalService
.
retrieve
(
retrival_method
=
'keyword_search'
,
dataset_id
=
dataset
.
id
,
dataset_id
=
dataset
.
id
,
query
=
query
,
query
=
query
,
top_k
=
self
.
top_k
top_k
=
self
.
top_k
...
...
api/core/tools/tool/dataset_retriever/dataset_retriever_tool.py
View file @
4be30876
...
@@ -69,7 +69,7 @@ class DatasetRetrieverTool(BaseTool):
...
@@ -69,7 +69,7 @@ class DatasetRetrieverTool(BaseTool):
retrieval_model
=
dataset
.
retrieval_model
if
dataset
.
retrieval_model
else
default_retrieval_model
retrieval_model
=
dataset
.
retrieval_model
if
dataset
.
retrieval_model
else
default_retrieval_model
if
dataset
.
indexing_technique
==
"economy"
:
if
dataset
.
indexing_technique
==
"economy"
:
# use keyword table query
# use keyword table query
documents
=
RetrievalService
.
retrieve
(
retrival_method
=
retrieval_model
[
'search_method'
]
,
documents
=
RetrievalService
.
retrieve
(
retrival_method
=
'keyword_search'
,
dataset_id
=
dataset
.
id
,
dataset_id
=
dataset
.
id
,
query
=
query
,
query
=
query
,
top_k
=
self
.
top_k
top_k
=
self
.
top_k
...
...
api/tasks/clean_dataset_task.py
View file @
4be30876
...
@@ -40,7 +40,6 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
...
@@ -40,7 +40,6 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
indexing_technique
=
indexing_technique
,
indexing_technique
=
indexing_technique
,
index_struct
=
index_struct
,
index_struct
=
index_struct
,
collection_binding_id
=
collection_binding_id
,
collection_binding_id
=
collection_binding_id
,
doc_form
=
doc_form
)
)
documents
=
db
.
session
.
query
(
Document
)
.
filter
(
Document
.
dataset_id
==
dataset_id
)
.
all
()
documents
=
db
.
session
.
query
(
Document
)
.
filter
(
Document
.
dataset_id
==
dataset_id
)
.
all
()
segments
=
db
.
session
.
query
(
DocumentSegment
)
.
filter
(
DocumentSegment
.
dataset_id
==
dataset_id
)
.
all
()
segments
=
db
.
session
.
query
(
DocumentSegment
)
.
filter
(
DocumentSegment
.
dataset_id
==
dataset_id
)
.
all
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment