Commit a5d73a86 authored by Joel

Merge branch 'main' into feat/conversation-pin-op

parents 21330a41 c67ecff3
@@ -3,7 +3,6 @@ from typing import Optional
import langchain
from flask import Flask
from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING
from pydantic import BaseModel
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
@@ -22,9 +21,6 @@ hosted_llm_credentials = HostedLLMCredentials()
def init_app(app: Flask):
    formatter = OneLineFormatter()
    DEFAULT_FORMATTER_MAPPING['f-string'] = formatter.format

    if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
        langchain.verbose = True
......
@@ -23,7 +23,7 @@ from core.memory.read_only_conversation_token_db_buffer_shared_memory import \
from core.memory.read_only_conversation_token_db_string_buffer_shared_memory import \
ReadOnlyConversationTokenDBStringBufferSharedMemory
from core.prompt.prompt_builder import PromptBuilder
-from core.prompt.prompt_template import OutLinePromptTemplate
+from core.prompt.prompt_template import JinjaPromptTemplate
from core.prompt.prompts import MORE_LIKE_THIS_GENERATE_PROMPT
from models.model import App, AppModelConfig, Account, Conversation, Message
@@ -35,6 +35,8 @@ class Completion:
"""
errors: ProviderTokenNotInitError
"""
+query = PromptBuilder.process_template(query)
memory = None
if conversation:
# get memory of conversation (read-only)
@@ -141,18 +143,17 @@ class Completion:
memory: Optional[ReadOnlyConversationTokenDBBufferSharedMemory]) -> \
Tuple[Union[str | List[BaseMessage]], Optional[List[str]]]:
# disable template string in query
-query_params = OutLinePromptTemplate.from_template(template=query).input_variables
-if query_params:
-    for query_param in query_params:
-        if query_param not in inputs:
-            inputs[query_param] = '{' + query_param + '}'
+# query_params = JinjaPromptTemplate.from_template(template=query).input_variables
+# if query_params:
+#     for query_param in query_params:
+#         if query_param not in inputs:
+#             inputs[query_param] = '{{' + query_param + '}}'
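Review note: the escaping above is presumably safe to drop because jinja2 substitutes values verbatim and never re-renders braces inside them. A minimal sketch of that assumption, using the `JinjaPromptTemplate` introduced later in this diff (the sample query text is illustrative):

```python
# Hedged illustration: with a jinja2-format template, braces inside a
# substituted value are plain text, so a user query containing "{foo}"
# no longer collides with template variables.
t = JinjaPromptTemplate.from_template("Human: {{query}}")
print(t.format(query="what does {foo} mean?"))
# -> "Human: what does {foo} mean?"
```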
pre_prompt = PromptBuilder.process_template(pre_prompt) if pre_prompt else pre_prompt
if mode == 'completion':
-prompt_template = OutLinePromptTemplate.from_template(
+prompt_template = JinjaPromptTemplate.from_template(
template=("""Use the following CONTEXT as your learned knowledge:
[CONTEXT]
-{context}
+{{context}}
[END CONTEXT]
When answer to user:
@@ -162,16 +163,16 @@ Avoid mentioning that you obtained the information from the context.
And answer according to the language of the user's question.
""" if chain_output else "")
+ (pre_prompt + "\n" if pre_prompt else "")
+ "{query}\n"
+ "{{query}}\n"
)
if chain_output:
inputs['context'] = chain_output
-context_params = OutLinePromptTemplate.from_template(template=chain_output).input_variables
-if context_params:
-    for context_param in context_params:
-        if context_param not in inputs:
-            inputs[context_param] = '{' + context_param + '}'
+# context_params = JinjaPromptTemplate.from_template(template=chain_output).input_variables
+# if context_params:
+#     for context_param in context_params:
+#         if context_param not in inputs:
+#             inputs[context_param] = '{{' + context_param + '}}'
prompt_inputs = {k: inputs[k] for k in prompt_template.input_variables if k in inputs}
prompt_content = prompt_template.format(
@@ -195,7 +196,7 @@ And answer according to the language of the user's question.
if pre_prompt:
pre_prompt_inputs = {k: inputs[k] for k in
-OutLinePromptTemplate.from_template(template=pre_prompt).input_variables
+JinjaPromptTemplate.from_template(template=pre_prompt).input_variables
if k in inputs}
if pre_prompt_inputs:
@@ -205,7 +206,7 @@ And answer according to the language of the user's question.
human_inputs['context'] = chain_output
human_message_prompt += """Use the following CONTEXT as your learned knowledge.
[CONTEXT]
-{context}
+{{context}}
[END CONTEXT]
When answer to user:
@@ -218,7 +219,7 @@ And answer according to the language of the user's question.
if pre_prompt:
human_message_prompt += pre_prompt
query_prompt = "\nHuman: {query}\nAI: "
query_prompt = "\nHuman: {{query}}\nAI: "
if memory:
# append chat histories
@@ -234,11 +235,11 @@ And answer according to the language of the user's question.
histories = cls.get_history_messages_from_memory(memory, rest_tokens)
# disable template string in query
-histories_params = OutLinePromptTemplate.from_template(template=histories).input_variables
-if histories_params:
-    for histories_param in histories_params:
-        if histories_param not in human_inputs:
-            human_inputs[histories_param] = '{' + histories_param + '}'
+# histories_params = JinjaPromptTemplate.from_template(template=histories).input_variables
+# if histories_params:
+#     for histories_param in histories_params:
+#         if histories_param not in human_inputs:
+#             human_inputs[histories_param] = '{{' + histories_param + '}}'
human_message_prompt += "\n\n" + histories
......
@@ -10,7 +10,7 @@ from core.constant import llm_constant
from core.llm.llm_builder import LLMBuilder
from core.llm.provider.llm_provider_service import LLMProviderService
from core.prompt.prompt_builder import PromptBuilder
-from core.prompt.prompt_template import OutLinePromptTemplate
+from core.prompt.prompt_template import JinjaPromptTemplate
from events.message_event import message_was_created
from extensions.ext_database import db
from extensions.ext_redis import redis_client
@@ -78,7 +78,7 @@ class ConversationMessageTask:
if self.mode == 'chat':
introduction = self.app_model_config.opening_statement
if introduction:
-prompt_template = OutLinePromptTemplate.from_template(template=PromptBuilder.process_template(introduction))
+prompt_template = JinjaPromptTemplate.from_template(template=introduction)
prompt_inputs = {k: self.inputs[k] for k in prompt_template.input_variables if k in self.inputs}
try:
introduction = prompt_template.format(**prompt_inputs)
@@ -86,8 +86,7 @@ class ConversationMessageTask:
pass
if self.app_model_config.pre_prompt:
-pre_prompt = PromptBuilder.process_template(self.app_model_config.pre_prompt)
-system_message = PromptBuilder.to_system_message(pre_prompt, self.inputs)
+system_message = PromptBuilder.to_system_message(self.app_model_config.pre_prompt, self.inputs)
system_instruction = system_message.content
llm = LLMBuilder.to_llm(self.tenant_id, self.model_name)
system_instruction_tokens = llm.get_messages_tokens([system_message])
@@ -157,7 +156,7 @@ class ConversationMessageTask:
self.message.message = llm_message.prompt
self.message.message_tokens = message_tokens
self.message.message_unit_price = message_unit_price
-self.message.answer = llm_message.completion.strip() if llm_message.completion else ''
+self.message.answer = PromptBuilder.process_template(llm_message.completion.strip()) if llm_message.completion else ''
self.message.answer_tokens = answer_tokens
self.message.answer_unit_price = answer_unit_price
self.message.provider_response_latency = llm_message.latency
......
@@ -36,8 +36,9 @@ class ExcelLoader(BaseLoader):
if keys == []:
keys = list(map(str, row))
else:
-row_dict = dict(zip(keys, row))
+row_dict = dict(zip(keys, list(map(str, row))))
row_dict = {k: v for k, v in row_dict.items() if v}
-data.append(json.dumps(row_dict, ensure_ascii=False))
+item = ''.join(f'{k}:{v}\n' for k, v in row_dict.items())
+data.append(item)
return [Document(page_content='\n\n'.join(data))]
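The Excel loader now emits each row as plain `key:value` lines instead of a JSON dump, with empty cells dropped. A self-contained sketch of the per-row transformation (the sample `keys`/`row` values are illustrative):

```python
keys = ['name', 'age', 'city']
row = ['Ada', 36, '']                                  # blank cell in the sheet
row_dict = dict(zip(keys, list(map(str, row))))        # stringify all cells
row_dict = {k: v for k, v in row_dict.items() if v}    # drop empty cells
item = ''.join(f'{k}:{v}\n' for k, v in row_dict.items())
print(item)
# name:Ada
# age:36
```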
@@ -134,6 +134,16 @@ class NotionLoader(BaseLoader):
else:
value = property_value[type]
data[property_name] = value
+row_dict = {k: v for k, v in data.items() if v}
+row_content = ''
+for key, value in row_dict.items():
+    if isinstance(value, dict):
+        value_dict = {k: v for k, v in value.items() if v}
+        value_content = ''.join(f'{k}:{v} ' for k, v in value_dict.items())
+        row_content = row_content + f'{key}:{value_content}\n'
+    else:
+        row_content = row_content + f'{key}:{value}\n'
+database_content_list.append(row_content)
-database_content_list.append(json.dumps(data, ensure_ascii=False))
return "\n\n".join(database_content_list)
......
import logging
from langchain import PromptTemplate
from langchain.chat_models.base import BaseChatModel
from langchain.schema import HumanMessage, OutputParserException
@@ -10,7 +11,7 @@ from core.llm.token_calculator import TokenCalculator
from core.prompt.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser
from core.prompt.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
-from core.prompt.prompt_template import OutLinePromptTemplate
+from core.prompt.prompt_template import JinjaPromptTemplate, OutLinePromptTemplate
from core.prompt.prompts import CONVERSATION_TITLE_PROMPT, CONVERSATION_SUMMARY_PROMPT, INTRODUCTION_GENERATE_PROMPT
@@ -91,8 +92,8 @@ class LLMGenerator:
output_parser = SuggestedQuestionsAfterAnswerOutputParser()
format_instructions = output_parser.get_format_instructions()
-prompt = OutLinePromptTemplate(
-    template="{histories}\n{format_instructions}\nquestions:\n",
+prompt = JinjaPromptTemplate(
+    template="{{histories}}\n{{format_instructions}}\nquestions:\n",
input_variables=["histories"],
partial_variables={"format_instructions": format_instructions}
)
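Here the template is built directly rather than via `from_template`, pre-binding `format_instructions` as a partial variable so callers only supply `histories`. A hedged usage sketch (the instruction text is a stand-in):

```python
prompt = JinjaPromptTemplate(
    template="{{histories}}\n{{format_instructions}}\nquestions:\n",
    input_variables=["histories"],
    partial_variables={"format_instructions": "Reply with a JSON array."},
)
print(prompt.format(histories="Human: hi\nAI: hello"))
# Human: hi
# AI: hello
# Reply with a JSON array.
# questions:
```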
......
@@ -17,14 +17,16 @@ def handle_llm_exceptions(func):
raise LLMBadRequestError(str(e))
except openai.error.APIConnectionError as e:
logging.exception("Failed to connect to OpenAI API.")
-raise LLMAPIConnectionError(str(e))
+raise LLMAPIConnectionError(e.__class__.__name__ + ":" + str(e))
except (openai.error.APIError, openai.error.ServiceUnavailableError, openai.error.Timeout) as e:
logging.exception("OpenAI service unavailable.")
-raise LLMAPIUnavailableError(str(e))
+raise LLMAPIUnavailableError(e.__class__.__name__ + ":" + str(e))
except openai.error.RateLimitError as e:
raise LLMRateLimitError(str(e))
except openai.error.AuthenticationError as e:
raise LLMAuthorizationError(str(e))
+except openai.error.OpenAIError as e:
+    raise LLMBadRequestError(e.__class__.__name__ + ":" + str(e))
return wrapper
@@ -39,13 +41,15 @@ def handle_llm_exceptions_async(func):
raise LLMBadRequestError(str(e))
except openai.error.APIConnectionError as e:
logging.exception("Failed to connect to OpenAI API.")
-raise LLMAPIConnectionError(str(e))
+raise LLMAPIConnectionError(e.__class__.__name__ + ":" + str(e))
except (openai.error.APIError, openai.error.ServiceUnavailableError, openai.error.Timeout) as e:
logging.exception("OpenAI service unavailable.")
-raise LLMAPIUnavailableError(str(e))
+raise LLMAPIUnavailableError(e.__class__.__name__ + ":" + str(e))
except openai.error.RateLimitError as e:
raise LLMRateLimitError(str(e))
except openai.error.AuthenticationError as e:
raise LLMAuthorizationError(str(e))
+except openai.error.OpenAIError as e:
+    raise LLMBadRequestError(e.__class__.__name__ + ":" + str(e))
return wrapper
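Both wrappers now prefix raised messages with the exception class name, so the original error type survives being flattened into a string, and a new `OpenAIError` catch-all maps anything unrecognized to `LLMBadRequestError`. A hedged sketch of the sync wrapper's overall shape (stand-in error classes; the real ones live in this repo's error module):

```python
import functools

import openai  # pre-1.0 SDK, which exposes openai.error.*

class LLMRateLimitError(Exception):
    """Stand-in for the repo's error class, kept here for a self-contained sketch."""

class LLMBadRequestError(Exception):
    """Stand-in for the repo's error class, kept here for a self-contained sketch."""

def handle_llm_exceptions(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except openai.error.RateLimitError as e:
            raise LLMRateLimitError(str(e))
        except openai.error.OpenAIError as e:
            # New catch-all: keep the class name in the message.
            raise LLMBadRequestError(e.__class__.__name__ + ":" + str(e))
    return wrapper
```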
@@ -3,13 +3,13 @@ import re
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.schema import BaseMessage
-from core.prompt.prompt_template import OutLinePromptTemplate
+from core.prompt.prompt_template import JinjaPromptTemplate
class PromptBuilder:
@classmethod
def to_system_message(cls, prompt_content: str, inputs: dict) -> BaseMessage:
-prompt_template = OutLinePromptTemplate.from_template(prompt_content)
+prompt_template = JinjaPromptTemplate.from_template(prompt_content)
system_prompt_template = SystemMessagePromptTemplate(prompt=prompt_template)
prompt_inputs = {k: inputs[k] for k in system_prompt_template.input_variables if k in inputs}
system_message = system_prompt_template.format(**prompt_inputs)
@@ -17,7 +17,7 @@ class PromptBuilder:
@classmethod
def to_ai_message(cls, prompt_content: str, inputs: dict) -> BaseMessage:
-prompt_template = OutLinePromptTemplate.from_template(prompt_content)
+prompt_template = JinjaPromptTemplate.from_template(prompt_content)
ai_prompt_template = AIMessagePromptTemplate(prompt=prompt_template)
prompt_inputs = {k: inputs[k] for k in ai_prompt_template.input_variables if k in inputs}
ai_message = ai_prompt_template.format(**prompt_inputs)
@@ -25,13 +25,14 @@ class PromptBuilder:
@classmethod
def to_human_message(cls, prompt_content: str, inputs: dict) -> BaseMessage:
-prompt_template = OutLinePromptTemplate.from_template(prompt_content)
+prompt_template = JinjaPromptTemplate.from_template(prompt_content)
human_prompt_template = HumanMessagePromptTemplate(prompt=prompt_template)
human_message = human_prompt_template.format(**inputs)
return human_message
@classmethod
def process_template(cls, template: str):
-processed_template = re.sub(r'\{([a-zA-Z_]\w+?)\}', r'\1', template)
-processed_template = re.sub(r'\{\{([a-zA-Z_]\w+?)\}\}', r'{\1}', processed_template)
+processed_template = re.sub(r'\{{2}(.+)\}{2}', r'{\1}', template)
+# processed_template = re.sub(r'\{([a-zA-Z_]\w+?)\}', r'\1', template)
+# processed_template = re.sub(r'\{\{([a-zA-Z_]\w+?)\}\}', r'{\1}', processed_template)
return processed_template
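The rewritten `process_template` collapses Jinja-style `{{var}}` down to single-brace `{var}` in one pass; the old escape/unescape steps stay commented out for reference. A quick behavioral check of the new regex:

```python
import re

def process_template(template: str) -> str:
    # Rewrite "{{var}}" to "{var}"; single braces pass through untouched.
    return re.sub(r'\{{2}(.+)\}{2}', r'{\1}', template)

print(process_template('Human:{{query}}'))    # -> Human:{query}
print(process_template('keep {this} as-is'))  # -> keep {this} as-is
# Note: the greedy `.+` spans from the first "{{" to the last "}}" on a
# line, so two placeholders on one line would be merged into one.
```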
import re
from typing import Any
+from jinja2 import Environment, meta
from langchain import PromptTemplate
from langchain.formatting import StrictFormatter
+class JinjaPromptTemplate(PromptTemplate):
+    template_format: str = "jinja2"
+    """The format of the prompt template. Options are: 'f-string', 'jinja2'."""
+
+    @classmethod
+    def from_template(cls, template: str, **kwargs: Any) -> PromptTemplate:
+        """Load a prompt template from a template."""
+        env = Environment()
+        template = template.replace("{{}}", "{}")
+        ast = env.parse(template)
+        input_variables = meta.find_undeclared_variables(ast)
+
+        if "partial_variables" in kwargs:
+            partial_variables = kwargs["partial_variables"]
+            input_variables = {
+                var for var in input_variables if var not in partial_variables
+            }
+
+        return cls(
+            input_variables=list(sorted(input_variables)), template=template, **kwargs
+        )
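For reference, a small usage sketch of the new class (relies on langchain's jinja2 template support, which `template_format = "jinja2"` selects; the sample strings are illustrative):

```python
prompt = JinjaPromptTemplate.from_template('Hello {{name}}, today is {{day}}.')
print(prompt.input_variables)   # ['day', 'name'] -- discovered from the Jinja AST
print(prompt.format(name='Ada', day='Monday'))
# -> Hello Ada, today is Monday.
```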
class OutLinePromptTemplate(PromptTemplate):
@classmethod
def from_template(cls, template: str, **kwargs: Any) -> PromptTemplate:
@@ -16,6 +40,24 @@ class OutLinePromptTemplate(PromptTemplate):
input_variables=list(sorted(input_variables)), template=template, **kwargs
)
+    def format(self, **kwargs: Any) -> str:
+        """Format the prompt with the inputs.
+
+        Args:
+            kwargs: Any arguments to be passed to the prompt template.
+
+        Returns:
+            A formatted string.
+
+        Example:
+
+            .. code-block:: python
+
+                prompt.format(variable1="foo")
+        """
+        kwargs = self._merge_partial_and_user_variables(**kwargs)
+        return OneLineFormatter().format(self.template, **kwargs)
class OneLineFormatter(StrictFormatter):
    def parse(self, format_string):
......
CONVERSATION_TITLE_PROMPT = (
"Human:{query}\n-----\n"
"Human:{{query}}\n-----\n"
"Help me summarize the intent of what the human said and provide a title, the title should not exceed 20 words.\n"
"If the human said is conducted in Chinese, you should return a Chinese title.\n"
"If the human said is conducted in English, you should return an English title.\n"
@@ -19,7 +19,7 @@ CONVERSATION_SUMMARY_PROMPT = (
INTRODUCTION_GENERATE_PROMPT = (
"I am designing a product for users to interact with an AI through dialogue. "
"The Prompt given to the AI before the conversation is:\n\n"
"```\n{prompt}\n```\n\n"
"```\n{{prompt}}\n```\n\n"
"Please generate a brief introduction of no more than 50 words that greets the user, based on this Prompt. "
"Do not reveal the developer's motivation or deep logic behind the Prompt, "
"but focus on building a relationship with the user:\n"
@@ -27,13 +27,13 @@ INTRODUCTION_GENERATE_PROMPT = (
MORE_LIKE_THIS_GENERATE_PROMPT = (
"-----\n"
"{original_completion}\n"
"{{original_completion}}\n"
"-----\n\n"
"Please use the above content as a sample for generating the result, "
"and include key information points related to the original sample in the result. "
"Try to rephrase this information in different ways and predict according to the rules below.\n\n"
"-----\n"
"{prompt}\n"
"{{prompt}}\n"
)
SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
......
@@ -140,8 +140,6 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
router.push(`/datasets/${datasetId}/documents/create`)
}
-router.prefetch(`/datasets/${datasetId}/documents/create`)
-const isLoading = !documentsRes && !error
const handleSaveNotionPageSelected = async (selectedPages: (DataSourceNotionPage & { workspace_id: string })[]) => {
......
@@ -309,7 +309,7 @@ const DocumentList: FC<IDocumentListProps> = ({ documents = [], datasetId, onUpd
key={doc.id}
className={'border-b border-gray-200 h-8 hover:bg-gray-50 cursor-pointer'}
onClick={() => {
-router.push(`datasets/${datasetId}/documents/${doc.id}`)
+router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-gray-500 text-xs'>{doc.position}</td>
<td className={s.tdValue}>
......
@@ -44,7 +44,6 @@
"echarts-for-react": "^3.0.2",
"emoji-mart": "^5.5.2",
"eslint": "8.36.0",
"eslint-config-next": "13.2.4",
"husky": "^8.0.3",
"i18next": "^22.4.13",
"i18next-resources-to-backend": "^1.1.3",
@@ -53,10 +52,10 @@
"katex": "^0.16.7",
"lodash-es": "^4.17.21",
"negotiator": "^0.6.3",
"next": "13.2.4",
"next": "^13.4.7",
"qs": "^6.11.1",
"react": "18.2.0",
"react-dom": "18.2.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-error-boundary": "^4.0.2",
"react-headless-pagination": "^1.1.4",
"react-i18next": "^12.2.0",
@@ -89,6 +88,7 @@
"@types/negotiator": "^0.6.1",
"@types/qs": "^6.9.7",
"@types/sortablejs": "^1.15.1",
"eslint-config-next": "^13.4.7",
"eslint-plugin-react-hooks": "^4.6.0",
"lint-staged": "^13.2.2",
"miragejs": "^0.1.47",
......