Commit cb8d2a58 authored by 陈立彬's avatar 陈立彬

易错词替换

parent 30105aa8
package cn.breeze.elleai.application.dto.inner;
import lombok.Data;
import java.io.Serializable;
import java.util.List;
/**
 * Holds a sentence together with its pinyin form and the list of
 * {@link Term} entries that relate the two.
 *
 * @author yangyw
 */
@Data
public class PinYin implements Serializable {
/**
 * The original sentence.
 */
private String sentence;
/**
 * The pinyin produced from the sentence.
 */
private String pinyin;
/**
 * Mapping entries between the source text and its pinyin.
 */
private List<Term> terms;
}
package cn.breeze.elleai.application.dto.inner;
import lombok.Data;
import java.io.Serializable;
/**
 * Pairs a piece of source text with its pinyin.
 *
 * @author YANGYW
 */
@Data
public class Term implements Serializable {
// Source text of this entry (the name suggests Han characters).
private String hans;
// Pinyin corresponding to {@code hans}.
private String pinyin;
}
...@@ -20,6 +20,12 @@ public class ProperNounSaveDto implements Serializable { ...@@ -20,6 +20,12 @@ public class ProperNounSaveDto implements Serializable {
@Schema(description = "专有名词名称") @Schema(description = "专有名词名称")
private String name; private String name;
/**
* 中文拼音
*/
@Schema(description = "中文拼音")
private String pinyin;
@Schema(description = "相似词列表") @Schema(description = "相似词列表")
@JsonProperty("similar_word_list") @JsonProperty("similar_word_list")
private List<String> similarWordList; private List<String> similarWordList;
......
...@@ -20,6 +20,11 @@ public class ProperNounDto implements Serializable { ...@@ -20,6 +20,11 @@ public class ProperNounDto implements Serializable {
*/ */
private String name; private String name;
/**
* 中文拼音
*/
private String pinyin;
@Schema(description = "相似词列表") @Schema(description = "相似词列表")
@JsonProperty("similar_word_list") @JsonProperty("similar_word_list")
private List<String> similarWordList = new ArrayList<>(); private List<String> similarWordList = new ArrayList<>();
......
package cn.breeze.elleai.application.service; package cn.breeze.elleai.application.service;
import cn.breeze.elleai.application.dto.PageResult; import cn.breeze.elleai.application.dto.PageResult;
import cn.breeze.elleai.application.dto.inner.PinYin;
import cn.breeze.elleai.application.dto.inner.Term;
import cn.breeze.elleai.application.dto.request.ProperNounRequestDto; import cn.breeze.elleai.application.dto.request.ProperNounRequestDto;
import cn.breeze.elleai.application.dto.request.ProperNounSaveDto; import cn.breeze.elleai.application.dto.request.ProperNounSaveDto;
import cn.breeze.elleai.application.dto.response.AppRoleDto; import cn.breeze.elleai.application.dto.response.AppRoleDto;
...@@ -13,14 +15,16 @@ import cn.breeze.elleai.domain.sparring.model.response.ProperNounResponseModel; ...@@ -13,14 +15,16 @@ import cn.breeze.elleai.domain.sparring.model.response.ProperNounResponseModel;
import cn.breeze.elleai.domain.sparring.service.CommonService; import cn.breeze.elleai.domain.sparring.service.CommonService;
import cn.breeze.elleai.util.ChineseCharacterUtil; import cn.breeze.elleai.util.ChineseCharacterUtil;
import cn.breeze.elleai.util.Codes; import cn.breeze.elleai.util.Codes;
import cn.breeze.elleai.util.PinYinUtils;
import cn.hutool.core.bean.BeanUtil; import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.mybatisflex.core.paginate.Page; import com.mybatisflex.core.paginate.Page;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.ArrayList; import java.util.ArrayList;
...@@ -31,6 +35,7 @@ import java.util.stream.Collectors; ...@@ -31,6 +35,7 @@ import java.util.stream.Collectors;
/** /**
* 应用服务 * 应用服务
*/ */
@Slf4j
@Component @Component
@RequiredArgsConstructor @RequiredArgsConstructor
public class AppCommonService { public class AppCommonService {
...@@ -77,22 +82,11 @@ public class AppCommonService { ...@@ -77,22 +82,11 @@ public class AppCommonService {
*/ */
public void saveproperNoun(ProperNounSaveDto dto) { public void saveproperNoun(ProperNounSaveDto dto) {
if(Objects.nonNull(dto)) {
String upperCase = ChineseCharacterUtil.getUpperCase(dto.getName(), false);
String upperCase2 = ChineseCharacterUtil.getUpperCase(dto.getName(), true);
if(CollectionUtil.isNotEmpty(dto.getSimilarWordList())) {
dto.getSimilarWordList().forEach(v -> {
System.out.println(v + "==" +ChineseCharacterUtil.getUpperCase(dto.getName(), true));
});
}
} else {
ProperNounSaveModel model = BeanUtil.copyProperties(dto, ProperNounSaveModel.class); ProperNounSaveModel model = BeanUtil.copyProperties(dto, ProperNounSaveModel.class);
model.setSimilarWords(JSON.toJSONString(dto.getSimilarWordList())); model.setSimilarWords(JSON.toJSONString(dto.getSimilarWordList()));
model.setPinyin(PinYinUtils.hansToPinYin(dto.getName()));
commonService.saveProperNoun(model); commonService.saveProperNoun(model);
} }
}
/** /**
...@@ -123,6 +117,34 @@ public class AppCommonService { ...@@ -123,6 +117,34 @@ public class AppCommonService {
return pageResult; return pageResult;
} }
/**
 * Lists the proper nouns whose status is {@code 1}.
 *
 * <p>Each response model is copied into a {@link ProperNounDto}; the stored
 * similar-words JSON array, when present, is parsed into
 * {@code similarWordList}.
 *
 * @return the matching proper nouns; an empty list (never {@code null}) when
 *         there are none
 */
public List<ProperNounDto> properNounList() {
    ProperNounRequestModel requestModel = new ProperNounRequestModel();
    requestModel.setStatus(1);

    List<ProperNounResponseModel> modelList = commonService.properNounList(requestModel);
    if (CollectionUtil.isEmpty(modelList)) {
        // Return an empty list rather than null so callers can iterate safely.
        return new ArrayList<>();
    }

    return modelList.stream().map(v -> {
        ProperNounDto dto = BeanUtil.copyProperties(v, ProperNounDto.class);
        if (StrUtil.isNotEmpty(v.getSimilarWords())) {
            List<String> similarWords = JSONObject.parseArray(v.getSimilarWords(), String.class);
            // A stored JSON "null" presumably parses to null; keep whatever the
            // DTO already holds instead of overwriting it with null.
            if (similarWords != null) {
                dto.setSimilarWordList(similarWords);
            }
        }
        return dto;
    }).collect(Collectors.toList());
}
/** /**
* 知识库分类列表 * 知识库分类列表
* @return * @return
...@@ -148,4 +170,73 @@ public class AppCommonService { ...@@ -148,4 +170,73 @@ public class AppCommonService {
} }
return null; return null;
} }
/**
 * Replaces commonly mis-written words in a sentence with the configured
 * proper noun.
 *
 * <p>For every enabled proper noun whose pinyin occurs in the sentence's
 * pinyin, the sentence is scanned for runs of characters (whitespace between
 * them is ignored) whose concatenated pinyin equals the noun's pinyin. When
 * such a run, with whitespace removed, is one of the noun's similar words,
 * that text is replaced by the noun's name.
 *
 * @param sentence the text to correct; may be {@code null} or blank
 * @return the corrected sentence, or {@code sentence} unchanged when it is
 *         blank or nothing needs correcting
 */
public String sentenceWordCorrect(String sentence) {
    // Nothing to correct, and the pinyin conversion must not be handed a null.
    if (StrUtil.isBlank(sentence)) {
        return sentence;
    }
    PinYin pinYin = PinYinUtils.hansToPinYinObj(sentence);
    String corrected = pinYin.getSentence();
    List<ProperNounDto> filterWords = this.properNounList();
    if (CollUtil.isEmpty(filterWords)) {
        return corrected;
    }

    List<Term> terms = pinYin.getTerms();
    for (ProperNounDto word : filterWords) {
        String py = word.getPinyin();
        List<String> samples = word.getSimilarWordList();
        // Cheap pre-filter: skip nouns that cannot match or have nothing to correct from.
        if (StrUtil.isEmpty(py) || CollUtil.isEmpty(samples)
                || !StrUtil.contains(pinYin.getPinyin(), py)) {
            continue;
        }

        int start = 0;
        while (start < terms.size()) {
            int end = matchPinyinFrom(terms, start, py);
            if (end < 0) {
                start++;
                continue;
            }
            // raw keeps the text exactly as written (inner whitespace included) so it
            // can be located in the sentence; compact drops whitespace for the lookup.
            StringBuilder raw = new StringBuilder();
            StringBuilder compact = new StringBuilder();
            for (Term term : terms.subList(start, end)) {
                raw.append(term.getHans());
                if (StrUtil.isNotBlank(term.getPinyin())) {
                    compact.append(term.getHans());
                }
            }
            log.info("匹配易错词[{}],当前匹配汉字为:{}, 拼音为:{}", word.getName(), raw, py);
            if (samples.contains(compact.toString())) {
                // NOTE(review): terms omit non-Chinese characters, so a run that spans
                // punctuation will not be found here and is left untouched.
                corrected = StrUtil.replaceFirst(corrected, raw.toString(), word.getName());
                // Continue after the replaced run so overlapping windows are not retried.
                start = end;
            } else {
                // Same pronunciation but not a listed misspelling (e.g. the correct word
                // itself); keep scanning for later occurrences.
                start++;
            }
        }
    }
    return corrected;
}

/**
 * Tries to match {@code py} against the pinyin of consecutive terms beginning at {@code start}.
 *
 * @param terms the per-character terms of the sentence
 * @param start index of the first term of the candidate run
 * @param py    the target pinyin, non-empty
 * @return the exclusive end index of the run whose pinyin equals {@code py},
 *         or {@code -1} when no run starting at {@code start} matches
 */
private static int matchPinyinFrom(List<Term> terms, int start, String py) {
    StringBuilder acc = new StringBuilder();
    for (int i = start; i < terms.size(); i++) {
        String termPinyin = terms.get(i).getPinyin();
        if (StrUtil.isBlank(termPinyin)) {
            if (acc.length() == 0) {
                // A match never begins on whitespace.
                return -1;
            }
            // Whitespace inside a run is ignored.
            continue;
        }
        acc.append(termPinyin);
        if (py.contentEquals(acc)) {
            return i + 1;
        }
        if (!py.startsWith(acc.toString())) {
            return -1;
        }
    }
    return -1;
}
} }
...@@ -6,6 +6,7 @@ import cn.breeze.elleai.application.dto.PageResult; ...@@ -6,6 +6,7 @@ import cn.breeze.elleai.application.dto.PageResult;
import cn.breeze.elleai.application.dto.request.*; import cn.breeze.elleai.application.dto.request.*;
import cn.breeze.elleai.application.dto.response.*; import cn.breeze.elleai.application.dto.response.*;
import cn.breeze.elleai.application.service.AppChatCompletionService; import cn.breeze.elleai.application.service.AppChatCompletionService;
import cn.breeze.elleai.application.service.AppCommonService;
import cn.breeze.elleai.config.QueryParam; import cn.breeze.elleai.config.QueryParam;
import cn.breeze.elleai.util.UserPrincipal; import cn.breeze.elleai.util.UserPrincipal;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
...@@ -23,6 +24,7 @@ import org.springframework.web.bind.annotation.*; ...@@ -23,6 +24,7 @@ import org.springframework.web.bind.annotation.*;
public class ChatCompletionMobileController { public class ChatCompletionMobileController {
private final AppChatCompletionService chatCompletionService; private final AppChatCompletionService chatCompletionService;
private final AppCommonService commonService;
@Operation(summary = "助手列表") @Operation(summary = "助手列表")
@GetMapping("/assistant/list") @GetMapping("/assistant/list")
...@@ -54,6 +56,7 @@ public class ChatCompletionMobileController { ...@@ -54,6 +56,7 @@ public class ChatCompletionMobileController {
@PostMapping("/ask") @PostMapping("/ask")
public ApiResponse<UserAskResultMobileDto> ask(@Parameter(hidden = true) UserPrincipal userPrincipal, public ApiResponse<UserAskResultMobileDto> ask(@Parameter(hidden = true) UserPrincipal userPrincipal,
@RequestBody UserQaMobileRequestDto request) { @RequestBody UserQaMobileRequestDto request) {
request.setContent(commonService.sentenceWordCorrect(request.getContent()));
UserAskResultMobileDto result = chatCompletionService.userAsk(userPrincipal, request); UserAskResultMobileDto result = chatCompletionService.userAsk(userPrincipal, request);
return ApiResponse.ok(result); return ApiResponse.ok(result);
} }
......
...@@ -20,6 +20,8 @@ public class ProperNounSaveModel implements Serializable { ...@@ -20,6 +20,8 @@ public class ProperNounSaveModel implements Serializable {
*/ */
private String name; private String name;
private String pinyin;
/** /**
* 相似词 * 相似词
*/ */
......
...@@ -16,6 +16,11 @@ public class ProperNounResponseModel implements Serializable { ...@@ -16,6 +16,11 @@ public class ProperNounResponseModel implements Serializable {
*/ */
private String name; private String name;
/**
* 中文拼音
*/
private String pinyin;
/** /**
* 相似词列表 * 相似词列表
*/ */
......
...@@ -66,6 +66,9 @@ public class CommonServiceImpl implements CommonService{ ...@@ -66,6 +66,9 @@ public class CommonServiceImpl implements CommonService{
if(StrUtil.isNotEmpty(request.getName())) { if(StrUtil.isNotEmpty(request.getName())) {
queryWrapper.where(PROPER_NOUN_ENTITY.NAME.like("%"+request.getName()+"%")); queryWrapper.where(PROPER_NOUN_ENTITY.NAME.like("%"+request.getName()+"%"));
} }
if(Objects.nonNull(request.getStatus())) {
queryWrapper.where(PROPER_NOUN_ENTITY.STATUS.eq(request.getStatus()));
}
queryWrapper.orderBy(PROPER_NOUN_ENTITY.CREATE_TIME, false); queryWrapper.orderBy(PROPER_NOUN_ENTITY.CREATE_TIME, false);
return properNounMapper.selectListByQueryAs(queryWrapper, ProperNounResponseModel.class); return properNounMapper.selectListByQueryAs(queryWrapper, ProperNounResponseModel.class);
......
...@@ -37,6 +37,11 @@ public class ProperNounEntity implements Serializable { ...@@ -37,6 +37,11 @@ public class ProperNounEntity implements Serializable {
*/ */
private String name; private String name;
/**
* 中文拼音
*/
private String pinyin;
/** /**
* 相似词列表 * 相似词列表
*/ */
......
...@@ -29,6 +29,11 @@ public class ProperNounTableDef extends TableDef { ...@@ -29,6 +29,11 @@ public class ProperNounTableDef extends TableDef {
*/ */
public final QueryColumn NAME = new QueryColumn(this, "name"); public final QueryColumn NAME = new QueryColumn(this, "name");
/**
* 中文拼音
*/
public final QueryColumn PINYIN = new QueryColumn(this, "pinyin");
/** /**
* 状态(0禁用 1启用) * 状态(0禁用 1启用)
*/ */
...@@ -62,7 +67,7 @@ public class ProperNounTableDef extends TableDef { ...@@ -62,7 +67,7 @@ public class ProperNounTableDef extends TableDef {
/** /**
* 默认字段,不包含逻辑删除或者 large 等字段。 * 默认字段,不包含逻辑删除或者 large 等字段。
*/ */
public final QueryColumn[] DEFAULT_COLUMNS = new QueryColumn[]{ID, NAME, SIMILAR_WORDS, STATUS, DELETED, CREATE_TIME, UPDATE_TIME}; public final QueryColumn[] DEFAULT_COLUMNS = new QueryColumn[]{ID, NAME, PINYIN, SIMILAR_WORDS, STATUS, DELETED, CREATE_TIME, UPDATE_TIME};
public ProperNounTableDef() { public ProperNounTableDef() {
super("", "ai_proper_noun"); super("", "ai_proper_noun");
......
package cn.breeze.elleai.util;
import cn.breeze.elleai.application.dto.inner.PinYin;
import cn.breeze.elleai.application.dto.inner.Term;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import java.util.ArrayList;
import java.util.List;
/**
 * Helpers that convert Chinese text to lowercase, toneless Hanyu Pinyin
 * using pinyin4j.
 *
 * @author yangyw
 */
public class PinYinUtils {

    /** First character (U+4E00) of the range treated as convertible Chinese characters. */
    private static final char HAN_FIRST = '\u4e00';

    /** Last character (U+9FA5) of the range treated as convertible Chinese characters. */
    private static final char HAN_LAST = '\u9fa5';

    /**
     * Converts the Chinese characters of a text to pinyin.
     *
     * <p>Whitespace is dropped. Characters outside the Chinese range, and
     * Chinese characters for which no reading is available, are copied
     * through unchanged. For characters with several readings the first one
     * reported by pinyin4j is used.
     *
     * @param hans the text to convert; may be {@code null}
     * @return the converted text, or an empty string when {@code hans} is {@code null}
     */
    public static String hansToPinYin(String hans) {
        if (hans == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = newFormat();
        StringBuilder sb = new StringBuilder();
        for (char c : hans.toCharArray()) {
            if (Character.isWhitespace(c)) {
                continue;
            }
            String py = toPinyin(c, format);
            if (py != null) {
                sb.append(py);
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Converts a text to pinyin and also records the per-character mapping.
     *
     * <p>The returned object's {@code pinyin} is built like
     * {@link #hansToPinYin(String)}. Its {@code terms} hold one entry per
     * whitespace character (with an empty pinyin) and one per converted
     * Chinese character; characters that are copied through unchanged are
     * part of {@code pinyin} but have no term.
     *
     * @param hans the text to convert; may be {@code null}
     * @return the conversion result; for {@code null} input the sentence is
     *         {@code null}, the pinyin is empty and there are no terms
     */
    public static PinYin hansToPinYinObj(String hans) {
        List<Term> terms = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        if (hans != null) {
            HanyuPinyinOutputFormat format = newFormat();
            for (char c : hans.toCharArray()) {
                if (Character.isWhitespace(c)) {
                    terms.add(newTerm(c, ""));
                    continue;
                }
                String py = toPinyin(c, format);
                if (py != null) {
                    terms.add(newTerm(c, py));
                    sb.append(py);
                } else {
                    // NOTE(review): pass-through characters get no Term, so terms and
                    // pinyin are not aligned for mixed text - confirm this is intended.
                    sb.append(c);
                }
            }
        }
        PinYin pinYin = new PinYin();
        pinYin.setSentence(hans);
        pinYin.setPinyin(sb.toString());
        pinYin.setTerms(terms);
        return pinYin;
    }

    /** Builds the lowercase, toneless output format shared by both conversions. */
    private static HanyuPinyinOutputFormat newFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Returns the first pinyin reading of {@code c}, or {@code null} when the
     * character is outside the Chinese range or has no known reading.
     */
    private static String toPinyin(char c, HanyuPinyinOutputFormat format) {
        if (c < HAN_FIRST || c > HAN_LAST) {
            return null;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
            // pinyin4j reports "no reading" as null; guard before indexing.
            if (readings == null || readings.length == 0) {
                return null;
            }
            return readings[0];
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // The format is built by newFormat() and is fixed, so this signals a
            // programming error rather than bad input.
            throw new IllegalStateException("Invalid pinyin output format", e);
        }
    }

    /** Creates a term for a single source character and its pinyin. */
    private static Term newTerm(char c, String pinyin) {
        Term term = new Term();
        term.setHans(String.valueOf(c));
        term.setPinyin(pinyin);
        return term;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment