Commit cb8d2a58 authored by 陈立彬

易错词替换

parent 30105aa8
package cn.breeze.elleai.application.dto.inner;
import lombok.Data;
import java.io.Serializable;
import java.util.List;
/**
* Holds the outcome of a pinyin conversion: the source sentence, the
* concatenated pinyin string, and the per-term correspondence between them.
*
* @author yangyw
*/
@Data
public class PinYin implements Serializable {
/**
* The original sentence, exactly as it was passed to the converter.
*/
private String sentence;
/**
* The pinyin produced from the sentence, concatenated into one string.
*/
private String pinyin;
/**
* Term-by-term mapping between pieces of the source text and their pinyin.
*/
private List<Term> terms;
}
package cn.breeze.elleai.application.dto.inner;
import lombok.Data;
import java.io.Serializable;
/**
* One entry of a {@link PinYin} term list, pairing a piece of source text
* with its pinyin.
*
* @author YANGYW
*/
@Data
public class Term implements Serializable {
// Source text of this term (PinYinUtils fills it with a single character).
private String hans;
// Pinyin for the source text; PinYinUtils stores an empty string for whitespace.
private String pinyin;
}
......@@ -20,6 +20,12 @@ public class ProperNounSaveDto implements Serializable {
@Schema(description = "专有名词名称")
private String name;
/**
* 中文拼音
*/
@Schema(description = "中文拼音")
private String pinyin;
@Schema(description = "相似词列表")
@JsonProperty("similar_word_list")
private List<String> similarWordList;
......
......@@ -20,6 +20,11 @@ public class ProperNounDto implements Serializable {
*/
private String name;
/**
* 中文拼音
*/
private String pinyin;
@Schema(description = "相似词列表")
@JsonProperty("similar_word_list")
private List<String> similarWordList = new ArrayList<>();
......
package cn.breeze.elleai.application.service;
import cn.breeze.elleai.application.dto.PageResult;
import cn.breeze.elleai.application.dto.inner.PinYin;
import cn.breeze.elleai.application.dto.inner.Term;
import cn.breeze.elleai.application.dto.request.ProperNounRequestDto;
import cn.breeze.elleai.application.dto.request.ProperNounSaveDto;
import cn.breeze.elleai.application.dto.response.AppRoleDto;
......@@ -13,14 +15,16 @@ import cn.breeze.elleai.domain.sparring.model.response.ProperNounResponseModel;
import cn.breeze.elleai.domain.sparring.service.CommonService;
import cn.breeze.elleai.util.ChineseCharacterUtil;
import cn.breeze.elleai.util.Codes;
import cn.breeze.elleai.util.PinYinUtils;
import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.mybatisflex.core.paginate.Page;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
......@@ -31,6 +35,7 @@ import java.util.stream.Collectors;
/**
* 应用服务
*/
@Slf4j
@Component
@RequiredArgsConstructor
public class AppCommonService {
......@@ -77,21 +82,10 @@ public class AppCommonService {
*/
public void saveproperNoun(ProperNounSaveDto dto) {
    // Nothing to persist without a payload; previously a null dto ended in a NullPointerException.
    if (Objects.isNull(dto)) {
        return;
    }
    ProperNounSaveModel model = BeanUtil.copyProperties(dto, ProperNounSaveModel.class);
    // The similar-word list is persisted as a JSON array string.
    model.setSimilarWords(JSON.toJSONString(dto.getSimilarWordList()));
    // The pinyin is always derived from the name, overriding whatever value was copied from the DTO,
    // so that it stays comparable with the pinyin computed for incoming sentences.
    model.setPinyin(PinYinUtils.hansToPinYin(dto.getName()));
    commonService.saveProperNoun(model);
}
......@@ -123,6 +117,34 @@ public class AppCommonService {
return pageResult;
}
/**
 * Lists all enabled proper nouns, with each stored similar-word JSON array
 * decoded into a list of strings.
 *
 * @return the enabled proper nouns; never {@code null}, empty when none exist
 */
public List<ProperNounDto> properNounList() {
    ProperNounRequestModel requestModel = new ProperNounRequestModel();
    // Status 1 means "enabled" (0 = disabled).
    requestModel.setStatus(1);
    List<ProperNounResponseModel> modelList = commonService.properNounList(requestModel);
    if (CollectionUtil.isEmpty(modelList)) {
        // Return an empty, mutable list rather than null so callers need no null check.
        return new ArrayList<>();
    }
    return modelList.stream().map(v -> {
        ProperNounDto dto = BeanUtil.copyProperties(v, ProperNounDto.class);
        if (StrUtil.isNotEmpty(v.getSimilarWords())) {
            List<String> similarWords = JSONObject.parseArray(v.getSimilarWords(), String.class);
            // Keep the DTO's default empty list when the stored JSON decodes to null.
            if (similarWords != null) {
                dto.setSimilarWordList(similarWords);
            }
        }
        return dto;
    }).collect(Collectors.toList());
}
/**
* 知识库分类列表
* @return
......@@ -148,4 +170,73 @@ public class AppCommonService {
}
return null;
}
/**
 * Corrects commonly mis-recognised words in a sentence.
 * <p>
 * For every enabled proper noun, the sentence is searched for a run of characters
 * (whitespace between them is ignored) whose pinyin equals the proper noun's pinyin.
 * When that run, with whitespace removed, is one of the proper noun's registered
 * similar words, its first occurrence in the sentence is replaced by the proper
 * noun's name.
 *
 * @param sentence the text to correct; may be {@code null} or blank
 * @return the corrected sentence, or {@code sentence} unchanged when it is blank or
 *         nothing needs to be replaced
 */
public String sentenceWordCorrect(String sentence) {
    // Nothing to correct, and the pinyin converter must not be handed a null.
    if (StrUtil.isBlank(sentence)) {
        return sentence;
    }
    List<ProperNounDto> filterWords = this.properNounList();
    if (CollUtil.isEmpty(filterWords)) {
        return sentence;
    }
    PinYin pinYin = PinYinUtils.hansToPinYinObj(sentence);
    String corrected = pinYin.getSentence();
    List<Term> terms = pinYin.getTerms();
    for (ProperNounDto word : filterWords) {
        String py = word.getPinyin();
        // Cheap pre-filter: the whole-sentence pinyin must contain the word's pinyin.
        if (StrUtil.isEmpty(py) || !StrUtil.contains(pinYin.getPinyin(), py)) {
            continue;
        }
        String matched = findSimilarSpan(terms, py, word.getSimilarWordList());
        if (matched != null) {
            log.info("匹配易错词[{}],当前匹配汉字为:{}, 拼音为:{}", word.getName(), matched, py);
            corrected = StrUtil.replaceFirst(corrected, matched, word.getName());
        }
    }
    return corrected;
}

/**
 * Finds the first run of terms whose concatenated pinyin equals {@code py} and whose
 * text, ignoring whitespace, is one of {@code samples}.
 *
 * @param terms   the per-character terms of the sentence, in order
 * @param py      the pinyin to look for
 * @param samples the similar words that qualify for replacement
 * @return the matched text as it appears in the sentence (inner whitespace included),
 *         or {@code null} when no qualifying run exists
 */
private String findSimilarSpan(List<Term> terms, String py, List<String> samples) {
    if (CollUtil.isEmpty(terms) || StrUtil.isEmpty(py) || CollUtil.isEmpty(samples)) {
        return null;
    }
    for (int start = 0; start < terms.size(); start++) {
        // A run must begin on a real syllable so leading whitespace is never swallowed.
        if (StrUtil.isBlank(terms.get(start).getPinyin())) {
            continue;
        }
        StringBuilder pinyinAcc = new StringBuilder();
        StringBuilder rawText = new StringBuilder();
        StringBuilder compactText = new StringBuilder();
        for (int i = start; i < terms.size(); i++) {
            Term term = terms.get(i);
            rawText.append(term.getHans());
            if (StrUtil.isBlank(term.getPinyin())) {
                // Whitespace inside a run: keep it in the raw text, ignore it for matching.
                continue;
            }
            compactText.append(term.getHans());
            pinyinAcc.append(term.getPinyin());
            String current = pinyinAcc.toString();
            if (current.equals(py)) {
                if (CollUtil.contains(samples, compactText.toString())) {
                    return rawText.toString();
                }
                // Same sound but not a registered similar word: try the next start position.
                break;
            }
            if (!py.startsWith(current)) {
                // Diverged from the target pinyin: this start position cannot match.
                break;
            }
        }
    }
    return null;
}
}
......@@ -6,6 +6,7 @@ import cn.breeze.elleai.application.dto.PageResult;
import cn.breeze.elleai.application.dto.request.*;
import cn.breeze.elleai.application.dto.response.*;
import cn.breeze.elleai.application.service.AppChatCompletionService;
import cn.breeze.elleai.application.service.AppCommonService;
import cn.breeze.elleai.config.QueryParam;
import cn.breeze.elleai.util.UserPrincipal;
import com.alibaba.fastjson.JSON;
......@@ -23,6 +24,7 @@ import org.springframework.web.bind.annotation.*;
public class ChatCompletionMobileController {
private final AppChatCompletionService chatCompletionService;
private final AppCommonService commonService;
@Operation(summary = "助手列表")
@GetMapping("/assistant/list")
......@@ -54,6 +56,7 @@ public class ChatCompletionMobileController {
@PostMapping("/ask")
public ApiResponse<UserAskResultMobileDto> ask(@Parameter(hidden = true) UserPrincipal userPrincipal,
        @RequestBody UserQaMobileRequestDto request) {
    // Replace commonly mis-recognised words in the question before it is answered.
    String correctedContent = commonService.sentenceWordCorrect(request.getContent());
    request.setContent(correctedContent);
    return ApiResponse.ok(chatCompletionService.userAsk(userPrincipal, request));
}
......
......@@ -20,6 +20,8 @@ public class ProperNounSaveModel implements Serializable {
*/
private String name;
private String pinyin;
/**
* 相似词
*/
......
......@@ -16,6 +16,11 @@ public class ProperNounResponseModel implements Serializable {
*/
private String name;
/**
* 中文拼音
*/
private String pinyin;
/**
* 相似词列表
*/
......
......@@ -66,6 +66,9 @@ public class CommonServiceImpl implements CommonService{
if(StrUtil.isNotEmpty(request.getName())) {
queryWrapper.where(PROPER_NOUN_ENTITY.NAME.like("%"+request.getName()+"%"));
}
if(Objects.nonNull(request.getStatus())) {
queryWrapper.where(PROPER_NOUN_ENTITY.STATUS.eq(request.getStatus()));
}
queryWrapper.orderBy(PROPER_NOUN_ENTITY.CREATE_TIME, false);
return properNounMapper.selectListByQueryAs(queryWrapper, ProperNounResponseModel.class);
......
......@@ -37,6 +37,11 @@ public class ProperNounEntity implements Serializable {
*/
private String name;
/**
* 中文拼音
*/
private String pinyin;
/**
* 相似词列表
*/
......
......@@ -29,6 +29,11 @@ public class ProperNounTableDef extends TableDef {
*/
public final QueryColumn NAME = new QueryColumn(this, "name");
/**
* 中文拼音
*/
public final QueryColumn PINYIN = new QueryColumn(this, "pinyin");
/**
* 状态(0禁用 1启用)
*/
......@@ -62,7 +67,7 @@ public class ProperNounTableDef extends TableDef {
/**
* 默认字段,不包含逻辑删除或者 large 等字段。
*/
public final QueryColumn[] DEFAULT_COLUMNS = new QueryColumn[]{ID, NAME, SIMILAR_WORDS, STATUS, DELETED, CREATE_TIME, UPDATE_TIME};
public final QueryColumn[] DEFAULT_COLUMNS = new QueryColumn[]{ID, NAME, PINYIN, SIMILAR_WORDS, STATUS, DELETED, CREATE_TIME, UPDATE_TIME};
public ProperNounTableDef() {
super("", "ai_proper_noun");
......
package cn.breeze.elleai.util;
import cn.breeze.elleai.application.dto.inner.PinYin;
import cn.breeze.elleai.application.dto.inner.Term;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import java.util.ArrayList;
import java.util.List;
/**
 * Helpers that convert Chinese text to lowercase, toneless pinyin using pinyin4j.
 * <p>
 * Only characters in the range U+4E00..U+9FA5 are converted; every other
 * non-whitespace character is passed through unchanged. For characters with
 * several readings the first reading reported by pinyin4j is used.
 *
 * @author yangyw
 */
public class PinYinUtils {

    /** Lower bound (inclusive) of the characters that are converted to pinyin. */
    private static final char HANS_FIRST = '\u4e00';

    /** Upper bound (inclusive) of the characters that are converted to pinyin. */
    private static final char HANS_LAST = '\u9fa5';

    /**
     * Converts the Chinese characters of a text to pinyin.
     * Whitespace is dropped; other non-Chinese characters are kept as they are.
     *
     * @param hans the text to convert; may be {@code null}
     * @return the concatenated pinyin, or an empty string for {@code null} or empty input
     */
    public static String hansToPinYin(String hans) {
        if (hans == null || hans.isEmpty()) {
            return "";
        }
        HanyuPinyinOutputFormat format = newFormat();
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < hans.length(); i++) {
            char c = hans.charAt(i);
            if (Character.isWhitespace(c)) {
                continue;
            }
            sb.append(toPinyin(c, format));
        }
        return sb.toString();
    }

    /**
     * Converts a text to pinyin and also records, character by character, which
     * piece of text produced which piece of pinyin.
     * <p>
     * Every character of the input yields one {@link Term}: whitespace gets an empty
     * pinyin and is left out of the concatenated string, Chinese characters get their
     * first reading, and all other characters map to themselves. The terms therefore
     * always line up with both the sentence and the concatenated pinyin.
     *
     * @param hans the text to convert; may be {@code null}
     * @return the conversion result; for {@code null} input the sentence is {@code null},
     *         the pinyin is empty and the term list is empty
     */
    public static PinYin hansToPinYinObj(String hans) {
        List<Term> terms = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        if (hans != null) {
            HanyuPinyinOutputFormat format = newFormat();
            for (int i = 0; i < hans.length(); i++) {
                char c = hans.charAt(i);
                Term term = new Term();
                term.setHans(String.valueOf(c));
                if (Character.isWhitespace(c)) {
                    term.setPinyin("");
                } else {
                    String py = toPinyin(c, format);
                    term.setPinyin(py);
                    sb.append(py);
                }
                terms.add(term);
            }
        }
        PinYin pinYin = new PinYin();
        pinYin.setSentence(hans);
        pinYin.setPinyin(sb.toString());
        pinYin.setTerms(terms);
        return pinYin;
    }

    /** Builds the output format shared by both converters: lowercase, no tone marks. */
    private static HanyuPinyinOutputFormat newFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Returns the first pinyin reading of a Chinese character, or the character
     * itself when it is outside the converted range or has no known reading.
     */
    private static String toPinyin(char c, HanyuPinyinOutputFormat format) {
        if (c < HANS_FIRST || c > HANS_LAST) {
            return String.valueOf(c);
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
            // pinyin4j reports no readings for characters it does not know.
            if (readings == null || readings.length == 0) {
                return String.valueOf(c);
            }
            return readings[0];
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // The format is built by newFormat() above, so this signals a programming error.
            throw new IllegalStateException("Unsupported pinyin output format", e);
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment