基于科大讯飞Android语音控制,实现模糊识别
关于如何添加科大讯飞SDK,这里不再赘述,可以参考别人的方案,如 https://blog.csdn.net/Liu_ser/article/details/80661603 。
但是这只是完成了从语音到文字的转换,转换之后要正确识别解析出的文字,还需要做进一步的处理,即字符的模糊处理。打个比方:我要在APP里通过语音控制某个热水器,我会对网关喊"天狗天狗,打开热水器"(比如热水器网关取名为天狗)。但是对于普通话不标准的人,喊"天狗"时,语音识别的结果可能是"舔狗"、"天候"、"天沟"等等,这样就很难直接根据字符来做处理了。所以要做字符的模糊处理。当然也可以依靠科大讯飞的特殊化定制,但那是需要付费的;出于成本考虑,可以通过以下两种方法处理。
一 在科大讯飞项目中添加热词:
控制台 → 我的应用 → 语音听写 → 服务管理 → 上传热词
在这里添加热词以后,可以模糊匹配到你添加的热词上,但是这样做还不够,还是不能区分平舌翘舌,前鼻音后鼻音,因此还需要做模糊处理。
二 模糊处理:
思路是这样的:
1,将目标字符集转换为拼音;
2,获取科大讯飞语音识别的结果;
3,将识别结果转换为拼音;
4,在目标拼音集中进行拼音的匹配查找;
5,对于查找不到的,进行易混拼音的替换,再次与目标集合匹配查找;
6,对于还没有查找到的,去掉声调,再次与目标集合匹配查找;
7,将模糊匹配后的结果展现出来;
具体Demo如下:
public class PinyinSimilarity {
String[]englishPinYin26={
"EI1", "BI4", "SEI4", "DI4", "YI4", "EFU1", "JI4",
"EIQI1", "AI4", "JEI4", "KEI4", "EOU1", "EMEN1", "EN1",
"OU1", "PI1", "KIU1", "A4", "ESI1", "TI4",
"YOU4", "WEI4", "DABULIU3", "EKESI1", "WAI4", "ZEI4"
};
StringenglishString26="ABCDEFGHIJKLMNOPQRSTUVWXYZ";
StringnumberStringArabic="123456789";
StringnumberString="一二三四五六七八九十百";//可以打开热水器一,打开热水器二
StringspecialHanziString="天狗打开热水器";
StringmyCharAll =numberString +specialHanziString;
ListnumberPinYin=new ArrayList<>(20);//数字的拼音(10)
ListspecialHanziPinYin=new ArrayList(10);//特定汉字集的拼音(除了中文的数字之外的)
ListmyCharAllPinYin=new ArrayList(40);//所有拼音的集合
boolean fuzzyMatching=true;//是否开启模糊匹配功
public PinyinSimilarity(boolean fuzzyMatching){
this.fuzzyMatching = fuzzyMatching;
init();
}
public void init()
{
try{
String str;
HanyuPinyinOutputFormat format =new HanyuPinyinOutputFormat();
format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
format.setToneType(HanyuPinyinToneType.WITH_TONE_NUMBER);
str =numberString;//数字
for (int i =0; i < str.length(); i++) {
char c = str.charAt(i);
String[] vals = PinyinHelper.toHanyuPinyinStringArray(c, format);
numberPinYin.add(vals[0]);
}
str =specialHanziString;//汉字
for (int i =0; i < str.length(); i++) {
char c = str.charAt(i);
String[] vals = PinyinHelper.toHanyuPinyinStringArray(c, format);
specialHanziPinYin.add(vals[0]);
}
myCharAllPinYin.addAll(numberPinYin);
myCharAllPinYin.addAll(specialHanziPinYin);
}catch (Exception e){
e.printStackTrace();
}
}
public StringchangeOurWordsWithPinyin(String input){
String output=input;
try{
//处理符号:不关注符合,遇到,就去掉(要保留小数点)
output = changeWordProcessSignal(output);
//处理英文字母:转大写
output = changeWordProcessEnglish(output);
//所有汉字进行相似替换
LogUtils.error("input.length()="+input.length());
int index;
String str;
String strChanged;
StringBuilder strBuilder =new StringBuilder();
for(index=0;index
str = input.substring(index,index+1);
strChanged = changeOneWord(str);
strBuilder.append(strChanged);
}
output=strBuilder.toString();
LogUtils.error("after changeAllWord: output="+output);
}catch (Exception e){
e.printStackTrace();
}
return output;
}
public StringchangeWordProcessSignal(String strInput){
String strOutput = strInput;
//去掉 ,。空格-
strOutput = strOutput.replace(",", "");
strOutput = strOutput.replace("。", "");
strOutput = strOutput.replace("-", "");
strOutput = strOutput.replace(" ", "");
return strOutput;
}
public StringchangeWordProcessEnglish(String strInput){
String strOutput = strInput;
//转大写
strOutput = strOutput.toUpperCase();
return strOutput;
}
//尾字如果是汉字,进行拼音相同字的替换(零不能替换,可以先转换为0)
public StringchangeOneWord(String strInput){
//若已经在目标集合中了,就不需要转换了
if(numberString.contains(strInput)||numberStringArabic.contains(strInput)){
LogUtils.error("is number");
return strInput;
}else if(specialHanziString.contains(strInput)){
LogUtils.error("is specialHanziString");
return strInput;
}
String strChanged;
List listEnglishPinYin =new ArrayList();
strChanged = changeWord(strInput, numberPinYin, numberString);
if(numberString.contains(strChanged)){
LogUtils.error("is number");
return strChanged;
}
return changeWord(strInput, specialHanziPinYin, specialHanziString);
}
private StringchangeWord(String strInput, List listPinYin, String strSource) {
//先判断输入,是什么类型的字符:数字、字母、汉字
String strOutput ="";
String str = strInput.substring(0, 1);
String strPinyin ="";
boolean flagGetPinyin =false;
try {
if (str.matches("^[A-Z]{1}$")) {
strPinyin =englishPinYin26[englishString26.indexOf(str)];
LogUtils.error("str=" + str +" Pinyin=" + strPinyin);
flagGetPinyin =true;
}else if (str.matches("^[0-9]{1}$")) {
strPinyin =numberPinYin.get(numberString.indexOf(str));
LogUtils.error("str=" + str +" Pinyin=" + strPinyin);
flagGetPinyin =true;
}else if (str.matches("^[\u4e00-\u9fa5]{1}$")) {
HanyuPinyinOutputFormat format =new HanyuPinyinOutputFormat();
format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
format.setToneType(HanyuPinyinToneType.WITH_TONE_NUMBER);
char c = str.charAt(0);
String[] vals = PinyinHelper.toHanyuPinyinStringArray(c, format);
strPinyin = vals[0];//token.target;
flagGetPinyin =true;
}
if(flagGetPinyin) {
//在目标拼音集合中查找匹配项
int num = listPinYin.indexOf(strPinyin);
if (num >=0) {//拼音精确匹配成功
return strSource.substring(num, num +1);
}else {
if (fuzzyMatching) {//若开启了模糊匹配
//声母替换
String strPinyinFuzzy =new String(strPinyin);//避免修改原字符串
strPinyinFuzzy = replaceHeadString(strPinyinFuzzy);
boolean flagReplacedHeadString = (strPinyinFuzzy ==null) ?false :true;
if (flagReplacedHeadString) {
num = listPinYin.indexOf(strPinyinFuzzy);
if (num >=0) {//拼音模糊匹配成功
LogUtils.error("fuzzy match: " + strPinyinFuzzy +" num=" + num);
return strSource.substring(num, num +1);
}
}
//韵母替换
strPinyinFuzzy =new String(strPinyin);//避免修改原字符串,不使用声母替换后的字符串
strPinyinFuzzy = replaceTailString(strPinyinFuzzy);
boolean flagReplacedTailString = (strPinyinFuzzy ==null) ?false :true;
if (flagReplacedTailString) {
num = listPinYin.indexOf(strPinyinFuzzy);
if (num >=0) {//拼音模糊匹配成功
LogUtils.error("fuzzy match: " + strPinyinFuzzy +" num=" + num);
return strSource.substring(num, num +1);
}
}
//声母韵母都替换
if (flagReplacedHeadString && flagReplacedTailString) {
strPinyinFuzzy = replaceHeadString(strPinyinFuzzy);
num = listPinYin.indexOf(strPinyinFuzzy);
if (num >=0) {//拼音模糊匹配成功
LogUtils.error("fuzzy match: " + strPinyinFuzzy +" num=" + num);
return strSource.substring(num, num +1);
}
}
strPinyin=strPinyin.substring(0, strPinyin.length()-1);
strPinyinFuzzy =new String(strPinyin);//避免修改原字符串
num=findPinyin(strPinyinFuzzy,listPinYin);
if(num>=0){//拼音模糊匹配成功
return strSource.substring(num, num+1);
}
//声母替换
strPinyinFuzzy = replaceHeadString(strPinyinFuzzy);
flagReplacedHeadString = (strPinyinFuzzy==null)?false:true;
if(flagReplacedHeadString){
num=findPinyin(strPinyinFuzzy,listPinYin);
if(num>=0){//拼音模糊匹配成功
return strSource.substring(num, num+1);
}
}
//韵母替换
strPinyinFuzzy =new String(strPinyin);//避免修改原字符串,不使用声母替换后的字符串
strPinyinFuzzy = replaceTailString(strPinyinFuzzy);
flagReplacedTailString = (strPinyinFuzzy==null)?false:true;
if(flagReplacedTailString){
num=findPinyin(strPinyinFuzzy,listPinYin);
if(num>=0){//拼音模糊匹配成功
return strSource.substring(num, num+1);
}
}
//声母韵母都替换
if(flagReplacedHeadString && flagReplacedTailString){
strPinyinFuzzy = replaceHeadString(strPinyinFuzzy);
num=findPinyin(strPinyinFuzzy,listPinYin);
if(num>=0){//拼音模糊匹配成功
LogUtils.error("fuzzy match: "+strPinyinFuzzy+" num="+num);
return strSource.substring(num, num+1);
}
}
return str;
}else {
return str;
}
}
}else {//若该字符没有找到相应拼音,使用原字符
strOutput = strInput;
}
}catch (Exception e){
e.printStackTrace();
}
return strOutput;
}
private StringreplaceHeadString(String strPinyin){
//声母替换
String strReplaced =null;
if(strPinyin.contains("ZH")){
strReplaced = strPinyin.replace("ZH", "Z");
}else if(strPinyin.contains("CH")){
strReplaced = strPinyin.replace("CH", "C");
}else if(strPinyin.contains("SH")){
strReplaced = strPinyin.replace("SH", "S");
}
else if(strPinyin.contains("Z")){
strReplaced = strPinyin.replace("Z", "ZH");
}else if(strPinyin.contains("C")){
strReplaced = strPinyin.replace("C", "CH");
}else if(strPinyin.contains("S")){
strReplaced = strPinyin.replace("S", "SH");
}
else if(strPinyin.contains("L")){
strReplaced = strPinyin.replace("L", "N");
}else if(strPinyin.indexOf('N')==0){//n有在后面的,n只在做声母时易混
strReplaced = strPinyin.replace("N", "L");
}else {
return null;
}
LogUtils.error("strReplaced="+strReplaced);
return strReplaced;//flagReplaced;
}
private StringreplaceTailString(String strPinyin) {
// 韵母替换
String strReplaced =null;
if (strPinyin.contains("ANG")) {
strReplaced = strPinyin.replace("ANG", "AN");
}else if (strPinyin.contains("ENG")) {
strReplaced = strPinyin.replace("ENG", "EN");
}else if (strPinyin.contains("ING")) {
strReplaced = strPinyin.replace("ING", "IN");
}else if (strPinyin.contains("AN")) {
strReplaced = strPinyin.replace("AN", "ANG");
}else if (strPinyin.contains("EN")) {
strReplaced = strPinyin.replace("EN", "ENG");
}else if (strPinyin.contains("IN")) {
strReplaced = strPinyin.replace("IN", "ING");
}else {
return null;
}
return strReplaced;
}
private int findPinyin(String strPinyin, List listPinYin){
int num=0;
//在目标拼音集合中查找匹配项
for(String strTmp:listPinYin){
if(strTmp.contains(strPinyin) && strPinyin.length()==(strTmp.length()-1) ){
return num;
}
num++;
}
return -1;
}
}
调用:
StringchangeToOurWords(String input){
String output=input;
output =new PinyinSimilarity(true).changeOurWordsWithPinyin(output);
return output;
} 输出自己需要的字符