pinyin4j实现汉字转拼音

2017-01-25  本文已影响0人  Pines_

说明:本文提供一个汉字转换汉语拼音的工具类,支持多音字,并保留其他无法转换的字符。

源代码
https://github.com/whitePines/pinyinTip.git

工具类

package com.test.caoxs.pinyinTest;

import java.util.ArrayList;
import java.util.HashSet;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

/**
 * Utility for converting Chinese characters to Hanyu Pinyin.
 *
 * <p>Polyphonic characters contribute every reading returned by pinyin4j, and
 * characters that cannot be converted are kept unchanged in the output.
 *
 * @author whitePines
 */
public class PinYinUtil {
    /** Output layouts understood by {@link #allPossiblePys(String, TypeEnum)}. */
    public enum TypeEnum {
        /** Only the first letter of each character's reading. */
        py,
        /** Full reading for the character at index 0, first letters for every later character. */
        piny,
        /** Full reading of every character. */
        pinyin;
    }

    /**
     * Output format shared by every conversion: lower case, tone information dropped.
     * It is configured once here and never modified afterwards.
     */
    private static final HanyuPinyinOutputFormat OUTPUT_FORMAT = newOutputFormat();

    /** Builds the lower-case, tone-less format used for all lookups. */
    private static HanyuPinyinOutputFormat newOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Returns every spelling of the given string in the requested layout.
     *
     * <p>Each character contributes one or more candidate fragments (several when
     * the character is polyphonic); the result is the set of all concatenations
     * obtained by picking one fragment per character, in character order.
     * The number of results can therefore grow multiplicatively with the number
     * of polyphonic characters in the input.
     *
     * @param chineseStr text to convert; may be {@code null} or empty
     * @param type       layout of the result; {@code null} is treated like
     *                   {@link TypeEnum#pinyin}
     * @return the set of possible spellings; for {@code null} or empty input the
     *         set contains only the empty string
     */
    public static HashSet<String> allPossiblePys(String chineseStr, TypeEnum type) {
        HashSet<String> allPossiblePys = new HashSet<String>();
        allPossiblePys.add("");
        if (chineseStr == null || chineseStr.length() == 0) {
            return allPossiblePys;
        }
        for (String[] fragments : getStringPys(chineseStr, type)) {
            // Extend every spelling built so far with every fragment of this character.
            HashSet<String> extended = new HashSet<String>();
            for (String fragment : fragments) {
                for (String prefix : allPossiblePys) {
                    extended.add(prefix + fragment);
                }
            }
            allPossiblePys = extended;
        }
        return allPossiblePys;
    }

    /**
     * Collects the candidate fragments of every character of the string.
     *
     * @return one array per {@code char} of the input, in input order
     */
    private static ArrayList<String[]> getStringPys(String chineseStr, TypeEnum type) {
        char[] chars = chineseStr.toCharArray();
        ArrayList<String[]> pinyinList = new ArrayList<String[]>(chars.length);
        for (int i = 0; i < chars.length; i++) {
            pinyinList.add(getCharPYs(chars[i], i, type));
        }
        return pinyinList;
    }

    /**
     * Picks the fragments for one character according to the layout.
     *
     * @param c     character to convert
     * @param index position of the character in the source string; only index 0
     *              gets its full reading in the {@link TypeEnum#piny} layout
     * @param type  requested layout, or {@code null} for full readings
     */
    private static String[] getCharPYs(char c, int index, TypeEnum type) {
        // switch on a null enum reference throws NullPointerException, so route
        // null to the same fallback the default branch provides.
        if (type == null) {
            return getPinyin(c);
        }
        switch (type) {
        case py:
            return getPy(c);
        case piny:
            return index == 0 ? getPinyin(c) : getPy(c);
        case pinyin:
        default:
            return getPinyin(c);
        }
    }

    /**
     * Returns the first letter of each reading of the character.
     *
     * <p>The array has one entry per reading, so readings that start with the
     * same letter produce repeated entries. A character without a pinyin reading
     * yields a single entry holding the character itself.
     *
     * @param chineseChar character to convert
     * @return the first letters, one per reading
     */
    public static String[] getPy(char chineseChar) {
        String[] strs = turnProcess(chineseChar);
        for (int i = 0; i < strs.length; i++) {
            strs[i] = strs[i].substring(0, 1);
        }
        return strs;
    }

    /**
     * Returns the lower-case, tone-less readings of the character.
     *
     * @param chineseChar character to convert
     * @return the readings, or a single entry holding the character itself when
     *         it has no pinyin reading
     */
    public static String[] getPinyin(char chineseChar) {
        return turnProcess(chineseChar);
    }

    /**
     * Looks up the readings of one character through pinyin4j.
     *
     * @return the readings, never {@code null} and never empty
     */
    private static String[] turnProcess(char chineseChar) {
        String[] strs = null;
        try {
            strs = PinyinHelper.toHanyuPinyinStringArray(chineseChar, OUTPUT_FORMAT);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort: report the problem and fall through to the
            // keep-the-original-character path below.
            e.printStackTrace();
        }
        // Characters that cannot be converted are kept as they are.
        if (strs == null || strs.length == 0) {
            strs = new String[] { String.valueOf(chineseChar) };
        }
        return strs;
    }
}

测试类

package com.test.caoxs.pinyinTest;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

import org.junit.Test;

import com.test.caoxs.pinyinTest.PinYinUtil.TypeEnum;

/**
 * Smoke tests for {@link PinYinUtil}: every test prints the conversion results
 * to standard output; none of them asserts on the values.
 */
public class PinYinUtilTester {
    /** Line printed after a test's output to separate it from the next one. */
    private static final String SEPARATOR = "--------------------------------------";

    /** Prints every element on its own line, followed by the separator line. */
    private static void dump(String[] values) {
        for (int k = 0; k < values.length; k++) {
            System.out.println(values[k]);
        }
        System.out.println(SEPARATOR);
    }

    /**
     * Prints every element as {@code prefix + position + "条" + element}, with
     * positions starting at 1, followed by the separator line.
     */
    private static void dumpNumbered(String prefix, HashSet<String> values) {
        int position = 0;
        for (String value : values) {
            position++;
            System.out.println(prefix + position + "条" + value);
        }
        System.out.println(SEPARATOR);
    }

    /** Prints {@code key-->value} for every value stored under each key, in key order. */
    private static void dumpMapped(ArrayList<String> keys, HashMap<String, HashSet<String>> byKey) {
        for (String key : keys) {
            for (String value : byKey.get(key)) {
                System.out.println(key + "-->" + value);
            }
        }
    }

    /** First letters of a punctuation character. */
    @Test
    public void testChinesePunctuation() {
        dump(PinYinUtil.getPy('!'));
    }

    /** First letters of a single polyphonic character. */
    @Test
    public void testGetCharPy() {
        dump(PinYinUtil.getPy('都'));
    }

    /** Full readings of a single polyphonic character. */
    @Test
    public void testGetCharPinyin() {
        dump(PinYinUtil.getPinyin('都'));
    }

    /** Whole string in the {@code py} layout. */
    @Test
    public void testGetStrPy() {
        dumpNumbered("py格式的第", PinYinUtil.allPossiblePys("都在中华人民共和国", TypeEnum.py));

    }

    /** Whole string in the {@code piny} layout. */
    @Test
    public void testGetStrPiny() {
        dumpNumbered("piny格式的第", PinYinUtil.allPossiblePys("都在中华人民共和国", TypeEnum.piny));

    }

    /** {@code piny} layout when the string starts with punctuation. */
    @Test
    public void testGetStrPiny2() {
        dumpNumbered("piny格式的第", PinYinUtil.allPossiblePys("!都在中华人民共和国", TypeEnum.piny));

    }

    /** {@code piny} layout with a prefix of punctuation, brackets, underscore and digits. */
    @Test
    public void testGetStrPiny3() {
        dumpNumbered("piny格式的第", PinYinUtil.allPossiblePys("!<>《》_102都在中华人民共和国", TypeEnum.piny));

    }

    /** Whole string in the {@code pinyin} layout. */
    @Test
    public void testGetStrPinyin() {
        dumpNumbered("pinyin格式的第", PinYinUtil.allPossiblePys("都在中华人民共和国", TypeEnum.pinyin));

    }

    /** Empty input in the {@code pinyin} layout. */
    @Test
    public void testGetNullStrPinyin() {
        dumpNumbered("pinyin格式的第", PinYinUtil.allPossiblePys("", TypeEnum.pinyin));
    }

    /** Converts every sample of {@link Data} in all three layouts, then prints the results per layout. */
    @Test
    public void testGetDataPinyin() {
        System.out.println("testGetDataPinyin--------------------------------------begin");
        ArrayList<String> samples = new Data().strs;
        HashMap<String, HashSet<String>> firstLetters = new HashMap<String, HashSet<String>>();
        HashMap<String, HashSet<String>> mixed = new HashMap<String, HashSet<String>>();
        HashMap<String, HashSet<String>> fullReadings = new HashMap<String, HashSet<String>>();

        // All conversions are done before anything is printed.
        for (int k = 0; k < samples.size(); k++) {
            String sample = samples.get(k);
            firstLetters.put(sample, PinYinUtil.allPossiblePys(sample, TypeEnum.py));
            mixed.put(sample, PinYinUtil.allPossiblePys(sample, TypeEnum.piny));
            fullReadings.put(sample, PinYinUtil.allPossiblePys(sample, TypeEnum.pinyin));
        }

        System.out.println("----------->py");
        dumpMapped(samples, firstLetters);
        System.out.println("----------->piny");
        dumpMapped(samples, mixed);
        System.out.println("----------->pinyin");
        dumpMapped(samples, fullReadings);

        System.out.println("testGetDataPinyin--------------------------------------end");

    }

}

Data类

package com.test.caoxs.pinyinTest;

import java.util.ArrayList;

/** Sample strings (ASCII words and Chinese phrases) used by the tests. */
public class Data {
    /** The sample entries, in the order they are added to {@link #strs}. */
    private static final String[] SAMPLES = {
        "work",
        "log",
        "maxiang",
        "diary",
        "work1",
        "DB",
        "git",
        "mix",
        "工作",
        "学习",
        "杂",
        "马克飞象",
        "日记",
        "兴趣",
        "项目",
        "附件",
        "开发工具",
    };

    /** Mutable list holding the samples; each instance gets its own list. */
    ArrayList<String> strs = new ArrayList<String>();

    /** Fills {@link #strs} with all sample entries. */
    Data() {
        for (int k = 0; k < SAMPLES.length; k++) {
            strs.add(SAMPLES[k]);
        }
    }
}

笔者对粘贴大段代码是深恶痛绝的,然而,到自己开始写的时候竟然也这么做了。真该拉出去枪毙。

本文略显单薄,毕竟只是临时写的。如果以后还会用到 pinyin4j,会在这个基础上新增和修改内容。

上一篇下一篇

猜你喜欢

热点阅读