package cn.pconline.search.common.tools.homonym;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import cn.pconline.search.common.util.CNToSpell;

/**
 * Homonym dictionary.
 * <p>
 * Keywords are read from one or more classpath word-list files (one keyword
 * per line) and grouped by the key returned by
 * {@code CNToSpell.getFullSpell(String)}. Keywords sharing the same key are
 * treated as homonyms of each other.
 * </p>
 * <p>
 * The dictionary is fully populated by the constructor and is only read
 * afterwards.
 * </p>
 * 
 * @author zengjie
 * @since 2013-9-16
 */
public class Homonyms
{

    private static final Logger logger = Logger.getLogger(Homonyms.class);

    /**
     * Separator used between file names in the constructor argument and
     * between keywords inside a stored homonym group.
     */
    private static final String SEPARATOR = ";";

    /** Character encoding the word-list files are read with. */
    private static final String FILE_ENCODING = "gbk";

    /** Compiled once; {@link #isDomain(String)} may be called frequently. */
    private static final Pattern DOMAIN_PATTERN = Pattern
            .compile("([\\w-]+\\.)+[\\w-]+(/[\\w-\\.?%=]*)*");

    /** Spelling key -> all keywords with that key, joined by {@link #SEPARATOR}. */
    private final Map<String, String> dataMap = new HashMap<String, String>();

    /**
     * Creates the dictionary and loads the given word lists into memory.
     * 
     * @param fileNamesStr
     *            classpath resource name of the word list; several names may
     *            be given separated by ";". {@code null} yields an empty
     *            dictionary.
     * @throws RuntimeException
     *             if one of the resources cannot be found or read
     */
    public Homonyms(String fileNamesStr)
    {
        this.init(fileNamesStr);
    }

    /**
     * Returns the string form of all keywords that share the spelling key of
     * the input.
     * 
     * @param str
     *            keyword to look up
     * @return the matching keywords joined by ";" (this includes the input
     *         itself when it is part of the dictionary), or an empty string
     *         when nothing matches or {@code str} is {@code null}
     * @deprecated use {@link #getHomonymsArr(String)}
     */
    @Deprecated
    public String getHomonyms(String str)
    {
        if (str == null)
        {
            return "";
        }
        String ret = dataMap.get(CNToSpell.getFullSpell(str));
        return ret == null ? "" : ret;
    }

    /**
     * Returns the homonyms of the given string as an array.
     * 
     * @param q
     *            keyword to look up
     * @return the homonyms of {@code q}, never containing {@code q} itself;
     *         {@code null} when {@code q} is blank or has no homonym other
     *         than itself
     */
    public String[] getHomonymsArr(String q)
    {
        if (StringUtils.isBlank(q))
        {
            return null;
        }
        String homonyms = getHomonyms(q);
        if (StringUtils.isEmpty(homonyms))
        {
            return null;
        }
        String[] candidates = homonyms.split(SEPARATOR);
        List<String> others = new ArrayList<String>(candidates.length);
        for (String candidate : candidates)
        {
            // The looked-up word itself is not its own homonym.
            if (!candidate.equals(q))
            {
                others.add(candidate);
            }
        }
        // Callers are given null, not an empty array, when nothing is left.
        return others.isEmpty() ? null : others.toArray(new String[others
                .size()]);
    }

    /**
     * Checks whether the given string looks like a domain name, optionally
     * followed by a path.
     * 
     * @param src
     *            string to test
     * @return {@code true} if the whole string matches the domain pattern;
     *         {@code false} otherwise, including for {@code null}
     */
    public static boolean isDomain(String src)
    {
        return src != null && DOMAIN_PATTERN.matcher(src).matches();
    }

    /**
     * Loads every word list named in {@code fileNamesStr} into memory.
     * Multiple file names are separated by ";"; blank entries are ignored.
     */
    private void init(String fileNamesStr)
    {
        if (fileNamesStr == null)
        {
            return;
        }
        // Only needed while loading: the same keyword may occur in several
        // files (or several times in one file) but must be stored once.
        Set<String> seen = new HashSet<String>();
        for (String fileName : fileNamesStr.split(SEPARATOR))
        {
            String trimmed = fileName.trim();
            if (trimmed.length() == 0)
            {
                // e.g. "a.txt;;b.txt" - nothing to load for the empty entry
                continue;
            }
            loadFromFile(trimmed, seen);
        }
    }

    /**
     * Reads one word list from the classpath and adds its keywords to the
     * dictionary.
     * 
     * @param fileName
     *            classpath resource name of the word list
     * @param seen
     *            keywords already stored; updated with the keywords read here
     * @throws RuntimeException
     *             if the resource is missing or reading it fails
     */
    private void loadFromFile(String fileName, Set<String> seen)
    {
        InputStream in = null;
        BufferedReader reader = null;
        try
        {
            logger.debug("load homonyms file [" + fileName
                    + "] from current classpath");
            in = Homonyms.class.getClassLoader().getResourceAsStream(fileName);
            if (in == null)
            {
                // getResourceAsStream signals a missing resource with null;
                // fail with a meaningful cause instead of a later NPE.
                throw new FileNotFoundException("classpath resource ["
                        + fileName + "] not found");
            }
            reader = new BufferedReader(new InputStreamReader(in,
                    FILE_ENCODING));
            String line = null;
            while ((line = reader.readLine()) != null)
            {
                if (StringUtils.isBlank(line))
                {
                    // blank lines carry no keyword
                    continue;
                }
                // Set.add returns false when the keyword was already stored.
                if (!seen.add(line))
                {
                    continue;
                }
                String spell = CNToSpell.getFullSpell(line);
                String group = dataMap.get(spell);
                dataMap.put(spell, group != null ? (group + SEPARATOR + line)
                        : line);
            }
        }
        catch (Exception e)
        {
            throw new RuntimeException("读取文件" + fileName + "发生异常！", e);
        }
        finally
        {
            // Closing the reader also closes the wrapped stream; fall back to
            // the raw stream when the reader could not be created.
            Closeable resource = reader;
            if (resource == null)
            {
                resource = in;
            }
            closeQuietly(resource, fileName);
        }
    }

    /**
     * Closes the resource, logging instead of propagating a failure so that
     * it cannot mask an exception raised while reading.
     */
    private static void closeQuietly(Closeable resource, String fileName)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (Exception e)
        {
            logger.warn("failed to close homonyms file [" + fileName + "]", e);
        }
    }

}
