package cn.pconline.search.common.tools.segment;

import java.io.IOException;
import java.util.ArrayList;

import cn.pconline.search.common.tools.segment.bean.Atom;
import cn.pconline.search.common.tools.segment.bean.Dictionary;
import cn.pconline.search.common.tools.segment.bean.MidResult;
import cn.pconline.search.common.tools.segment.bean.SegNode;
import cn.pconline.search.common.tools.segment.bean.SegResult;
import cn.pconline.search.common.tools.segment.bean.Sentence;
import cn.pconline.search.common.tools.segment.utility.Utility;


/**
 * Splits raw text into sentences and segments each sentence into words,
 * recording every intermediate stage in a {@link MidResult}.
 *
 * <p>All dictionaries and taggers are loaded once, in the static initializer,
 * and are shared by every instance of this class.
 */
public class Segmenter {
	/** Core dictionary, used for graph generation and by every tagger. */
	private static final Dictionary coreDict;

	/** Bigram dictionary, used when building the bigram ("bi") segmentation graphs. */
	private static final Dictionary bigramDict;

	/** Tagger created with {@code TT_PERSON}. */
	private static final PosTagger personTagger;

	/** Tagger created with {@code TT_TRANS_PERSON}. */
	private static final PosTagger transPersonTagger;

	/** Tagger created with {@code TT_PLACE}. */
	private static final PosTagger placeTagger;

	/** Tagger created with {@code TT_NORMAL}; applied to the final segmentation path. */
	private static final PosTagger lexTagger;

	static {
		try {
			System.out.println("Segmenter load coreDict  ...");
			coreDict = new Dictionary("yisou/Data/coreDict.dct");
			System.out.println("Segmenter load bigramDict ...");
			bigramDict = new Dictionary("yisou/Data/bigramDict.dct");

			System.out.println("Segmenter load tagger dict ...");
			personTagger = new PosTagger(Utility.TAG_TYPE.TT_PERSON, "yisou/Data/nr", coreDict);
			transPersonTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, "yisou/Data/tr", coreDict);
			placeTagger = new PosTagger(Utility.TAG_TYPE.TT_PLACE, "yisou/Data/ns", coreDict);
			lexTagger = new PosTagger(Utility.TAG_TYPE.TT_NORMAL, "yisou/Data/lexical", coreDict);
			System.out.println("Segmenter load dict is over");
		} catch (IOException e) {
			// A missing or unreadable dictionary makes the class unusable, so
			// class initialization is aborted.
			throw new RuntimeException(e);
		}
		// NOTE(review): explicit GC request kept from the original; presumably meant
		// to reclaim temporary garbage produced while loading — confirm it is still wanted.
		System.gc();
	}

	public Segmenter() {
	}

	/**
	 * Segments {@code src} using a single candidate path for the first pass.
	 *
	 * @param src the text to segment; may be {@code null}
	 * @return the segmentation result
	 * @see #split(String, int)
	 */
	public SegResult split(String src) {
		return split(src, 1);
	}

	/**
	 * Segments {@code src} sentence by sentence.
	 *
	 * <p>One {@link MidResult} is added to the returned result per sentence, in
	 * order. Sentences for which {@code isSeg()} is false only get their index
	 * and source text recorded. When {@code src} is {@code null} no sentence is
	 * processed and only the elapsed time is set.
	 *
	 * @param src          the text to segment; may be {@code null}
	 * @param segPathCount number of shortest paths requested from the first
	 *                     N-shortest-path search
	 * @return the segmentation result, including the time spent in milliseconds
	 * @throws RuntimeException if processing any sentence fails; the message
	 *                          contains the offending sentence and the cause is
	 *                          the original failure
	 */
	public SegResult split(String src, int segPathCount) {
		SegResult sr = new SegResult(src);
		long spendTimeStart = System.currentTimeMillis();
		if (src != null) {
			int index = 0;
			sr.setRawContent(src);
			SentenceSeg ss = new SentenceSeg(src);
			ArrayList<Sentence> sens = ss.getSens();
			for (Sentence sen : sens) {
				try {
					sr.addMidResult(segmentSentence(sen, index++, segPathCount));
				} catch (Throwable e) {
					// Kept broad so that callers keep receiving a RuntimeException that
					// names the offending sentence, whatever the underlying failure was.
					throw new RuntimeException("split \"" + sen.getContent() + "\"", e);
				}
			}
		}
		sr.setSpendTime(System.currentTimeMillis() - spendTimeStart);
		return sr;
	}

	/**
	 * Builds the {@link MidResult} for one sentence, running the full
	 * segmentation pipeline when the sentence is marked as segmentable.
	 */
	private MidResult segmentSentence(Sentence sen, int index, int segPathCount) {
		MidResult mr = new MidResult();
		mr.setIndex(index);
		mr.setSource(sen.getContent());
		if (!sen.isSeg()) {
			return mr;
		}

		// Atom segmentation.
		AtomSeg as = new AtomSeg(sen.getContent());
		ArrayList<Atom> atoms = as.getAtoms();
		mr.setAtoms(atoms);

		// Build the segmentation graph from the atoms.
		SegGraph segGraph = GraphGenerate.generate(atoms, coreDict);
		mr.setSegGraph(segGraph.getSnList());

		// Build the bigram segmentation graph.
		SegGraph biSegGraph = GraphGenerate.biGenerate(segGraph, coreDict, bigramDict);
		mr.setBiSegGraph(biSegGraph.getSnList());

		// N-shortest paths over the bigram graph.
		NShortPath nsp = new NShortPath(biSegGraph, segPathCount);
		ArrayList<ArrayList<Integer>> bipath = nsp.getPaths();
		mr.setBipath(bipath);
		if (bipath == null || bipath.isEmpty()) {
			// Without at least one path there is no graph to optimize; fail with a
			// clear message instead of a NullPointerException further down.
			throw new IllegalStateException("N-shortest-path search returned no path");
		}

		SegGraph optSegGraph = buildOptimizedGraph(segGraph, bipath, mr);
		mr.setOptSegGraph(optSegGraph.getSnList());

		// Regenerate the bigram graph from the optimized graph.
		SegGraph optBiSegGraph = GraphGenerate.biGenerate(optSegGraph, coreDict, bigramDict);
		mr.setOptBiSegGraph(optBiSegGraph.getSnList());

		// Search again, this time for the single shortest path.
		NShortPath optNsp = new NShortPath(optBiSegGraph, 1);
		ArrayList<ArrayList<Integer>> optBipath = optNsp.getPaths();
		if (optBipath == null || optBipath.isEmpty()) {
			throw new IllegalStateException("shortest-path search on the optimized graph returned no path");
		}
		ArrayList<Integer> optOnePath = optBipath.get(0);
		mr.setOptBipath(optOnePath);

		// Produce the optimized path, tag it, then apply the final adjustment.
		ArrayList<SegNode> optSegPath = getSegPath(optSegGraph, optOnePath);
		lexTagger.recognition(optSegPath);
		mr.setOptSegPath(optSegPath);
		ArrayList<SegNode> adjResult = AdjustSeg.finaAdjust(optSegPath, personTagger, placeTagger);
		mr.setFinalSegPath(adjResult);
		return mr;
	}

	/**
	 * Merges every first-pass path into one graph and runs the person,
	 * transliterated-person and place taggers over each path.
	 *
	 * <p>The graph is created from a copy of the first adjusted path; the nodes
	 * of later paths are added to it directly. Each adjusted path is also
	 * recorded on {@code mr}.
	 *
	 * @param bipath non-empty list of bigram paths
	 * @return the merged graph
	 */
	private SegGraph buildOptimizedGraph(SegGraph segGraph, ArrayList<ArrayList<Integer>> bipath, MidResult mr) {
		SegGraph optSegGraph = null;
		for (ArrayList<Integer> onePath : bipath) {
			// First-pass segmentation path and its initial adjustment.
			ArrayList<SegNode> segPath = getSegPath(segGraph, onePath);
			ArrayList<SegNode> firstPath = AdjustSeg.firstAdjust(segPath);
			mr.addFirstSegPath(firstPath);

			if (optSegGraph == null) {
				optSegGraph = new SegGraph(true, clone(firstPath));
			} else {
				for (SegNode sg : firstPath) {
					optSegGraph.setElement(sg);
				}
			}

			// Handle words missing from the dictionary: the taggers work on a
			// copy of the path and update the merged graph.
			ArrayList<SegNode> sns = clone(firstPath);
			personTagger.recognition(optSegGraph, sns);
			transPersonTagger.recognition(optSegGraph, sns);
			placeTagger.recognition(optSegGraph, sns);
		}
		return optSegGraph;
	}

	/**
	 * Copies a node list by cloning every element.
	 *
	 * @return a new list of cloned nodes, or {@code null} when {@code sns} is
	 *         {@code null} or empty
	 */
	private ArrayList<SegNode> clone(ArrayList<SegNode> sns) {
		if (sns == null || sns.isEmpty()) {
			return null;
		}
		ArrayList<SegNode> result = new ArrayList<SegNode>();
		for (SegNode sn : sns) {
			result.add(sn.clone());
		}
		return result;
	}

	/**
	 * Turns a bigram path (a list of node indexes) into the corresponding
	 * nodes of {@code sg}.
	 *
	 * @return the nodes in path order, or {@code null} when either argument is
	 *         {@code null}
	 */
	private ArrayList<SegNode> getSegPath(SegGraph sg, ArrayList<Integer> bipath) {
		if (sg == null || bipath == null) {
			return null;
		}
		ArrayList<SegNode> sns = sg.getSnList();
		ArrayList<SegNode> path = new ArrayList<SegNode>();
		for (int index : bipath) {
			path.add(sns.get(index));
		}
		return path;
	}

}
