package cn.pconline.search.common.tools.segment;

import java.util.ArrayList;

import cn.pconline.search.common.tools.segment.bean.Sentence;
import cn.pconline.search.common.tools.segment.utility.GFString;
import cn.pconline.search.common.tools.segment.utility.Utility;

/**
 * Splits a source string into a list of {@link Sentence} fragments.
 *
 * <p>The source is first broken into atoms by {@link GFString#atomSplit}; atoms
 * are then accumulated into a buffer that always starts with
 * {@link Utility#SENTENCE_BEGIN}. Whenever an atom is found in one of the
 * separator sets declared on {@link Utility}, the buffered text is emitted as a
 * sentence and the buffer is reset.
 *
 * <p>The split is performed once, in the constructor; {@link #getSens()} simply
 * returns the stored result.
 */
public class SentenceSeg {
	/** Raw text handed to the constructor; may be {@code null}. */
	private final String src;

	/** Result of {@link #split()}; {@code null} when {@link #src} is {@code null}. */
	private final ArrayList<Sentence> sens;

	/**
	 * Creates a segmenter and immediately splits the given text.
	 *
	 * @param src the text to split; {@code null} is accepted and yields a
	 *            {@code null} sentence list
	 */
	public SentenceSeg(String src) {
		this.src = src;
		this.sens = split();
	}

	/**
	 * Splits {@link #src} into sentences.
	 *
	 * @return the sentences in source order, or {@code null} when {@link #src}
	 *         is {@code null} (kept as-is because existing callers may rely on
	 *         the null result)
	 */
	private ArrayList<Sentence> split() {
		if (src == null) {
			return null;
		}

		ArrayList<Sentence> result = new ArrayList<Sentence>();
		// The buffer is confined to this method, so the unsynchronized
		// StringBuilder is sufficient.
		StringBuilder buffer = new StringBuilder();
		buffer.append(Utility.SENTENCE_BEGIN);

		for (String atom : GFString.atomSplit(src)) {
			// Classify the atom once instead of repeating the lookups below.
			// Per the original comments, link separators are line breaks and
			// spaces, and the separator sets also cover things like commas.
			boolean isLink = Utility.SEPERATOR_LINK.indexOf(atom) != -1;
			boolean isSubSentence = Utility.SEPERATOR_C_SUB_SENTENCE.indexOf(atom) != -1
					|| Utility.SEPERATOR_E_SUB_SENTENCE.indexOf(atom) != -1;
			boolean isSeparator = isLink || isSubSentence
					|| Utility.SEPERATOR_C_SENTENCE.indexOf(atom) != -1;

			if (!isSeparator) {
				buffer.append(atom);
				continue;
			}

			// Punctuation separators stay attached to the sentence they close;
			// link separators are emitted on their own further down.
			if (!isLink) {
				buffer.append(atom);
			}

			// Break the sentence, but only if something besides the begin
			// marker has been collected.
			if (hasPendingText(buffer)) {
				// A sub-sentence separator closes the fragment without the
				// end marker; every other separator gets one.
				if (!isSubSentence) {
					buffer.append(Utility.SENTENCE_END);
				}
				result.add(new Sentence(buffer.toString(), true));
				buffer.setLength(0);
				buffer.append(Utility.SENTENCE_BEGIN);
			}

			// Link separators are stored as standalone entries; the original
			// comment says they need no further analysis (presumably what the
			// one-argument Sentence constructor signals -- confirm in Sentence).
			if (isLink) {
				result.add(new Sentence(atom));
			}
		}

		// Flush whatever follows the last separator as a final sentence.
		if (hasPendingText(buffer)) {
			buffer.append(Utility.SENTENCE_END);
			result.add(new Sentence(buffer.toString(), true));
		}
		return result;
	}

	/**
	 * Tells whether the buffer holds anything beyond the sentence-begin marker.
	 *
	 * @param buffer the accumulation buffer used by {@link #split()}
	 * @return {@code true} if the buffer is non-empty and differs from
	 *         {@link Utility#SENTENCE_BEGIN}
	 */
	private static boolean hasPendingText(StringBuilder buffer) {
		return buffer.length() > 0 && !Utility.SENTENCE_BEGIN.equals(buffer.toString());
	}

	/**
	 * Returns the sentences produced when this object was constructed.
	 *
	 * @return the stored sentence list (the internal instance, not a copy), or
	 *         {@code null} if the source text was {@code null}
	 */
	public ArrayList<Sentence> getSens() {
		return sens;
	}

}
