package cn.com.pconline.adclick.pipeline;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.feature.Word2Vec;
import org.apache.spark.mllib.feature.Word2VecModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/* loaded from: input_file:cn/com/pconline/adclick/pipeline/RowWord2Vec.class */
/**
 * Pipeline stage that maps the string value of one input column to its Word2Vec embedding.
 *
 * <p>The stage either trains its own {@link Word2VecModel} in {@link #fit(Dataset)} from the
 * rows returned by a training SQL statement, or wraps a model supplied by the caller, in which
 * case {@link #fit(Dataset)} does nothing.
 *
 * <p>NOTE(review): {@code vectorSize} is not derived from a caller-supplied model. When a
 * pre-trained model is passed in, call {@link #setVectorSize(int)} with that model's dimension,
 * otherwise {@link #resultLength()}, {@link #inputsForOutputs()} and the zero-vector fallback of
 * {@link #transform(Row)} keep using the default of 100.
 */
public class RowWord2Vec implements RowPipelineStage, Serializable {
    private static final long serialVersionUID = 558359004643344657L;
    private String inputCol;
    private String traindataSQL;
    private double learningRate = 0.8d;
    private int maxSentenceLength = 1000;
    private int minCount = 1;
    private int numIterations = 1;
    private int numPartitions = 1;
    private long seed = 42L;
    private int vectorSize = 100;
    private int window = 10;
    private Word2VecModel model;
    private boolean fitFlag = true;

    /**
     * Creates a stage that trains its own model when {@link #fit(Dataset)} is called.
     *
     * @param inputCol name of the row field holding the word to embed
     * @param traindataSQL SQL statement whose first result column is a list of strings
     *     (one training sentence per row)
     */
    public RowWord2Vec(String inputCol, String traindataSQL) {
        this.inputCol = inputCol;
        this.traindataSQL = traindataSQL;
    }

    /**
     * Creates a stage around an existing model; {@link #fit(Dataset)} will not train.
     *
     * @param inputCol name of the row field holding the word to embed
     * @param word2VecModel model used by {@link #transform(Row)}
     */
    public RowWord2Vec(String inputCol, Word2VecModel word2VecModel) {
        this.inputCol = inputCol;
        this.model = word2VecModel;
        this.fitFlag = false;
    }

    /**
     * Extracts the first column of a row as a list of strings.
     *
     * <p>Declared as a static nested class so that the function shipped to Spark does not carry
     * a hidden reference to the enclosing {@code RowWord2Vec} instance.
     */
    private static final class SentenceExtractor implements Function<Row, List<String>> {
        private static final long serialVersionUID = -2421132145152416696L;

        @Override
        public List<String> call(Row row) throws Exception {
            return row.getList(0);
        }
    }

    /**
     * Trains the Word2Vec model from the result of the training SQL, using the session of the
     * given dataset. Does nothing when the stage was built around an existing model.
     *
     * @param dataset dataset whose Spark session is used to run the training SQL
     * @throws IllegalStateException if training is required but no training SQL is set
     * @throws Exception if running the query or training fails
     */
    @Override // cn.com.pconline.adclick.pipeline.RowPipelineStage
    public void fit(Dataset<Row> dataset) throws Exception {
        if (!this.fitFlag) {
            return;
        }
        if (this.traindataSQL == null) {
            throw new IllegalStateException("RowWord2Vec training SQL has not been set");
        }
        JavaRDD<List<String>> sentences = dataset.sparkSession()
                .sql(this.traindataSQL)
                .javaRDD()
                .map(new SentenceExtractor());
        Word2Vec word2Vec = new Word2Vec();
        word2Vec.setLearningRate(this.learningRate);
        word2Vec.setMaxSentenceLength(this.maxSentenceLength);
        word2Vec.setMinCount(this.minCount);
        word2Vec.setNumIterations(this.numIterations);
        word2Vec.setNumPartitions(this.numPartitions);
        word2Vec.setSeed(this.seed);
        word2Vec.setVectorSize(this.vectorSize);
        word2Vec.setWindowSize(this.window);
        this.model = word2Vec.fit(sentences);
    }

    /**
     * Looks up the embedding of the input column's string value.
     *
     * @param row row containing the input column
     * @return the model's vector for the value, or a zero vector of length {@code vectorSize}
     *     when the lookup raises {@link IllegalStateException} (presumably Spark's signal for a
     *     word that is not in the vocabulary)
     * @throws IllegalStateException if no model has been trained or supplied
     * @throws Exception if reading the field from the row fails
     */
    @Override // cn.com.pconline.adclick.pipeline.RowPipelineStage
    public Vector transform(Row row) throws Exception {
        if (this.model == null) {
            throw new IllegalStateException("RowWord2Vec word2vec model has not been trained");
        }
        try {
            return this.model.transform(row.getString(row.fieldIndex(this.inputCol)));
        } catch (IllegalStateException e) {
            // Deliberate best-effort: a failed lookup contributes an all-zero embedding.
            return Vectors.zeros(this.vectorSize);
        }
    }

    /**
     * @return the configured embedding dimension
     */
    @Override // cn.com.pconline.adclick.pipeline.RowPipelineStage
    public int resultLength() {
        return this.vectorSize;
    }

    /** @return name of the row field read by {@link #transform(Row)} */
    public String getInputCol() {
        return this.inputCol;
    }

    /** @param inputCol name of the row field read by {@link #transform(Row)} */
    public void setInputCol(String inputCol) {
        this.inputCol = inputCol;
    }

    /** @return SQL statement used to load training sentences */
    public String getTraindataSQL() {
        return this.traindataSQL;
    }

    /** @param traindataSQL SQL statement used to load training sentences */
    public void setTraindataSQL(String traindataSQL) {
        this.traindataSQL = traindataSQL;
    }

    /** @return learning rate passed to the trainer */
    public double getLearningRate() {
        return this.learningRate;
    }

    /** @param learningRate learning rate passed to the trainer */
    public void setLearningRate(double learningRate) {
        this.learningRate = learningRate;
    }

    /** @return maximum sentence length passed to the trainer */
    public int getMaxSentenceLength() {
        return this.maxSentenceLength;
    }

    /** @param maxSentenceLength maximum sentence length passed to the trainer */
    public void setMaxSentenceLength(int maxSentenceLength) {
        this.maxSentenceLength = maxSentenceLength;
    }

    /** @return minimum word count passed to the trainer */
    public int getMinCount() {
        return this.minCount;
    }

    /** @param minCount minimum word count passed to the trainer */
    public void setMinCount(int minCount) {
        this.minCount = minCount;
    }

    /** @return number of iterations passed to the trainer */
    public int getNumIterations() {
        return this.numIterations;
    }

    /** @param numIterations number of iterations passed to the trainer */
    public void setNumIterations(int numIterations) {
        this.numIterations = numIterations;
    }

    /** @return number of partitions passed to the trainer */
    public int getNumPartitions() {
        return this.numPartitions;
    }

    /** @param numPartitions number of partitions passed to the trainer */
    public void setNumPartitions(int numPartitions) {
        this.numPartitions = numPartitions;
    }

    /** @return random seed passed to the trainer */
    public long getSeed() {
        return this.seed;
    }

    /** @param seed random seed passed to the trainer */
    public void setSeed(long seed) {
        this.seed = seed;
    }

    /** @return embedding dimension used for training, result length and the zero fallback */
    public int getVectorSize() {
        return this.vectorSize;
    }

    /** @param vectorSize embedding dimension used for training, result length and zero fallback */
    public void setVectorSize(int vectorSize) {
        this.vectorSize = vectorSize;
    }

    /** @return window size passed to the trainer */
    public int getWindow() {
        return this.window;
    }

    /** @param window window size passed to the trainer */
    public void setWindow(int window) {
        this.window = window;
    }

    /** @return the trained or supplied model, or {@code null} if none is available yet */
    public Word2VecModel getModel() {
        return this.model;
    }

    /**
     * Describes, for each of the {@code vectorSize} output positions, which input columns it
     * depends on: every position depends only on the input column.
     *
     * @return a new list holding one independent, mutable single-element set per output position
     */
    @Override // cn.com.pconline.adclick.pipeline.RowPipelineStage
    public List<Set<String>> inputsForOutputs() {
        // Math.max guards the capacity argument so a non-positive size still yields an empty list.
        List<Set<String>> inputsPerOutput = new ArrayList<Set<String>>(Math.max(this.vectorSize, 0));
        for (int i = 0; i < this.vectorSize; i++) {
            Set<String> inputs = new HashSet<String>();
            inputs.add(this.inputCol);
            inputsPerOutput.add(inputs);
        }
        return inputsPerOutput;
    }
}
