package cn.com.pconline.adclick.pipeline;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;

/* loaded from: input_file:cn/com/pconline/adclick/pipeline/RowOneHotEncoder.class */
/**
 * Row-level one-hot encoder for a column holding integer category indices.
 *
 * <p>{@link #fit(Dataset)} derives the output vector length from the largest index found in
 * the input column; {@link #transform(Row)} then turns the index stored in a single row into
 * a sparse vector with a single {@code 1.0} at that index.
 *
 * <p>{@code dropLast} is only read inside {@link #fit(Dataset)}, so it must be configured
 * before fitting for it to have any effect.
 */
public class RowOneHotEncoder implements RowPipelineStage, Serializable {
    private static final long serialVersionUID = 8204523120185924015L;

    /** Name of the column that holds the category index. */
    private String inputCol;

    /** Length of the produced vectors; stays 0 until {@link #fit(Dataset)} has run. */
    private int size = 0;

    /** When true, the highest category gets no slot of its own (it encodes as all zeros). */
    private boolean dropLast = false;

    /**
     * Creates an encoder for the given column.
     *
     * @param str name of the input column holding the category index
     */
    public RowOneHotEncoder(String str) {
        this.inputCol = str;
    }

    /**
     * Computes the output vector length as {@code max(inputCol) + 1}, minus one when
     * {@code dropLast} is set.
     *
     * <p>An empty dataset or a column containing only nulls makes the aggregate return null;
     * that case is treated as "no categories". The resulting size is never negative.
     *
     * @param dataset data whose input column is scanned for the largest category index
     */
    @Override
    public void fit(Dataset<Row> dataset) {
        Column maxIndex = functions.max(dataset.col(this.inputCol).cast(DataTypes.IntegerType));
        // A global aggregate always yields exactly one row, but its value is null
        // when there was nothing to aggregate.
        Row aggregated = dataset.agg(maxIndex).first();
        int categories = aggregated.isNullAt(0) ? 0 : aggregated.getInt(0) + 1;
        if (this.dropLast) {
            categories--;
        }
        // Guard against negative lengths (all indices negative, or dropLast on empty data),
        // which would make every later Vectors.sparse call invalid.
        this.size = Math.max(categories, 0);
    }

    /**
     * Encodes the category index found in {@code row} as a sparse one-hot vector of length
     * {@link #size()}.
     *
     * <p>A null value, a negative index, or an index {@code >= size} (which includes the
     * dropped last category) produces the all-zero vector.
     *
     * @param row row containing the input column
     * @return sparse vector of length {@code size} with at most one non-zero entry
     */
    @Override
    public Vector transform(Row row) {
        int position = row.fieldIndex(this.inputCol);
        if (!row.isNullAt(position)) {
            int category = row.getInt(position);
            if (category >= 0 && category < this.size) {
                return Vectors.sparse(this.size, new int[] {category}, new double[] {1.0d});
            }
        }
        return Vectors.sparse(this.size, new int[0], new double[0]);
    }

    /**
     * @return length of the vectors produced by {@link #transform(Row)}
     */
    public int size() {
        return this.size;
    }

    /**
     * @return name of the input column
     */
    public String getInputCol() {
        return this.inputCol;
    }

    /**
     * @param str name of the input column holding the category index
     */
    public void setInputCol(String str) {
        this.inputCol = str;
    }

    /**
     * @return whether the last category is dropped when fitting
     */
    public boolean isDropLast() {
        return this.dropLast;
    }

    /**
     * @param z true to drop the last category; only takes effect on the next
     *     {@link #fit(Dataset)} call
     */
    public void setDropLast(boolean z) {
        this.dropLast = z;
    }

    /**
     * @return length of the vectors produced by {@link #transform(Row)}; same as
     *     {@link #size()}
     */
    @Override
    public int resultLength() {
        return this.size;
    }

    /**
     * Describes each output slot by a singleton set containing {@code inputCol + "_" + index}.
     *
     * @return one single-element set per output position, in position order
     */
    @Override
    public List<Set<String>> inputsForOutputs() {
        List<Set<String>> labels = new ArrayList<>(this.size);
        for (int slot = 0; slot < this.size; slot++) {
            Set<String> label = new HashSet<>();
            label.add(this.inputCol + "_" + slot);
            labels.add(label);
        }
        return labels;
    }
}
