package cn.com.pconline.adclick.pipeline;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.api.java.UDF1;
import scala.Tuple2;
import scala.collection.JavaConverters;

/* loaded from: input_file:cn/com/pconline/adclick/pipeline/CategoryChoose.class */
/**
 * Pipeline stage that turns a map-typed column (category name to weight) into a sparse
 * vector restricted to a chosen set of categories.
 *
 * <p>The chosen categories are the first-column string values returned by the SQL statement
 * passed to the constructor; {@link #fit(Dataset)} runs that statement and assigns each
 * distinct category one position of the output vector.
 */
public class CategoryChoose implements RowPipelineStage, Serializable {
    private static final long serialVersionUID = 516611903545371688L;

    /** Category name to its position in the output vector; rebuilt on every {@link #fit(Dataset)}. */
    private Map<String, Integer> categoryMap = new HashMap<String, Integer>();
    /** Name of the map-typed column read by {@link #transform(Row)}. */
    private String inputCol;
    /** SQL statement whose first column lists the categories to keep. */
    private String sql;

    /**
     * UDF wrapper that applies the enclosing stage's
     * {@link CategoryChoose#transform(scala.collection.immutable.Map)} to a Scala map value.
     */
    public class CategoryChooseUDF implements UDF1<scala.collection.immutable.Map<String, Double>, Vector> {
        private static final long serialVersionUID = 1L;

        public CategoryChooseUDF() {
        }

        @Override
        public Vector call(scala.collection.immutable.Map<String, Double> map) throws Exception {
            return CategoryChoose.this.transform(map);
        }
    }

    /**
     * @param inputCol name of the map-typed input column
     * @param sql      SQL statement executed in {@link #fit(Dataset)}; its first column must be a string
     */
    public CategoryChoose(String inputCol, String sql) {
        this.inputCol = inputCol;
        this.sql = sql;
    }

    /**
     * Rebuilds the category index by running the configured SQL on the dataset's Spark session.
     *
     * <p>Each distinct category receives the next free index in order of first appearance, so
     * indices always form the dense range {@code 0 .. resultLength() - 1}. Indexing by list
     * position instead would leave gaps whenever the query returns a category twice, and
     * {@link #transform(Map)} would then emit indices beyond the declared vector size.
     *
     * @param dataset dataset whose Spark session is used to execute the SQL statement
     */
    @Override
    public void fit(Dataset<Row> dataset) {
        this.categoryMap.clear();
        List<String> categories = dataset.sparkSession().sql(this.sql).map(new MapFunction<Row, String>() {
            private static final long serialVersionUID = -604955035866062909L;

            @Override
            public String call(Row row) throws Exception {
                return row.getString(0);
            }
        }, Encoders.STRING()).collectAsList();
        for (String category : categories) {
            if (!this.categoryMap.containsKey(category)) {
                this.categoryMap.put(category, Integer.valueOf(this.categoryMap.size()));
            }
        }
    }

    /**
     * @return the number of categories currently indexed, which is the size of the vectors
     *         produced by the {@code transform} methods
     */
    @Override
    public int resultLength() {
        return this.categoryMap.size();
    }

    /**
     * Reads the input column from the row and converts it to a sparse vector.
     *
     * @param row row containing the input column
     * @return sparse vector holding the values of the indexed categories
     * @throws Exception if the column value is {@code null} or not a Scala map
     */
    @Override
    @SuppressWarnings({"unchecked", "rawtypes"})
    public Vector transform(Row row) throws Exception {
        Object value = row.get(row.fieldIndex(this.inputCol));
        if (value instanceof scala.collection.Map) {
            return transform((Map<String, Double>) JavaConverters.mapAsJavaMapConverter((scala.collection.Map) value).asJava());
        }
        // A null column value must produce the descriptive error, not a NullPointerException.
        String actualType = value == null ? "null" : String.valueOf(value.getClass());
        throw new Exception("categorychoose input error" + actualType);
    }

    /**
     * Builds a sparse vector from the entries of {@code map} whose key is an indexed category;
     * entries with other keys are ignored.
     *
     * @param map category name to value
     * @return sparse vector of size {@link #resultLength()}
     */
    public Vector transform(Map<String, Double> map) {
        List<Tuple2<Integer, Double>> elements = new ArrayList<Tuple2<Integer, Double>>();
        for (Map.Entry<String, Double> entry : map.entrySet()) {
            // Stored indices are never null, so a null lookup means "category not selected".
            Integer index = this.categoryMap.get(entry.getKey());
            if (index != null) {
                elements.add(new Tuple2<Integer, Double>(index, entry.getValue()));
            }
        }
        return Vectors.sparse(this.categoryMap.size(), elements);
    }

    /**
     * Scala-map overload of {@link #transform(Map)}.
     *
     * @param map category name to value
     * @return sparse vector of size {@link #resultLength()}
     */
    public Vector transform(scala.collection.immutable.Map<String, Double> map) {
        return transform((Map<String, Double>) JavaConverters.mapAsJavaMapConverter(map).asJava());
    }

    public String getInputCol() {
        return this.inputCol;
    }

    public void setInputCol(String inputCol) {
        this.inputCol = inputCol;
    }

    /**
     * Lists, for each output position, the single input name {@code <inputCol>_<category>}.
     *
     * <p>Entries are emitted in vector-index order rather than in hash-map iteration order, so
     * that element {@code i} of the returned list names the category stored at index {@code i}
     * by {@link #transform(Map)}.
     * NOTE(review): assumes the RowPipelineStage contract is positional - confirm against callers.
     *
     * @return one singleton set per indexed category
     */
    @Override
    public List<Set<String>> inputsForOutputs() {
        int size = this.categoryMap.size();
        // Invert the index map; fit() keeps indices dense in 0..size-1.
        String[] categoryByIndex = new String[size];
        for (Map.Entry<String, Integer> entry : this.categoryMap.entrySet()) {
            categoryByIndex[entry.getValue().intValue()] = entry.getKey();
        }
        List<Set<String>> result = new ArrayList<Set<String>>(size);
        for (String category : categoryByIndex) {
            Set<String> inputs = new HashSet<String>();
            inputs.add(this.inputCol + "_" + category);
            result.add(inputs);
        }
        return result;
    }
}
