package cn.quantgroup.dbc.spark.fingerprint;

import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;

import java.util.*;

/**
 * Spark batch job that reads {@code finger_print_info} from the {@code hbase_full}
 * database, expands the JSON held in each row's {@code data} column, and writes one
 * {@code \001}-delimited text line per row to HDFS, overwriting any previous output.
 *
 * <p>Each output line consists of the non-{@code data} columns listed in
 * {@link #HEADERS}, followed by one field for every entry of
 * {@code EquipmentFingerprintApp.dataColumns}. Rows whose {@code data} value is blank,
 * is not wrapped in {@code { ... }}, or cannot be parsed as JSON are dropped.
 */
public class FingerPrintHdfs {

    /** Columns selected from the source table, in the order they are written out. */
    private static final String[] HEADERS =
            {"userid", "type", "createdate", "data", "memo", "params", "timestamp", "htime", "dt"};

    /** Name of the column holding the JSON payload; it is expanded rather than copied. */
    private static final String DATA_COLUMN = "data";

    /** Field separator used in the output text file (the single character U+0001). */
    private static final String FIELD_DELIMITER = "\001";

    /** Number of partitions, and therefore output files, the result is written with. */
    private static final int OUTPUT_PARTITIONS = 300;

    /** Destination directory; it is overwritten on every run. */
    private static final String OUTPUT_PATH = "hdfs:///app/user/data/user_data/feng.ren/finger_print_info";

    /**
     * Runs the export job.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkSession sparkSession = SparkSession.builder()
                .appName("FingerPrintHdfs")
                .getOrCreate();

        try {
            sparkSession.sql("use hbase_full");

            Dataset<Row> source = sparkSession.sql("select userid, type, createdate, data, memo, params, timestamp, htime,dt from finger_print_info");

            // The explicit casts select the Java-friendly overloads of map/filter. A bare
            // lambda is ambiguous between those and the Scala Function1 overloads on
            // Scala 2.12+ builds of Spark.
            Dataset<String> lines = source
                    .map((MapFunction<Row, String>) FingerPrintHdfs::toDelimitedLine, Encoders.STRING())
                    .filter((FilterFunction<String>) Objects::nonNull);

            lines.repartition(OUTPUT_PARTITIONS)
                    .write()
                    .mode(SaveMode.Overwrite)
                    .text(OUTPUT_PATH);
        } finally {
            // Release the session even when the job fails part-way through.
            sparkSession.stop();
        }
    }

    /**
     * Converts one source row into its delimited output line.
     *
     * @param row a row carrying every column named in {@link #HEADERS}
     * @return the output line, or {@code null} when the row's {@code data} value is
     *         blank, not brace-wrapped, or not parseable as JSON (such rows are
     *         filtered out by the caller)
     */
    private static String toDelimitedLine(Row row) {
        String data = row.getAs(DATA_COLUMN);
        if (StringUtils.isBlank(data)) {
            return null;
        }
        data = data.trim();
        if (!data.startsWith("{") || !data.endsWith("}")) {
            return null;
        }

        JSONObject jsonObject;
        try {
            jsonObject = JSONObject.parseObject(data);
        } catch (JSONException ignored) {
            // The brace check above is only a heuristic. A payload that still fails to
            // parse is treated like any other non-JSON value and skipped, instead of
            // failing the whole job.
            return null;
        }

        // Flatten all nested fields into a single-level JSON object.
        jsonObject = EquipmentFingerprintApp.flatJson(jsonObject);

        // Merge columns.
        EquipmentFingerprintApp.combineColumn(jsonObject);

        // Apply the cleaning rules.
        EquipmentFingerprintApp.cleanColumn(jsonObject);

        StringBuilder line = new StringBuilder();

        for (String header : HEADERS) {
            if (DATA_COLUMN.equals(header)) {
                continue;
            }
            // Read as Object so that non-string column types do not fail with a
            // ClassCastException; string values are written exactly as before.
            // NOTE(review): a null column value is written as the literal text "null"
            // here (StringBuilder.append semantics), unlike the JSON fields below,
            // which use an empty string. Left unchanged to keep existing output stable.
            Object value = row.getAs(header);
            line.append(value).append(FIELD_DELIMITER);
        }

        // JSON-derived fields are joined by the delimiter, with none after the last one.
        boolean first = true;
        for (String column : EquipmentFingerprintApp.dataColumns) {
            if (!first) {
                line.append(FIELD_DELIMITER);
            }
            first = false;
            line.append(fieldOrEmpty(jsonObject, column));
        }

        return line.toString();
    }

    /**
     * Reads one field of the flattened JSON as text.
     *
     * @param json   the flattened, cleaned JSON object
     * @param column the key to read
     * @return the field's string value, or an empty string when the key is absent,
     *         maps to {@code null}, or reading it throws
     */
    private static String fieldOrEmpty(JSONObject json, String column) {
        try {
            String value = json.getString(column);
            return value == null ? "" : value;
        } catch (Exception ignored) {
            // Best effort: a field that cannot be read is written as empty.
            return "";
        }
    }
}
