package cn.quantgroup.dbc.spark.june;

import cn.quantgroup.dbc.bean.ComserviceISpiderUserInfo;
import cn.quantgroup.dbc.bean.UserInfo;
import cn.quantgroup.dbc.utils.DateUtil;
import cn.quantgroup.dbc.utils.JdbcExecuters;
import cn.quantgroup.dbc.utils.NumberUtil;
import cn.quantgroup.dbc.utils.SqlUtil;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Spark batch job that imports one exported batch of spider user-info records into the
 * {@code comservice_i_spider_user_info} table.
 *
 * <p>Flow: read the user parquet (phone, uuid), read the tab-separated export named by
 * {@code args[0]}, inner-join both on {@code phone}, then write every joined row with a
 * batched, parameterized {@code INSERT IGNORE} from each partition.
 *
 * Created by renfeng on 2019/4/19.
 */
public class SpiderUserItemApp {

    /** Parquet data set providing the phone/uuid pairs of known users. */
    private static final String USER_PARQUET_PATH = "hdfs:///app/user/data/user_data/feng.ren/user_parquet";

    /** Directory holding the exported text batches; {@code args[0]} is appended to it. */
    private static final String EXPORT_DIR = "hdfs:///app/user/data/deeplearning/export_data/spider_user_item/";

    /** An export line must have at least this many tab-separated fields (indexes 0..13 are read). */
    private static final int MIN_FIELD_COUNT = 14;

    /** Number of buffered rows that triggers a JDBC batch write. */
    private static final int BATCH_SIZE = 500;

    /** Pattern used for every date string this job parses or produces. */
    private static final String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Register dates earlier than this are treated as invalid. */
    private static final String EARLIEST_REGISTER_DATE = "1960-01-01 00:00:00";

    /** Start of the insert statement shared by the parameterized and the string-built variant. */
    private static final String INSERT_PREFIX = "INSERT IGNORE INTO comservice_i_spider_user_info (uuid,realName,registerDate,idCard,phoneRemain,phone,addr,merry,cardType,cardNo,sex,loginName,userSource,timestamp) values (";

    /** Parameterized insert: one placeholder for uuid plus one per entry of {@link #DETAIL_COLUMNS}. */
    private static final String INSERT_SQL = INSERT_PREFIX + "?,?,?,?,?,?,?,?,?,?,?,?,?,?)";

    /** Joined-row columns bound after uuid, in the column order of the insert statement. */
    private static final String[] DETAIL_COLUMNS = {
            "realName", "registerDate", "idCard", "phoneRemain", "phone", "addr", "merry",
            "cardType", "cardNo", "sex", "loginName", "userSource", "timestamp"
    };

    /**
     * Job entry point.
     *
     * @param args {@code args[0]} is the name of the export to import, relative to the export directory
     * @throws IllegalArgumentException if no export name is supplied
     */
    public static void main(String[] args) {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException(
                    "Missing argument: name of the export under " + EXPORT_DIR);
        }
        String exportName = args[0];

        System.out.println("==========任务开始执行=============");
        System.out.println("本次导入" + exportName);

        SparkSession ss = SparkSession.builder().appName("SpiderUserItemApp").getOrCreate();
        try {
            runImport(ss, exportName);
            System.out.println("=====================执行完毕");
        } finally {
            // Release the session even when a stage fails.
            ss.close();
        }
    }

    /** Loads both inputs, joins them on phone and writes the joined rows to the database. */
    private static void runImport(SparkSession ss, String exportName) {
        Dataset<UserInfo> userInfoDataset = ss.read().parquet(USER_PARQUET_PATH).map(row -> {
            UserInfo userInfo = new UserInfo();
            userInfo.setPhone(row.getAs("phone"));
            userInfo.setUuid(row.getAs("uuid"));
            return userInfo;
        }, Encoders.bean(UserInfo.class));

        // takeAsList(1) instead of head(): head() throws when the user data set is empty.
        List<UserInfo> firstUser = userInfoDataset.takeAsList(1);
        System.out.println("======用户加载完成======" + (firstUser.isEmpty() ? null : firstUser.get(0).getPhone()));
        System.out.println("======用户加载总数量======" + userInfoDataset.count());

        Dataset<String> dataset = ss.read().textFile(EXPORT_DIR + exportName);

        // Lines with too few fields are dropped before parsing so parseLine can index safely.
        Dataset<ComserviceISpiderUserInfo> phoneBillInfoDataset = dataset
                .filter(row -> row.split("\t", -1).length >= MIN_FIELD_COUNT)
                .map(row -> parseLine(row), Encoders.bean(ComserviceISpiderUserInfo.class));

        // Inner join: only export rows whose phone belongs to a known user are kept.
        Dataset<Row> finalData = phoneBillInfoDataset.join(userInfoDataset, "phone");

        // NOTE(review): finalData is not cached, so this count and the write below each
        // evaluate the join; consider caching if the double evaluation becomes expensive.
        System.out.println("======join总数量======" + finalData.count());

        finalData.foreachPartition(line -> {
            List<List<String>> paramList = new ArrayList<>(BATCH_SIZE);

            line.forEachRemaining(o -> {
                try {
                    paramList.add(toInsertParams(o));

                    // The buffer is cleared only after a successful write; a failed batch stays
                    // buffered and is sent again with the next full batch or the final flush.
                    if (paramList.size() % BATCH_SIZE == 0) {
                        JdbcExecuters.prepareBatchUpdateExecute(INSERT_SQL, paramList);
                        paramList.clear();
                    }
                } catch (ArrayIndexOutOfBoundsException ignored) {
                    // Known failure mode that the job deliberately tolerates.
                } catch (Exception e) {
                    System.out.println("======通话账单未知异常======");
                    e.printStackTrace();
                }
            });

            // Flush the remainder; skip the database call when nothing is buffered
            // (empty partition, or a row count that is an exact multiple of the batch size).
            if (!paramList.isEmpty()) {
                JdbcExecuters.prepareBatchUpdateExecute(INSERT_SQL, paramList);
            }
        });
    }

    /** Converts one export line (tab-separated, at least 14 fields) into a bean; field 0 is not used. */
    private static ComserviceISpiderUserInfo parseLine(String row) {
        String[] fields = row.split("\t", -1);
        ComserviceISpiderUserInfo spiderUserInfo = new ComserviceISpiderUserInfo();
        spiderUserInfo.setRealName(blankToNull(fields[1]));
        spiderUserInfo.setRegisterDate(blankToNull(registerDate2Date(fields[2])));
        spiderUserInfo.setIdCard(blankToNull(fields[3]));
        spiderUserInfo.setPhoneRemain(blankToNull(phoneRemainToFormat(fields[4])));
        spiderUserInfo.setPhone(blankToNull(fields[5]));
        spiderUserInfo.setAddr(blankToNull(fields[6]));
        spiderUserInfo.setMerry(blankToNull(fields[7]));
        spiderUserInfo.setCardType(blankToNull(fields[8]));
        spiderUserInfo.setCardNo(blankToNull(fields[9]));
        spiderUserInfo.setSex(blankToNull(fields[10]));
        spiderUserInfo.setLoginName(blankToNull(fields[11]));
        spiderUserInfo.setUserSource(blankToNull(fields[12]));
        spiderUserInfo.setTimestamp(timeStamp2Date(fields[13]));
        return spiderUserInfo;
    }

    /** Builds the parameter list for {@link #INSERT_SQL} from one joined row: uuid first, then the detail columns. */
    private static List<String> toInsertParams(Row row) {
        List<String> params = new ArrayList<>(DETAIL_COLUMNS.length + 1);
        String uuid = row.getAs("uuid");
        params.add(blankToNull(uuid));
        for (String column : DETAIL_COLUMNS) {
            params.add(row.<String>getAs(column));
        }
        return params;
    }

    /** Returns {@code null} for a null, empty or whitespace-only value, otherwise the value itself. */
    private static String blankToNull(String value) {
        return StringUtils.isBlank(value) ? null : value;
    }

    /** Wraps a value in single quotes after removing any single quotes it contains. */
    private static String quoted(String value) {
        return "'" + value.replace("'", "") + "'";
    }

    /**
     * Builds a literal (non-parameterized) insert statement for one row.
     *
     * <p>Single quotes are removed from the uuid and from every value; no other escaping is
     * applied, so the parameterized insert used by {@link #main(String[])} should be preferred
     * for data that is not fully trusted.
     *
     * @param list values for the columns after uuid, in statement order; {@code null} entries
     *             are written as SQL {@code null}
     * @param uuid user uuid; a blank uuid is written as SQL {@code null}
     * @return the complete insert statement
     */
    public static String getExecuteSql(List<String> list, String uuid) {
        // Append directly instead of going through String.format: the uuid used to be part of
        // the format template, so a '%' inside it broke the formatting.
        StringBuilder sql = new StringBuilder(INSERT_PREFIX);
        sql.append(StringUtils.isBlank(uuid) ? "null" : quoted(uuid)).append(',');
        for (int i = 0; i < list.size(); i++) {
            if (i > 0) {
                sql.append(',');
            }
            String value = list.get(i);
            sql.append(value == null ? "null" : quoted(value));
        }
        return sql.append(')').toString();
    }

    /**
     * Normalizes a phone balance to two decimal places.
     *
     * @param phoneRemain raw balance text
     * @return {@code null} for blank input; the balance rounded half-up to scale 2 when, after
     *         removing spaces, it consists only of digits and dots and parses as a decimal;
     *         otherwise the input unchanged
     */
    public static String phoneRemainToFormat(String phoneRemain) {
        if (StringUtils.isBlank(phoneRemain)) {
            return null;
        }

        String compact = phoneRemain.replace(" ", "");
        String digits = compact.replace(".", "");
        // An input made only of dots has no digits left and is not a number.
        if (digits.isEmpty() || !StringUtils.isNumeric(digits)) {
            return phoneRemain;
        }

        try {
            // Parse the space-free text: BigDecimal rejects embedded or surrounding spaces.
            return new BigDecimal(compact).setScale(2, java.math.RoundingMode.HALF_UP).toString();
        } catch (NumberFormatException e) {
            // Reached for inputs such as "1.2.3" that pass the character check.
            System.out.println("======用户信息余额异常======" + phoneRemain);
            e.printStackTrace();
            return phoneRemain;
        }
    }

    /**
     * Validates a register date given as {@code yyyy-MM-dd HH:mm:ss} text.
     *
     * @param timeStamp raw register date text
     * @return {@code null} when the text is blank, does not start with {@code 1} or {@code 2},
     *         lies before 1960-01-01 00:00:00 or lies in the future; otherwise the input
     *         unchanged. Text that cannot be parsed is logged and also returned unchanged.
     */
    public static String registerDate2Date(String timeStamp) {
        if (StringUtils.isBlank(timeStamp)) {
            return null;
        }
        if (!timeStamp.startsWith("1") && !timeStamp.startsWith("2")) {
            return null;
        }

        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_TIME_PATTERN);
        try {
            Date date = sdf.parse(timeStamp);
            Date earliest = sdf.parse(EARLIEST_REGISTER_DATE);
            if (date.before(earliest) || date.after(new Date())) {
                return null;
            }
            return timeStamp;
        } catch (java.text.ParseException e) {
            System.out.println("======注册时间异常======" + timeStamp);
            e.printStackTrace();
            return timeStamp;
        }
    }

    /**
     * Formats a numeric epoch value as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * <p>The value is passed to {@link Date#Date(long)}, i.e. it is interpreted as milliseconds
     * since the epoch.
     *
     * @param timeStamp epoch text
     * @return the formatted date, or {@code null} when the text is blank, contains the
     *         {@code \N} marker or is not a valid long
     */
    public static String timeStamp2Date(String timeStamp) {
        if (StringUtils.isBlank(timeStamp) || timeStamp.contains("\\N")) {
            return null;
        }
        try {
            long epoch = Long.parseLong(timeStamp);
            return new SimpleDateFormat(DATE_TIME_PATTERN).format(new Date(epoch));
        } catch (NumberFormatException e) {
            System.out.println("======timestamp======" + timeStamp);
            e.printStackTrace();
            return null;
        }
    }


}
