package cn.quantgroup.dbc.spark.mobile;

import cn.quantgroup.dbc.bean.MobileCallDetailInfo;
import cn.quantgroup.dbc.bean.MobileFlowInfo;
import cn.quantgroup.dbc.bean.UserInfo;
import cn.quantgroup.dbc.utils.*;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Spark batch job that cleans mobile data-usage ("flow") detail records and writes them to the
 * {@code comservice_i_mobile_flow_info} table.
 * <p>
 * Pipeline:
 * <ol>
 *   <li>Load the phone/uuid user mapping from a fixed HDFS file (tab-separated).</li>
 *   <li>Load the raw tab-separated flow records from
 *       {@code hdfs:///app/user/data/deeplearning/export_data/mobile_flow_info/} followed by the
 *       directory name given in {@code args[0]} (for example {@code mt=1556121600000}).</li>
 *   <li>Normalise every column through {@code DateUtil}, {@code NumberUtil} and
 *       {@code PhoneNoUtils}.</li>
 *   <li>Inner-join the cleaned records with the user mapping on {@code phone}.</li>
 *   <li>Write one {@code INSERT IGNORE} statement per joined row through {@code JdbcExecuters},
 *       in batches of {@link #BATCH_SIZE}.</li>
 * </ol>
 * <p>
 * Sample record from the original author's notes (shown there as a JSON array; this job itself
 * splits each line on tabs):
 * ["065b4129937773120180331144314","2018-03-31 14:43:14","","0","0","4g","13777399214","0.0","江西省","1525677748420"]
 * <p>
 * Columns 1-9, in order: start time / data plan used / fee / online duration / network type /
 * phone number / total traffic / location / crawl time (ingest time). Column 0 is not read.
 *
 * @Author fengjunkai
 * @Date 2019-05-09 10:53
 */
public class MobileFlowDetailMain {

    /** Maximum number of INSERT statements handed to the database in one batch. */
    private static final int BATCH_SIZE = 200;

    /** Number of tab-separated columns a raw flow record must have (indexes 0-9 are addressed). */
    private static final int FLOW_FIELD_COUNT = 10;

    /**
     * Job entry point.
     *
     * @param args {@code args[0]} is the sub-directory of {@code mobile_flow_info} to process,
     *             e.g. {@code mt=1556121600000}
     * @throws IllegalArgumentException if {@code args[0]} is missing
     */
    public static void main(String[] args) {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException further down.
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: MobileFlowDetailMain <mobile_flow_info sub-directory, e.g. mt=1556121600000>");
        }

        System.out.println("==========移动流量详单任务开始执行============");

        SparkSession ss = SparkSession.builder().appName("DBC").getOrCreate();
        try {
            // Load the phone -> uuid user mapping.
            Dataset<String> user = ss.read().textFile("hdfs:///app/user/data/user_data/feng.ren/user_info/part-00000");
            Dataset<UserInfo> userInfoDataset = user.map((MapFunction<String, UserInfo>) line -> {
                UserInfo person = new UserInfo();
                // limit -1 keeps a trailing empty uuid column instead of silently dropping it;
                // a blank uuid is written as SQL NULL later on.
                String[] split = line.split("\t", -1);
                person.setPhone(split[0]);
                person.setUuid(split[1]);
                return person;
            }, Encoders.bean(UserInfo.class));

            // NOTE(review): both probes below are debugging output. Each one triggers a Spark job,
            // and head() throws when the dataset (or the filter result) is empty, which aborts the
            // run. Consider removing them once the job is trusted.
            System.out.println("======用户加载完成======"+userInfoDataset.head().getPhone());
            System.out.println("======================="+userInfoDataset.where("phone='18500192679'").head().getUuid());

            // Load the raw mobile flow detail records.
            Dataset<String> dataset = ss.read().textFile("hdfs:///app/user/data/deeplearning/export_data/mobile_flow_info/"+args[0]+"/*");

            Dataset<MobileFlowInfo> mobileFlowMap = dataset.map(
                    (MapFunction<String, MobileFlowInfo>) MobileFlowDetailMain::parseFlowLine,
                    Encoders.bean(MobileFlowInfo.class));

            System.out.println("==========移动流量清洗完成=============");

            Dataset<Row> finalRow = mobileFlowMap.join(userInfoDataset, "phone");
            System.out.println("==============join完成================");

            // The statement buffer must live inside the partition task. A list created on the
            // driver and captured by the closure is copied to every task, so the driver never sees
            // what the tasks added and the last partial batch of each task would be lost.
            finalRow.javaRDD().foreachPartition(rows -> {
                // Declared as ArrayList because that is the type the original code passed to
                // JdbcExecuters.batchUpdateExecute.
                ArrayList<String> sqls = new ArrayList<>(BATCH_SIZE);
                while (rows.hasNext()) {
                    Row row = rows.next();
                    try {
                        sqls.add(buildInsertSql(row));
                    } catch (Exception e) {
                        // Best effort: a row that cannot be rendered is reported and skipped.
                        System.out.println("======移动流量未知异常======" + row.toString());
                        e.printStackTrace();
                        continue;
                    }
                    if (sqls.size() >= BATCH_SIZE) {
                        flushBatch(sqls);
                    }
                }
                // Write whatever is left once the partition is exhausted.
                flushBatch(sqls);
            });

            System.out.println("=====================移动流量详单执行完毕");
        } finally {
            // Release the session even when a stage fails.
            ss.close();
        }

    }

    /**
     * Parses one raw tab-separated flow record into a cleaned bean.
     * <p>
     * A line with fewer than {@link #FLOW_FIELD_COUNT} columns is logged and yields a bean whose
     * fields are all unset; its null phone never matches in the inner join, so the record is
     * dropped there. If a normalisation helper throws, the exception is logged and the bean is
     * returned with the fields populated so far (same as the previous behaviour).
     *
     * @param line one raw input line
     * @return the cleaned record, possibly partially populated
     */
    private static MobileFlowInfo parseFlowLine(String line) {
        MobileFlowInfo mobileFlowInfo = new MobileFlowInfo();

        // limit -1 keeps trailing empty columns; the default split would drop them and make
        // split[9] fail for records whose last column is blank.
        String[] split = line.split("\t", -1);
        if (split.length < FLOW_FIELD_COUNT) {
            System.out.println("======移动流量详单出现关键字段数组越界======" + line);
            return mobileFlowInfo;
        }

        String phone = split[6];
        String cTime = split[1];
        String totalFlow = split[7];
        String onlineTime = split[4];
        String onlineType = split[5];
        String communicationFees = split[3]; // fee charged for the session
        String cheapService = split[2];  // data plan used
        String tradeAddr = split[8];
        String time = split[9];

        try {
            mobileFlowInfo.setcTime(DateUtil.getCTimeFormat(cTime, "移动流量详单"));
            mobileFlowInfo.setCheapService(StringUtils.isBlank(cheapService) ? null : cheapService);
            mobileFlowInfo.setCommunicationFees(StringUtils.isBlank(communicationFees) ? null : communicationFees);
            mobileFlowInfo.setOnlineTime(StringUtils.isBlank(onlineTime) ? null : NumberUtil.getUseTimeFormat(onlineTime, "移动流量详单上网时间"));
            mobileFlowInfo.setOnlineType(StringUtils.isBlank(onlineType) ? null : onlineType);
            mobileFlowInfo.setPhone(StringUtils.isBlank(phone) ? null : PhoneNoUtils.getPhoneNoByRegx(phone, "移动流量详单号码"));
            mobileFlowInfo.setTotalFlow(StringUtils.isBlank(totalFlow) ? null : NumberUtil.flowFormat(totalFlow, "移动流量详单总流量"));
            mobileFlowInfo.setTradeAddr(StringUtils.isBlank(tradeAddr) ? null : tradeAddr);
            mobileFlowInfo.setTimestamp(StringUtils.isBlank(time) ? null : DateUtil.timeStamp2Date(time, "移动流量详单"));
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("=============未知异常============");
        }
        return mobileFlowInfo;
    }

    /**
     * Renders the INSERT statement for one joined row.
     * <p>
     * Columns are read by position, exactly as the original code did: 0 = phone, 1 = cTime,
     * 2 = cheapService, 3 = communicationFees, 4 = onlineTime, 5 = onlineType, 6 = timestamp,
     * 7 = totalFlow, 8 = tradeAddr, 9 = uuid.
     * NOTE(review): this layout depends on the bean encoders' column order; confirm with
     * {@code printSchema()} whenever a bean property is added or renamed.
     * <p>
     * NOTE(review): the statement is assembled by string concatenation. How
     * {@code SqlUtil.getExecuteSql} escapes values is not visible here; prefer a parameterised
     * statement if {@code JdbcExecuters} offers one.
     *
     * @param row a row of the flow/user join
     * @return the SQL text produced by {@code SqlUtil.getExecuteSql}
     * @throws Exception whatever {@code SqlUtil.getExecuteSql} or the row accessors raise
     */
    private static String buildInsertSql(Row row) throws Exception {
        List<String> list = new ArrayList<>();
        list.add(row.getString(1)); // cTime
        list.add(row.getString(2)); // cheapService
        list.add(row.getString(3)); // communicationFees
        list.add(row.getString(4)); // onlineTime
        list.add(row.getString(5)); // onlineType
        list.add(row.getString(0)); // phone
        list.add(row.getString(7)); // totalFlow
        list.add(row.getString(8)); // tradeAddr
        list.add(row.getString(6)); // timestamp
        String uuid = row.getString(9);

        // A blank uuid is written as SQL NULL, otherwise as a quoted literal.
        String uuidLiteral = StringUtils.isBlank(uuid) ? "null," : ("'" + uuid + "',");
        String sql = "INSERT IGNORE INTO comservice_i_mobile_flow_info (uuid,cTime,cheapService,communicationFees,onlineTime,onlineType,phone,totalFlow,tradeAddr,timestamp) values (" + uuidLiteral + "%s)";
        return SqlUtil.getExecuteSql(list, sql);
    }

    /**
     * Sends the buffered statements to the database and empties the buffer.
     * <p>
     * Stays best-effort like the original: a failing batch is logged and discarded so the rest of
     * the partition can continue. The buffer is cleared in {@code finally}; otherwise a failed
     * batch would stay in the buffer and be resubmitted with every later flush.
     *
     * @param sqls pending statements; empty after the call
     */
    private static void flushBatch(ArrayList<String> sqls) {
        if (sqls.isEmpty()) {
            return;
        }
        try {
            JdbcExecuters.batchUpdateExecute(sqls);
        } catch (Exception e) {
            System.out.println("======mobile flow batch insert failed, statements dropped: " + sqls.size());
            e.printStackTrace();
        } finally {
            sqls.clear();
        }
    }

}
