package cn.quantgroup.dbc.spark.transactionlog;

import cn.quantgroup.dbc.utils.JdbcExecuters;
import com.alibaba.fastjson.JSON;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;

/**
 * Spark batch job that reads tab-separated transaction-log text files from HDFS and
 * inserts the parsed rows into the {@code call_record} table in fixed-size batches.
 *
 * <p>Every command-line argument is appended to a fixed HDFS path prefix to form one
 * input path. Each input line is split on tabs and columns 0-4 are read as
 * transaction id, uuid, url type, code and a {@code yyyy-MM-dd HH:mm:ss} timestamp
 * (the timestamp is used for both {@code created_at} and {@code updated_at}).
 *
 * @Author fengjunkai
 */
public class CleanningTransactionLogMain {

    /** Common prefix of every input path; one program argument is appended per path. */
    private static final String HDFS_PATH_PREFIX =
            "hdfs:///app/user/data/user_data/feng.ren/transactionLog20191226/id=";

    /**
     * Insert statement handed to {@code JdbcExecuters}.
     * NOTE(review): the statement lists {@code request_url}, but this job never sets that
     * field on the {@code TransactionLog} - confirm how the JDBC helper binds it.
     */
    private static final String INSERT_SQL =
            "INSERT INTO `call_record` (`request_url`, `transaction_id`, `uuid`, `url_type`, `code`, `created_at`, `updated_at`) VALUES (?,?,?,?,?,?,?)";

    /** Number of parsed rows accumulated before a batch insert is issued. */
    private static final int BATCH_SIZE = 200;

    /** Number of partitions the input is redistributed into before writing. */
    private static final int PARTITION_COUNT = 4;

    /** Pattern of the timestamp found in column 4 of each input line. */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Entry point of the job.
     *
     * @param args suffixes appended to the HDFS path prefix; one input path is built per argument
     */
    public static void main(String[] args) {
        System.out.println("接收到参数: " + JSON.toJSONString(args));
        SparkSession ss = SparkSession.builder().appName("DBC").getOrCreate();
        try {
            System.out.println("开始加载数据");
            String[] hdfsArr = new String[args.length];
            for (int i = 0; i < args.length; i++) {
                hdfsArr[i] = HDFS_PATH_PREFIX + args[i];
            }

            System.out.println("读取hdfsPath完毕: " + JSON.toJSONString(hdfsArr));

            Dataset<String> dataset = ss.read().textFile(hdfsArr);
            dataset.repartition(PARTITION_COUNT).foreachPartition(rows -> {
                System.out.println("开始执行数据清洗");
                // Declared as ArrayList (not List) so the call to the JDBC helper keeps the
                // exact argument type the original code used.
                ArrayList<TransactionLog> pending = new ArrayList<>(BATCH_SIZE);
                // SimpleDateFormat is not thread-safe, so it is created inside the partition
                // function and reused for every row of this partition instead of per row.
                SimpleDateFormat timestampFormat = new SimpleDateFormat(TIMESTAMP_PATTERN);

                while (rows.hasNext()) {
                    String item = rows.next();
                    try {
                        String[] split = item.split("\t");
                        Timestamp timestamp = new Timestamp(timestampFormat.parse(split[4]).getTime());

                        TransactionLog transactionLog = new TransactionLog();
                        transactionLog.setTransaction_id(split[0]);
                        transactionLog.setUuid(split[1]);
                        transactionLog.setUrl_type(split[2]);
                        transactionLog.setCode(split[3]);
                        transactionLog.setCreated_at(timestamp);
                        transactionLog.setUpdated_at(timestamp);

                        // The original never added the parsed row to the batch, so nothing
                        // was ever inserted; queue it here.
                        pending.add(transactionLog);
                    } catch (Exception e) {
                        // A malformed row (too few columns, bad timestamp) is reported and
                        // skipped so that one bad line does not abort the partition.
                        System.out.println("单个数据拼装异常: " + item);
                        e.printStackTrace();
                    }

                    // Flushing happens outside the per-row try/catch so a database failure is
                    // not misreported as a row-assembly error; it propagates like the final
                    // flush below.
                    if (pending.size() >= BATCH_SIZE) {
                        System.out.println("执行sql集合: " + pending.size());
                        JdbcExecuters.prepareBatchUpdateExecuteTransactionid(INSERT_SQL, pending);
                        pending.clear();
                    }
                }

                // Write whatever is left over after the last full batch.
                if (!pending.isEmpty()) {
                    JdbcExecuters.prepareBatchUpdateExecuteTransactionid(INSERT_SQL, pending);
                }
            });
        } finally {
            // Always release the Spark session, even when the job fails.
            ss.stop();
        }
        System.out.println("完事");

    }

    /*
     * Reference values left by the original author: every multiple of 1,000,000 from
     * 84000000 down to 1000000 (84 values, descending).
     * NOTE(review): presumably these are the "id=" path suffixes passed as program
     * arguments - confirm before relying on this.
     */

}
