Commit 7d8f118e authored by data爬虫-冯 军凯

transactionLog 数据清洗 main类修改12

parent 903bc8db
...@@ -21,52 +21,42 @@ public class CleanningTransactionLogMain { ...@@ -21,52 +21,42 @@ public class CleanningTransactionLogMain {
public static void main(String[] args) { public static void main(String[] args) {
SparkSession ss = SparkSession.builder().appName("DBC").getOrCreate(); SparkSession ss = SparkSession.builder().appName("DBC").getOrCreate();
System.out.println("开始加载数据"); System.out.println("开始加载数据");
List<String> textFileList = new ArrayList<>(); String hdfsPath = "hdfs:///app/user/data/user_data/feng.ren/transactionLog20191226/id=";
String hdfsPath = "hdfs:///app/user/data/user_data/feng.ren/transactionLog20191226/id=%s/part-m-00000"; String[] hdfsArr = new String[args.length];
List<String> argsArray = Arrays.asList(args).subList(2, args.length); for (int i = 0; i < args.length; i++) {
hdfsArr[i] = hdfsPath + args[i];
for (int i = 0; i < argsArray.size(); i++) {
textFileList.add(String.format(hdfsPath, argsArray.get(i)));
} }
for(int i=0;i<textFileList.size();i++){ System.out.println("读取hdfsPath完毕: "+JSON.toJSONString(hdfsArr));
System.out.println("读取hdfs地址文件: "+ textFileList.get(i));
}
Map<String, String[]> textFileMap = PagingUtil.paging(textFileList, Integer.valueOf(args[1])); Dataset<String> dataset = ss.read().textFile(hdfsArr);
dataset.repartition(4).foreachPartition(func -> {
textFileMap.forEach((k, v) -> { ArrayList<TransactionLog> transactionLogs = new ArrayList<>();
System.out.println("======" + k + "读取的hdfs======" + JSON.toJSONString(v)); String sql = "INSERT INTO `call_record` (`request_url`, `transaction_id`, `uuid`, `url_type`, `code`, `created_at`, `updated_at`) VALUES (?,?,?,?,?,?,?)";
// Dataset<String> dataset = ss.read().textFile(v); func.forEachRemaining(item -> {
// try {
// List<TransactionLog> transactionLogs = new ArrayList<>(); String[] split = item.split("\t");
// String sql = "INSERT INTO `call_record` (`request_url`, `transaction_id`, `uuid`, `url_type`, `code`, `created_at`, `updated_at`) VALUES (?,?,?,?,?,?,?)"; TransactionLog transactionLog = new TransactionLog();
// dataset.foreach(o -> { transactionLog.setCode(split[3]);
// try { SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
// String[] split = o.split("\t"); Timestamp timestamp = new Timestamp(simpleDateFormat.parse(split[4]).getTime());
// TransactionLog transactionLog = new TransactionLog(); transactionLog.setCreated_at(timestamp);
// transactionLog.setCode(split[3]); transactionLog.setTransaction_id(split[0]);
// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); transactionLog.setUuid(split[1]);
// Timestamp timestamp = new Timestamp(simpleDateFormat.parse(split[4]).getTime()); transactionLog.setUrl_type(split[2]);
// transactionLog.setCreated_at(timestamp); transactionLog.setUpdated_at(timestamp);
// transactionLog.setTransaction_id(split[0]);
// transactionLog.setUuid(split[1]); if (transactionLogs.size() != 0 && transactionLogs.size() % 200 == 0) {
// transactionLog.setUrl_type(split[2]); // JdbcExecuters.prepareBatchUpdateExecuteTransactionid(sql, transactionLogs);
// transactionLog.setUpdated_at(timestamp); System.out.println("执行sql集合: "+JSON.toJSONString(transactionLogs));
// transactionLogs.clear();
// }
// if (transactionLogs.size() != 0 && transactionLogs.size() % 200 == 0) { } catch (Exception e) {
//// JdbcExecuters.prepareBatchUpdateExecuteTransactionid(sql, transactionLogs); System.out.println("单个数据拼装异常: "+item);
// System.out.println("执行sql集合: "+JSON.toJSONString(transactionLogs)); e.printStackTrace();
// transactionLogs.clear(); }
// }else{ });
// transactionLogs.add(transactionLog);
// }
// } catch (Exception e) {
// System.out.println("单个数据拼装异常: "+o);
// e.printStackTrace();
// }
// });
// JdbcExecuters.prepareBatchUpdateExecuteTransactionid(sql, transactionLogs); // JdbcExecuters.prepareBatchUpdateExecuteTransactionid(sql, transactionLogs);
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment