package cn.quantgroup.financial.service;

import cn.quantgroup.financial.model.HttpResult;
import cn.quantgroup.financial.util.NetUtil;
import org.junit.Test;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scrapes the Dangdang best-seller ranking pages with regular expressions and writes the
 * extracted book records to a tab-separated text file.
 *
 * <p>{@link #testDang()} performs live HTTP requests through {@code NetUtil}; it is a manual
 * data-collection job packaged as a JUnit test rather than an assertion-based unit test.
 *
 * Created by WuKong on 2017/6/17.
 */
public class DangdangSortTest {

    /** Base URL of the ranking; the 1-based page number is appended to it. */
    private static final String RANKING_URL =
            "http://bang.dangdang.com/books/bestsellers/01.07.00.00.00.00-recent7-0-0-1-";

    /** Number of ranking pages requested (pages 1..50). */
    private static final int PAGE_COUNT = 50;

    /** Name of the output file, resolved against the current working directory. */
    private static final String OUTPUT_FILE_NAME = "艺术普及.txt";

    // All patterns are compiled once here instead of once per page / per book.

    /** Captures the ranking list markup between the list opening tag and the pagination marker. */
    private static final Pattern LIST_PATTERN = Pattern.compile(
            "<ul\\s+?class=\"bang_list clearfix bang_list_mode\">([\\s\\S]+?)<!--paginating-->",
            Pattern.CASE_INSENSITIVE);

    /** Captures the markup of one list item. */
    private static final Pattern ITEM_PATTERN = Pattern.compile(
            "<li>([\\s\\S]*?)</li>", Pattern.CASE_INSENSITIVE);

    /** Captures the {@code alt} text of the first image in an item; used as the book name. */
    private static final Pattern NAME_PATTERN = Pattern.compile(
            "<img[^>]*?alt=\"([^\\\"]+?)\"", Pattern.CASE_INSENSITIVE);

    /** Captures the digits in front of the comment-count link text. */
    private static final Pattern COMMENT_PATTERN = Pattern.compile(
            "(\\d+)条评论</a>", Pattern.CASE_INSENSITIVE);

    /** Captures the content of a {@code publisher_info} div (single line only: no DOTALL). */
    private static final Pattern PUBLISHER_INFO_PATTERN = Pattern.compile(
            "<div\\s*class=\"publisher_info\">(.+?)</div>", Pattern.CASE_INSENSITIVE);

    /** Captures the first {@code title} attribute value inside a publisher_info div. */
    private static final Pattern AUTHOR_PATTERN = Pattern.compile(
            "title=\"([^\"]+?)\"", Pattern.CASE_INSENSITIVE);

    /** Captures the text of the first link inside a publisher_info div. */
    private static final Pattern COMPANY_PATTERN = Pattern.compile(
            ">([^>]+?)</a>", Pattern.CASE_INSENSITIVE);

    /** Captures the content of the first plain {@code <span>} inside a publisher_info div. */
    private static final Pattern DATE_PATTERN = Pattern.compile(
            "<span>(.+?)</span>", Pattern.CASE_INSENSITIVE);

    /** Captures the amount inside the {@code price_r} span. */
    private static final Pattern PRICE_R_PATTERN = Pattern.compile(
            "<span\\s*class=\"price_r\">&yen;([.\\d]+)</span>", Pattern.CASE_INSENSITIVE);

    /** Captures the amount inside the {@code price_n} span. */
    private static final Pattern PRICE_N_PATTERN = Pattern.compile(
            "<span\\s*class=\"price_n\">&yen;([.\\d]+)</span>", Pattern.CASE_INSENSITIVE);

    /**
     * One scraped ranking entry. Every field holds the raw captured text; getters never return
     * {@code null} and fall back to the empty string for fields that were not found.
     *
     * <p>Declared {@code static} because it does not use the enclosing test instance.
     */
    static class Book {
        private String name;
        private String commentNum;
        private String author;
        /** Amount captured from the {@code price_r} span. NOTE(review): presumably the list price - confirm. */
        private String price_r;
        /** Amount captured from the {@code price_n} span. NOTE(review): presumably the selling price - confirm. */
        private String price_z;
        private String company;
        private String date;

        public String getName() {
            return name == null ? "" : name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getCommentNum() {
            return commentNum == null ? "" : commentNum;
        }

        public void setCommentNum(String commentNum) {
            this.commentNum = commentNum;
        }

        public String getAuthor() {
            return author == null ? "" : author;
        }

        public void setAuthor(String author) {
            this.author = author;
        }

        public String getPrice_r() {
            return price_r == null ? "" : price_r;
        }

        public void setPrice_r(String price_r) {
            this.price_r = price_r;
        }

        public String getPrice_z() {
            return price_z == null ? "" : price_z;
        }

        public void setPrice_z(String price_z) {
            this.price_z = price_z;
        }

        public String getCompany() {
            return company == null ? "" : company;
        }

        public void setCompany(String company) {
            this.company = company;
        }

        public String getDate() {
            return date == null ? "" : date;
        }

        public void setDate(String date) {
            this.date = date;
        }
    }

    /**
     * Downloads ranking pages 1..{@value #PAGE_COUNT}, extracts every book entry found on them
     * and hands the collected list to {@link #generateFile(List)}.
     *
     * <p>A page without a response body is reported on stderr and skipped, so one failed
     * request no longer aborts the whole run with a {@link NullPointerException}.
     */
    @Test
    public void testDang() {
        List<Book> books = new ArrayList<>();
        for (int page = 1; page <= PAGE_COUNT; page++) {
            // The trailing arguments (null, 3) are passed exactly as before; their meaning is
            // defined by NetUtil. NOTE(review): 3 is presumably a retry count or timeout - confirm.
            HttpResult httpResult = NetUtil.getRequestUrl(RANKING_URL + page, null, 3);
            String html = httpResult == null ? null : httpResult.getResult();
            if (html == null) {
                System.err.println("No response body for ranking page " + page + ", skipping");
                continue;
            }
            books.addAll(parseBooks(html));
        }
        generateFile(books);
    }

    /**
     * Writes one tab-separated row per book to {@value #OUTPUT_FILE_NAME}, replacing any
     * previous content. Columns: name, author, date, company, comment count, price_r, price_z.
     *
     * <p>The file is encoded as UTF-8 regardless of the platform default, and rows end with the
     * platform line separator. An I/O failure stops the export and is reported via a stack
     * trace; it is not propagated to the caller.
     *
     * @param books records to write; {@code null} is treated like an empty list
     */
    public void generateFile(List<Book> books) {
        File file = new File(OUTPUT_FILE_NAME);
        // FileOutputStream creates the file when it is missing, so no createNewFile() is needed.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            if (books == null) {
                return;
            }
            for (Book book : books) {
                writer.write(toLine(book));
                writer.newLine();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Extracts all book entries from the HTML of one ranking page; empty if no list is found. */
    private static List<Book> parseBooks(String html) {
        List<Book> books = new ArrayList<>();
        Matcher listMatcher = LIST_PATTERN.matcher(html);
        if (!listMatcher.find()) {
            return books;
        }
        Matcher itemMatcher = ITEM_PATTERN.matcher(listMatcher.group(1));
        while (itemMatcher.find()) {
            books.add(parseBook(itemMatcher.group(1)));
        }
        return books;
    }

    /** Builds a {@link Book} from the markup of one list item, echoing selected fields to stdout. */
    private static Book parseBook(String bookInfo) {
        Book book = new Book();
        // A null capture leaves the field unset; the getters then report the empty string.
        book.setName(firstGroup(NAME_PATTERN, bookInfo));
        book.setCommentNum(firstGroup(COMMENT_PATTERN, bookInfo));

        Matcher infoMatcher = PUBLISHER_INFO_PATTERN.matcher(bookInfo);
        // First publisher_info div: searched for the author.
        if (infoMatcher.find()) {
            String author = firstGroup(AUTHOR_PATTERN, infoMatcher.group(1));
            if (author != null) {
                book.setAuthor(author);
                System.out.println("author=" + author);
            }
        }
        // Calling find() again on the same matcher advances to the NEXT publisher_info div,
        // which is searched for the publishing company and the date.
        if (infoMatcher.find()) {
            String companyInfo = infoMatcher.group(1);
            String company = firstGroup(COMPANY_PATTERN, companyInfo);
            if (company != null) {
                book.setCompany(company);
                System.out.println("company=" + company);
            }
            String date = firstGroup(DATE_PATTERN, companyInfo);
            if (date != null) {
                book.setDate(date);
                System.out.println("date=" + date);
            }
        }

        String priceR = firstGroup(PRICE_R_PATTERN, bookInfo);
        if (priceR != null) {
            book.setPrice_r(priceR);
            System.out.println("priceR=" + priceR);
        }

        String priceZ = firstGroup(PRICE_N_PATTERN, bookInfo);
        if (priceZ != null) {
            book.setPrice_z(priceZ);
            System.out.println("priceZ=" + priceZ);
        }
        return book;
    }

    /** Returns capture group 1 of the first match of {@code pattern}, or {@code null} if none. */
    private static String firstGroup(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Formats one book as a tab-separated row without a line terminator. */
    private static String toLine(Book book) {
        return String.join("\t",
                stripTabs(book.getName()),
                stripTabs(book.getAuthor()),
                stripTabs(book.getDate()),
                stripTabs(book.getCompany()),
                stripTabs(book.getCommentNum()),
                stripTabs(book.getPrice_r()),
                stripTabs(book.getPrice_z()));
    }

    /** Removes tab characters so a value cannot be mistaken for a column separator. */
    private static String stripTabs(String value) {
        return value.replace("\t", "");
    }
}
