package com.js.sync.service.impl;

import cn.hutool.core.util.ReUtil;
import cn.hutool.http.HttpException;
import cn.hutool.http.HttpRequest;
import com.js.api.sync.service.CrawlerService;
import com.js.common.constant.Constant;
import com.js.common.enums.SiteEnum;
import lombok.extern.slf4j.Slf4j;
import org.apache.dubbo.config.annotation.Service;

import java.util.Collections;
import java.util.List;
import java.util.Locale;

/**
 * Dubbo/REST service that looks up a seller's store name by fetching the
 * seller page ({@code /sp?seller=...}) of a marketplace site and extracting
 * the {@code <h1 id="sellerName">} heading from the returned HTML.
 */
@Slf4j
@Service(
        protocol = {"rest", "dubbo"},
        version = Constant.DUBBO_VERSION,
        application = "${dubbo.application.id}",
        registry = "${dubbo.registry.id}"
)
public class CrawlerServiceImpl implements CrawlerService {

    /** Total number of fetch attempts: the initial try plus two retries. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Matches the text between {@code <h1 id="sellerName">} and the next
     * {@code </h1>}. The quantifier is reluctant so that, when several
     * headings share one line, the match stops at the first closing tag
     * instead of running on to the last one.
     */
    private static final String SELLER_NAME_REGEX = "(?<=<h1 id=\"sellerName\">).*?(?=</h1>)";

    /**
     * Fetches the store name shown on a seller's page.
     *
     * @param sellerId     seller identifier, appended verbatim as the {@code seller} query parameter
     * @param siteEnumName name of a {@link SiteEnum} constant selecting the marketplace
     * @return the first seller name found, or {@code null} if none was found after
     *         {@value #MAX_ATTEMPTS} attempts
     * @throws IllegalArgumentException if {@code siteEnumName} is not a {@link SiteEnum} constant
     */
    @Override
    public String getStoreName(String sellerId, String siteEnumName) {
        SiteEnum siteEnum = SiteEnum.valueOf(siteEnumName);
        // Locale.ROOT keeps the host name stable regardless of the JVM default locale
        // (e.g. a Turkish locale would lower-case 'I' to a dotless 'ı').
        String host = "www." + siteEnum.getAmazonEndpointEnum().getMarketplaceName().toLowerCase(Locale.ROOT);
        // Explicit scheme: do not rely on the HTTP client guessing one for a scheme-less URL.
        String url = "https://" + host + "/sp?seller=" + sellerId;

        // If an attempt yields nothing (request failure or no match), retry up to two more times.
        for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
            List<String> names = getName(url);
            if (!names.isEmpty()) {
                return names.get(0);
            }
        }
        return null;
    }

    /**
     * Downloads the page at {@code url} and extracts every seller-name heading.
     *
     * @param url absolute URL of the seller page
     * @return the matched names in document order; never {@code null}, empty when the
     *         request fails with an {@link HttpException} or nothing matches
     */
    private List<String> getName(String url) {
        try {
            String html = HttpRequest.get(url)
                    .setMaxRedirectCount(2)
                    .enableDefaultCookie()
                    .execute()
                    .body();
            if (html == null) {
                return Collections.<String>emptyList();
            }
            List<String> names = ReUtil.findAll(SELLER_NAME_REGEX, html, 0);
            // Defensive: callers rely on a non-null list.
            return names == null ? Collections.<String>emptyList() : names;
        } catch (HttpException e) {
            log.error("抓取店铺名称失败", e);
            return Collections.emptyList();
        }
    }
}
