import pandas as pd
from data.samples import sample
from data.datasource import dbquery
from data.datasource.mysqldb import engine_risk_analysis
from data.samples.yewudata import *
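'''
Purpose: fetch the Dianhuabang (DHB, 电话邦) features and the sample data.
The data source is the risk-control analysis database.
'''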

# Feature-list file for the DHB feature set; get_feature() hands this path to
# sample.get_feature_by_version().
feature_file_name='features/dhb.csv'

def get_feature():
    """Return the feature names registered for this feature set.

    Delegates to ``sample.get_feature_by_version`` using the module-level
    ``feature_file_name`` and returns its result unchanged.
    """
    feature_names = sample.get_feature_by_version(feature_file_name)
    return feature_names

# Ordinal codes for the DHB "call time" bucket labels: 0 means "无" (none),
# 1 is the most recent bucket ("近3天", last 3 days) and 11 the oldest
# ("180天前", more than 180 days ago).
_CALL_TIME_CODES = {
    "近3天": 1,
    "近4-5天": 2,
    "近6-7天": 3,
    "近8-15天": 4,
    "近16-30天": 5,
    "近31-60天": 6,
    "近61-90天": 7,
    "近91-120天": 8,
    "近121-150天": 9,
    "近151-180天": 10,
    "180天前": 11,
    "无": 0,
}

# Columns whose bucket labels are replaced by the ordinal codes above.
_CALL_TIME_COLUMNS = (
    "dhb_overview_dun_first_call_time",
    "dhb_overview_dun_last_call_time",
    "dhb_overview_ntdun_first_call_time",
    "dhb_overview_ntdun_last_call_time",
)

# Upper caps per feature: every value >= cap is overwritten with the cap.
_FEATURE_CAPS = {
    "dhb_last_60_and_90_days_ntdun_call_avg_duration": 42,
    "dhb_overview_ntdun_call_duration_above60": 25,
    "dhb_last_30_and_60_days_ntdun_call_total_duration": 800,
    "dhb_last_30_and_60_days_dun_call_in_duration": 1600,
    "dhb_last_30_days_ntdun_call_total_duration": 2500,
    "dhb_last_30_days_ntdun_call_tel_total_nums": 25,
    "dhb_last_30_days_dun_call_in_duration": 1000,
    "dhb_overview_ntdun_call_total_duration": 3000,
    "dhb_overview_ntdun_call_in_times": 25,
    "dhb_last_60_and_90_days_ntdun_call_in_duration": 1000,
    "dhb_overview_dun_call_tel_total_nums": 22,
    "dhb_last_30_days_dun_call_total_duration": 1100,
    "dhb_last_two_weeks_ntdun_call_in_duration": 300,
}


def query_sample(start_date, end_date, is_loan=True):
    """Query DHB feature samples and return them cleaned up for modelling.

    Rows are read from ``risk_analysis`` where ``dhb_flag = 1`` and
    ``applied = 1``, with ``applied_at`` in the half-open range
    ``[start_date, end_date)``. The result is inner-joined on ``loan_id``
    with the frame returned by ``query_byloanid``. The call-time label
    columns are then converted to ordinal codes and a fixed set of numeric
    features is capped.

    :param start_date: inclusive lower bound for ``applied_at``; it is
        interpolated into the SQL text as a quoted literal.
    :param end_date: exclusive upper bound for ``applied_at``; interpolated
        the same way.
    :param is_loan: when true (the default) only rows with
        ``transacted = 1`` are selected, i.e. the loan set; otherwise every
        applied row is selected.
    :return: the merged ``DataFrame`` with encoded call-time columns and
        capped numeric features.
    :raises KeyError: if a call-time column holds a label that is not a key
        of ``_CALL_TIME_CODES`` (missing values included).
    """
    features = get_feature()

    # The two sample sets differ only by the extra ``transacted`` condition,
    # so one template serves both; the generated SQL text is unchanged.
    loan_filter = ' and transacted=1' if is_loan else ''

    # NOTE(review): the dates and feature names are interpolated straight
    # into the SQL text. Only pass trusted values; switch to bound parameters
    # if dbquery.mysql_query supports them (its signature is not visible here).
    sql = '''
        select loan_id,%s
        from risk_analysis
        where dhb_flag =1%s and applied=1
        and applied_at >='%s' and applied_at<'%s'
        ''' % (','.join(features), loan_filter, start_date, end_date)

    df = dbquery.mysql_query(sql, engine_risk_analysis)

    # query_byloanid is presumably provided by the yewudata star-import; the
    # inner join keeps only loans present in both frames.
    yewu = query_byloanid(df.loan_id.tolist())
    df = pd.merge(df, yewu, on='loan_id', how='inner')

    # Per-column Series.map with a callable replaces DataFrame.applymap, which
    # is deprecated since pandas 2.1. The strict lookup is kept on purpose so
    # that an unexpected label still fails loudly instead of becoming NaN.
    for column in _CALL_TIME_COLUMNS:
        df[column] = df[column].map(lambda label: _CALL_TIME_CODES[label])

    # NaN compares False against the cap, so missing values are left as-is.
    for column, cap in _FEATURE_CAPS.items():
        df.loc[df[column] >= cap, column] = cap

    return df

# if __name__ == '__main__':
#     features=sample.get_feature_by_version(feature_file_name)
#     features=features[1:10]
#     sample.save_features(features,feature_file_name)



