import pandas as pd
import datetime
from dateutil.relativedelta import relativedelta
import os
'''
读取特征文件
'''

def get_features_from_file(feature_file_name):
    """Load the feature table from a CSV file.

    The absolute path of the current working directory is printed first,
    which helps when a relative ``feature_file_name`` cannot be found.

    :param feature_file_name: path handed to ``pandas.read_csv``
    :return: DataFrame parsed from the file; the rest of this module
        expects it to have the columns ``feature`` and ``version``
    """
    working_dir = os.path.abspath('.')
    print('当前目录：', working_dir)
    return pd.read_csv(feature_file_name)

def get_feature_by_version(feature_file_name, version=None):
    """Return the feature names stored under one version of the feature file.

    :param feature_file_name: path of the CSV feature file
    :param version: int version to select; ``None`` or any value below 1
        selects the highest version present in the file
    :return: list of the ``feature`` values whose ``version`` matches
    """
    df_feature = get_features_from_file(feature_file_name)
    # Identity test for None: ``== None`` relies on the operand's __eq__.
    if version is None or version < 1:
        version = df_feature.version.max()
    return df_feature[df_feature.version == version].feature.tolist()
def save_features(features, feature_file_name):
    """Store ``features`` as a new version of the feature file if they changed.

    The given features are compared, as a set, with the features of the
    highest version in the file. When both sets are equal nothing is
    written. Otherwise the features are appended under the next version
    number and the whole table is written back to ``feature_file_name``.

    :param features: list of feature names
    :param feature_file_name: path of the CSV feature file
    :return: ``True`` when the file already holds these features as its
        newest version; ``None`` after a new version has been written
    """
    columns = ['feature', 'version']
    # Read the file once and derive both the latest feature set and the
    # next version number from the same frame.
    df_feature = get_features_from_file(feature_file_name)
    latest_version = df_feature.version.max()
    latest_features = df_feature[df_feature.version == latest_version].feature

    if set(latest_features) == set(features):
        print('features are already newest,not need to save')
        return True

    # A file without rows has no maximum version (max() yields NaN);
    # start numbering at 1 instead of writing NaN as the version.
    next_version = 1 if pd.isna(latest_version) else latest_version + 1
    new_rows = pd.DataFrame(features, columns=['feature'])
    new_rows['version'] = next_version

    merged = pd.concat([df_feature[columns], new_rows[columns]])
    merged.to_csv(feature_file_name, index=False, encoding='utf8')

def cal_sample_date(last_sample_max_date=None, passdue_day=15):
    """Compute the date window from which matured samples can be extracted.

    The newest usable date is today minus one month and
    ``passdue_day + 5`` days. Without ``last_sample_max_date`` the window
    is the three months ending on that date; otherwise it starts at
    ``last_sample_max_date``, capped so it never exceeds the newest
    usable date.

    :param last_sample_max_date: newest date of the previous sample. May be
        ``None``, a ``datetime.date``, a ``datetime.datetime``, or a string
        in ``'%Y-%m-%d'`` or ``'%Y-%m-%d %H:%M:%S'`` format
    :param passdue_day: number of overdue days whose outcome must be
        observable, e.g. 15 for "15 days past due"
    :return: ``(start_date, end_date)`` as ``datetime.date`` objects
    :raises ValueError: if a string argument matches neither format
    """
    today = datetime.datetime.now().date()
    # The extra 5 days cover loan cycles longer than 30 days (e.g. 31 days).
    end_date = today + relativedelta(days=-(passdue_day + 5), months=-1)

    if last_sample_max_date is None:
        return end_date + relativedelta(months=-3), end_date

    if isinstance(last_sample_max_date, str):
        text = last_sample_max_date.strip()
        for fmt in ('%Y-%m-%d', '%Y-%m-%d %H:%M:%S'):
            try:
                last_sample_max_date = datetime.datetime.strptime(text, fmt)
                break
            except ValueError:
                continue
        else:
            raise ValueError(
                'last_sample_max_date must look like YYYY-MM-DD or '
                'YYYY-MM-DD HH:MM:SS, got %r' % (text,)
            )

    # datetime and date cannot be ordered against each other; normalise
    # to a plain date before comparing with end_date.
    if isinstance(last_sample_max_date, datetime.datetime):
        last_sample_max_date = last_sample_max_date.date()

    # The start of the window must not lie beyond the newest usable date.
    start_date = min(last_sample_max_date, end_date)
    return start_date, end_date

def read_record():
    """Load the model-iteration records from ``record.txt``.

    The file is looked up relative to the current working directory.

    :return: DataFrame parsed from ``record.txt`` when the file exists,
        otherwise an empty DataFrame that only carries the record columns
    """
    record_path = 'record.txt'
    if not os.path.exists(record_path):
        # No history yet: hand back an empty table with the expected layout.
        return pd.DataFrame(columns=[
            'model_name', 'min_date', 'max_date', 'sample_cnt',
            'train_min_date', 'train_max_date', 'train_cnt', 'train_auc',
            'test_min_date', 'test_max_date', 'test_cnt', 'test_auc',
            'update_date',
        ])
    return pd.read_csv(record_path)

def get_records(model_name):
    """Return every iteration record of one model, newest first.

    :param model_name: value matched against the ``model_name`` column
    :return: DataFrame with the matching rows, sorted by ``update_date``
        in descending order; the original row index is kept
    """
    df = read_record()
    # Sort out of place: an in-place sort on a filtered slice triggers
    # pandas' SettingWithCopyWarning.
    return df[df.model_name == model_name].sort_values(
        ['update_date'], ascending=False)

def get_last_record(model_name):
    """Return the most recent iteration record of a model.

    :param model_name: value matched against the ``model_name`` column
    :return: DataFrame holding at most one row, the record with the
        greatest ``update_date``; empty when the model has no records
    """
    # head(1) of an empty frame is itself empty, so no separate
    # emptiness check is needed.
    return get_records(model_name).head(1)

def save_model_record(model_name,min_date=None,max_date=None,sample_cnt=None,
                      train_min_date=None,train_max_date=None,train_cnt=None,train_auc=None,
                      test_min_date=None,test_max_date=None,test_cnt=None,test_auc=None):
    """Create or update today's record of a model in ``record.txt``.

    ``model_name`` together with ``update_date`` (today) identifies a
    record. If such a record exists, the fields passed with a non-``None``
    value are overwritten and the others are kept; otherwise a new record
    is added. The complete table is then written back to ``record.txt``.

    :param model_name: name of the model the record belongs to
    :param min_date: stored in the ``min_date`` column
    :param max_date: stored in the ``max_date`` column
    :param sample_cnt: stored in the ``sample_cnt`` column
    :param train_min_date: stored in the ``train_min_date`` column
    :param train_max_date: stored in the ``train_max_date`` column
    :param train_cnt: stored in the ``train_cnt`` column
    :param train_auc: stored in the ``train_auc`` column
    :param test_min_date: stored in the ``test_min_date`` column
    :param test_max_date: stored in the ``test_max_date`` column
    :param test_cnt: stored in the ``test_cnt`` column
    :param test_auc: stored in the ``test_auc`` column
    :return: None
    """
    cols = ['model_name', 'min_date', 'max_date', 'sample_cnt',
            'train_min_date', 'train_max_date', 'train_cnt', 'train_auc',
            'test_min_date', 'test_max_date', 'test_cnt', 'test_auc', 'update_date']
    # Dates come back from the CSV as text, so compare and store the
    # ISO string rather than a date object.
    today = datetime.date.today().isoformat()

    df_all = read_record()
    is_todays_record = ((df_all['model_name'] == model_name)
                        & (df_all['update_date'].astype(str) == today))

    if is_todays_record.any():
        # Pull today's record out of the table; it is re-appended below
        # after the updates have been applied.
        df_record = df_all[is_todays_record].copy()
        df_all = df_all[~is_todays_record]
    else:
        # Build the frame with one real row: assigning scalars to a
        # zero-row frame would leave it without any rows.
        df_record = pd.DataFrame(
            [{'model_name': model_name, 'update_date': today}], columns=cols)

    updates = [
        ('min_date', min_date),
        ('max_date', max_date),
        ('sample_cnt', sample_cnt),
        ('train_min_date', train_min_date),
        ('train_max_date', train_max_date),
        ('train_cnt', train_cnt),
        ('train_auc', train_auc),
        ('test_min_date', test_min_date),
        ('test_max_date', test_max_date),
        ('test_cnt', test_cnt),
        ('test_auc', test_auc),
    ]
    for column, value in updates:
        df_record = __update__(df_record, column, value)

    pd.concat([df_all[cols], df_record[cols]]).to_csv(
        'record.txt', index=False, encoding='utf8')


def __update__(df,name,value):
    """Assign ``value`` to column ``name`` of ``df`` unless it is ``None``.

    :param df: DataFrame that is modified in place
    :param name: column to set
    :param value: new column value; ``None`` leaves ``df`` untouched
    :return: the same ``df`` object
    """
    if value is None:
        return df
    df[name] = value
    return df





