import pandas as pd
'''
Purpose: fetch Dianhuabang (电话邦) features and sample data; the data source is the risk-control analysis database.
'''

# Relative path of the feature file shared by the functions in this module;
# get_features_from_file() parses it as tab-separated text.
feature_file_name='features/dhb.csv'

def get_features_from_file():
    '''
    Load the maintained feature table from ``feature_file_name``.

    The file is parsed as tab-separated text.

    :return: DataFrame; callers in this module expect the columns
             ['feature', 'version']
    '''
    return pd.read_csv(feature_file_name, sep='\t')

def get_feature_by_version(version=None):
    '''
    Return the feature names recorded under one version of the feature file.

    :param version: int version number; None or any value below 1 selects
                    the highest version present in the file
    :return: list of the 'feature' values whose 'version' equals the
             selected version (empty when no row carries that version)
    '''
    df_feature = get_features_from_file()
    # Identity check for None: ``== None`` can be hijacked by objects that
    # overload equality, and None must be ruled out before the ``<`` test.
    if version is None or version < 1:
        version = df_feature.version.max()
    return df_feature[df_feature.version == version].feature.tolist()
def save_features(features):
    '''
    Store ``features`` as a new version unless it matches the latest one.

    The given features are compared as sets (order and duplicates are
    ignored) with the newest version in the feature file. When they are
    equal nothing is written; otherwise the features are appended under
    the next version number and the whole file is rewritten.

    :param features: list of feature names
    :return: True when the latest stored version already equals
             ``features``; None after a new version has been written
    '''
    if set(get_feature_by_version()) == set(features):
        print('features are already newest,not need to save')
        return True

    columns = ['feature', 'version']
    df_feature = get_features_from_file()

    # max() is NaN when the file holds no rows yet; start numbering at 1
    # instead of propagating NaN into the version column.
    latest = df_feature.version.max()
    next_version = 1 if pd.isna(latest) else latest + 1

    new_rows = pd.DataFrame(features, columns=['feature'])
    new_rows['version'] = next_version

    df_feature = pd.concat([df_feature[columns], new_rows[columns]])
    # Write tab-separated: get_features_from_file() reads the file with
    # sep='\t', so the default comma delimiter would make the next read
    # see a single 'feature,version' column and break every later call.
    df_feature.to_csv(feature_file_name, sep='\t', index=False, encoding='utf8')




if __name__ == '__main__':
    # Smoke run: hand the latest stored version straight back to
    # save_features(). Pass a slice of the list (e.g. [1:10]) instead to
    # exercise the branch that writes a new version.
    save_features(get_feature_by_version())



