Commit ccad6faa authored by linfang.wang's avatar linfang.wang

加入业务数据

parent a33add70
import pandas as pd
def query(sql, engine_sql, chunksize=10000):
    """Run a query that may return many rows and load it as one DataFrame.

    The result is read in chunks through ``pd.read_sql(..., chunksize=...)``
    and the chunks are concatenated into a single frame.

    :param sql: SQL statement handed to ``pd.read_sql``.
    :param engine_sql: connection/engine object handed to ``pd.read_sql``.
    :param chunksize: positive number of rows fetched per chunk
        (defaults to 10000, the value that used to be hard-coded).
    :return: DataFrame with all rows and a continuous 0..n-1 index; an empty
        DataFrame when the reader yields no chunk at all.
    """
    chunks = list(pd.read_sql(sql, engine_sql, chunksize=chunksize))
    if not chunks:
        # pd.concat raises ValueError on an empty list, so short-circuit.
        return pd.DataFrame()
    # Renumber the rows so index labels stay unique across chunks.
    return pd.concat(chunks, ignore_index=True)
\ No newline at end of file
import pandas as pd
from data.datasource.mysqldb import *
from data.samples import mysqlquery
'''
目的:提供业务数据,包括 order_no,loan_id,用户类型【直接使用risk_info】,渠道,申请时间,拒绝原因,首逾天数,最大逾期天数,放款期数
'''
def query_user_loan_type(order_nos):
    """Look up the user loan type recorded for each order.

    Reads ``biz_audit_log_data`` in batches of 1000 order numbers. The type is
    taken from ``$.user_loan_type_v3`` of ``audit_context_data`` and falls
    back to ``$.user_loan_type_v4`` when the former is null.

    :param order_nos: sliceable sequence of order numbers.
    :return: DataFrame with columns ``order_no`` and ``user_loan_type`` (the
        user type used by the strategy), one row per ``order_no``; an empty
        DataFrame with those columns when ``order_nos`` is empty.
    """
    columns = ['order_no', 'user_loan_type']
    batch_size = 1000
    sql = '''
    select biz_no as order_no,
    ifnull(JSON_EXTRACT(audit_context_data, '$.user_loan_type_v3'),JSON_EXTRACT(audit_context_data, '$.user_loan_type_v4')) as user_loan_type
    from biz_audit_log_data
    where biz_no in %s
    '''
    frames = []
    for i in range(0, len(order_nos), batch_size):
        print('----exe sql %d---- ' % i)
        batch = order_nos[i:i + batch_size]
        # Build the IN list by hand: str(tuple(batch)) renders a one-element
        # batch as "('x',)", whose trailing comma is invalid SQL.
        # NOTE(review): values are interpolated with repr(); callers must pass
        # trusted identifiers only.
        in_list = '(%s)' % ', '.join(repr(value) for value in batch)
        frames.append(pd.read_sql(sql % in_list, engine_risk))
    if not frames:
        # Nothing was queried; pd.concat would raise on an empty list.
        return pd.DataFrame(columns=columns)
    df = pd.concat(frames)
    df.drop_duplicates(['order_no'], inplace=True)
    return df
def query_byloanid(loan_ids):
    """Fetch business data for a set of loan ids from the analysis database.

    Joins ``loan_application`` (t1) with ``loan_repay`` (t2, rows with
    ``repayment_status = 4`` excluded) in batches of 1000 loan ids.

    :param loan_ids: sliceable sequence of loan ids.
    :return: DataFrame with columns ``loan_id``, ``user_id``, ``order_no``,
        ``contract_term``, ``passdue_day`` (overdue days of term 1, else
        null), ``applied_at``, ``applied_type``, ``applied_channel``,
        ``reason`` and ``max_passdue_day``, reduced to one row per
        ``loan_id``; an empty DataFrame with those columns when ``loan_ids``
        is empty.
    """
    columns = [
        'loan_id', 'user_id', 'order_no', 'contract_term', 'passdue_day',
        'applied_at', 'applied_type', 'applied_channel', 'reason',
        'max_passdue_day',
    ]
    batch_size = 1000
    # Comparisons use "=": "==" is not a MySQL comparison operator.
    sql = '''
    select t1.loan_id,t1.user_id,t2.order_no,t2.contract_term,if(t2.term_no=1,t2.passdue_day,null) as passdue_day,
    t1.applied_at,t1.applied_type,t1.applied_channel,if(t2.loan_id is not null,'已放款',if(t1.approval=1,'审核通过','审核未通过')) as reason,
    max(t2.passdue_day) as max_passdue_day
    from loan_application t1
    left join loan_repay t2 on t1.loan_id=t2.loan_id and t2.repayment_status!=4
    where t1.loan_id in %s
    group by 1,2,3,4,5,6,7,8
    '''
    frames = []
    for i in range(0, len(loan_ids), batch_size):
        print('----exe sql %d---- ' % i)
        batch = loan_ids[i:i + batch_size]
        # Build the IN list by hand: str(tuple(batch)) renders a one-element
        # batch as "(1,)", whose trailing comma is invalid SQL.
        # NOTE(review): values are interpolated with repr(); callers must pass
        # trusted identifiers only.
        in_list = '(%s)' % ', '.join(repr(value) for value in batch)
        frames.append(pd.read_sql(sql % in_list, engine_analysis_new))
    if not frames:
        # Nothing was queried; pd.concat would raise on an empty list.
        return pd.DataFrame(columns=columns)
    df = pd.concat(frames)
    # order_no comes from the left-joined table, so it is null for rows
    # without a match; only decode values that really are bytes.
    df['order_no'] = df['order_no'].apply(
        lambda x: x.decode('utf8') if isinstance(x, bytes) else x
    )
    # Remove duplicated loans.
    # NOTE(review): when a loan_id has several rows, which one survives
    # depends on how the sort orders ties - confirm this is acceptable.
    df.sort_values(['loan_id'], ascending=True, inplace=True)
    df.drop_duplicates(['loan_id'], keep='last', inplace=True)
    return df
def query_by_orderno(order_nos):
    """Fetch business data for a set of order numbers from the analysis database.

    Joins ``loan_application`` (t1) with ``loan_repay`` (t2, rows with
    ``repayment_status = 4`` excluded) in batches of 1000 order numbers,
    filtering on ``t1.order_no``.

    :param order_nos: sliceable sequence of order numbers.
    :return: DataFrame with columns ``loan_id``, ``user_id``, ``order_no``,
        ``contract_term``, ``passdue_day`` (overdue days of term 1, else
        null), ``applied_at``, ``applied_type``, ``applied_channel``,
        ``reason`` and ``max_passdue_day``, reduced to one row per
        ``loan_id``; an empty DataFrame with those columns when
        ``order_nos`` is empty.
    """
    columns = [
        'loan_id', 'user_id', 'order_no', 'contract_term', 'passdue_day',
        'applied_at', 'applied_type', 'applied_channel', 'reason',
        'max_passdue_day',
    ]
    batch_size = 1000
    # Comparisons use "=": "==" is not a MySQL comparison operator.
    sql = '''
    select t1.loan_id,t1.user_id,t2.order_no,t2.contract_term,if(t2.term_no=1,t2.passdue_day,null) as passdue_day,
    t1.applied_at,t1.applied_type,t1.applied_channel,if(t2.loan_id is not null,'已放款',if(t1.approval=1,'审核通过','审核未通过')) as reason,
    max(t2.passdue_day) as max_passdue_day
    from loan_application t1
    left join loan_repay t2 on t1.loan_id=t2.loan_id and t2.repayment_status!=4
    where t1.order_no in %s
    group by 1,2,3,4,5,6,7,8
    '''
    frames = []
    for i in range(0, len(order_nos), batch_size):
        print('----exe sql %d---- ' % i)
        batch = order_nos[i:i + batch_size]
        # Build the IN list by hand: str(tuple(batch)) renders a one-element
        # batch as "('x',)", whose trailing comma is invalid SQL.
        # NOTE(review): values are interpolated with repr(); callers must pass
        # trusted identifiers only.
        in_list = '(%s)' % ', '.join(repr(value) for value in batch)
        frames.append(pd.read_sql(sql % in_list, engine_analysis_new))
    if not frames:
        # Nothing was queried; pd.concat would raise on an empty list.
        return pd.DataFrame(columns=columns)
    df = pd.concat(frames)
    # order_no comes from the left-joined table, so it is null for rows
    # without a match; only decode values that really are bytes.
    df['order_no'] = df['order_no'].apply(
        lambda x: x.decode('utf8') if isinstance(x, bytes) else x
    )
    # Remove duplicated loans.
    # NOTE(review): when a loan_id has several rows, which one survives
    # depends on how the sort orders ties - confirm this is acceptable.
    df.sort_values(['loan_id'], ascending=True, inplace=True)
    df.drop_duplicates(['loan_id'], keep='last', inplace=True)
    return df
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment