Commit 042a76e7 authored by linfang.wang

覆盖率

parent b8eae307
...@@ -147,7 +147,7 @@ def cal_miss(df,feature,classes=[]): ...@@ -147,7 +147,7 @@ def cal_miss(df,feature,classes=[]):
tmp.loc[tmp[feature] == 0, 'flag'] = '0值' tmp.loc[tmp[feature] == 0, 'flag'] = '0值'
tmp.loc[tmp[feature] > 0, 'flag'] = '非0值' tmp.loc[tmp[feature] > 0, 'flag'] = '非0值'
headers = classes+['flag', 'cnt', 'miss_rate'] headers = classes+['flag', 'cnt', 'match_rate']
if len(classes) > 0: if len(classes) > 0:
# == 分类型 # == 分类型
df_gp = pd.merge( df_gp = pd.merge(
...@@ -155,12 +155,12 @@ def cal_miss(df,feature,classes=[]): ...@@ -155,12 +155,12 @@ def cal_miss(df,feature,classes=[]):
tmp.groupby(classes+['flag'])[feature].count().reset_index().rename(columns={feature: "cnt1"}), tmp.groupby(classes+['flag'])[feature].count().reset_index().rename(columns={feature: "cnt1"}),
on=classes, how='left' on=classes, how='left'
) )
df_gp['miss_rate'] = np.round(1-df_gp.cnt1 / df_gp.cnt, 3) df_gp['match_rate'] = np.round(df_gp.cnt1 / df_gp.cnt, 3)
df_out = df_gp df_out = df_gp
else: else:
all = [[ '非0值', tmp.shape[0], round(1 - tmp[tmp[feature] > 0].shape[0] / tmp.shape[0], 3)], all = [[ '非0值', tmp.shape[0], round(tmp[tmp[feature] > 0].shape[0] / tmp.shape[0], 3)],
[ '0值', tmp.shape[0], round(1 - tmp[tmp[feature] == 0].shape[0] / tmp.shape[0], 3)], [ '0值', tmp.shape[0], round( tmp[tmp[feature] == 0].shape[0] / tmp.shape[0], 3)],
['缺失值', tmp.shape[0], round(1 - tmp[(tmp[feature] < 0)].shape[0] / tmp.shape[0], 3)]] ['缺失值', tmp.shape[0], round(tmp[(tmp[feature] < 0)].shape[0] / tmp.shape[0], 3)]]
df_all = pd.DataFrame(all, columns=headers) df_all = pd.DataFrame(all, columns=headers)
df_out=df_all df_out=df_all
return df_out[headers] return df_out[headers]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment