Commit 042a76e7 authored by linfang.wang

覆盖率

parent b8eae307
......@@ -147,7 +147,7 @@ def cal_miss(df,feature,classes=[]):
tmp.loc[tmp[feature] == 0, 'flag'] = '0值'
tmp.loc[tmp[feature] > 0, 'flag'] = '非0值'
headers = classes+['flag', 'cnt', 'miss_rate']
headers = classes+['flag', 'cnt', 'match_rate']
if len(classes) > 0:
# == 分类型
df_gp = pd.merge(
......@@ -155,12 +155,12 @@ def cal_miss(df,feature,classes=[]):
tmp.groupby(classes+['flag'])[feature].count().reset_index().rename(columns={feature: "cnt1"}),
on=classes, how='left'
)
df_gp['miss_rate'] = np.round(1-df_gp.cnt1 / df_gp.cnt, 3)
df_gp['match_rate'] = np.round(df_gp.cnt1 / df_gp.cnt, 3)
df_out = df_gp
else:
all = [[ '非0值', tmp.shape[0], round(1 - tmp[tmp[feature] > 0].shape[0] / tmp.shape[0], 3)],
[ '0值', tmp.shape[0], round(1 - tmp[tmp[feature] == 0].shape[0] / tmp.shape[0], 3)],
['缺失值', tmp.shape[0], round(1 - tmp[(tmp[feature] < 0)].shape[0] / tmp.shape[0], 3)]]
all = [[ '非0值', tmp.shape[0], round(tmp[tmp[feature] > 0].shape[0] / tmp.shape[0], 3)],
[ '0值', tmp.shape[0], round( tmp[tmp[feature] == 0].shape[0] / tmp.shape[0], 3)],
['缺失值', tmp.shape[0], round(tmp[(tmp[feature] < 0)].shape[0] / tmp.shape[0], 3)]]
df_all = pd.DataFrame(all, columns=headers)
df_out=df_all
return df_out[headers]
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment