" r = re.findall(\"(\\d{4}年\\d{1,2}月\\d{1,2}日)(.*?)发放的(.*?),业务号(.*?),授信额度[折合人民币]{0,5}(.*?)元,共享授信额度[折合人民币]{0,5}(.*?)元,(.*?)[,|。]{1}截至(\\d{4}年\\d{1,2}月\\d{1,2}日),(账户状态为“[\\u4E00-\\u9FA5]{1,2}”)*\",v[1])\n",
" if r :\n",
" r = r[0]\n",
" _json['银行名称'] = r[1]\n",
" _json['授信金额'] = r[4]\n",
" _json['共享授信金额'] = r[5]\n",
" _json['授信时间'] = r[0]\n",
" _json['截止时间'] = r[7]\n",
" if r[8]:\n",
" r_temp = re.findall('账户状态为“(.*?)”',r[8])[0]\n",
" _json['账户状态 '] = r_temp\n",
"\n",
" if v[2] != None:\n",
" table_tag = get_table(v[2])\n",
" \n",
" table_json = get_json_DebitCard(table_tag)\n",
" key_name = table_json.keys()\n",
" plan = ''\n",
" overdue = ''\n",
" temp_json = {}\n",
" for name in key_name:\n",
" r = re.findall(\"(\\d{4}年\\d{1,2}月)-(\\d{4}年\\d{1,2}月)的([\\S+]*)\",name)\n",
"<ipython-input-310-6f3d66682be4> in get_json_DebitCard(tag)\n 35 if r:\n 36 r = r[0]\n---> 37 if result[temp_key][ii] not in result[r].keys():\n 38 result[r][result[temp_key][ii]] = []\n 39 result[r][result[temp_key][ii]].append(value_list[ii])\n",
"IndexError: list index out of range"
]
}
],
"source": [
"def get_json_DebitCard(tag):\n",
" result = defaultdict(dict)\n",
" key = []\n",
" value = []\n",
" value_list = []\n",
" _k = []\n",
" _v = []\n",
"\n",
" temp_key = None\n",
" for i,val in enumerate(tag):\n",
" tds = val.find_all('td')\n",
" # tds_len = len(tds)\n",
"# next_sibling = val.next_sibling\n",
"# if (next_sibling == ' ' or next_sibling =='\\n' ) and next_sibling!=None:\n",