Commit 7f7db97f authored by 桂秋月

Collection of big-data test scripts, covering search and recommendation

Pipeline #1610 failed with stages
# Default ignored files
/shelf/
/workspace.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/bigdata.iml" filepath="$PROJECT_DIR$/bigdata.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RunConfigurationProducerService">
<option name="ignoredProducers">
<set>
<option value="com.android.tools.idea.compose.preview.runconfiguration.ComposePreviewRunConfigurationProducer" />
</set>
</option>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
import jsonpath,requests,os,re,json,sys,traceback,datetime,random,time,itertools
from collections import defaultdict
import uuid as codeuuid
cur_dir=os.path.dirname(os.path.abspath(__file__))
project_dir=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_file_path=os.path.join(project_dir,'dataFile')
timenow = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
#sys.path.append(os.path.abspath(__file__))
channel = [214, 217, 1]
mysql_info={
"119":{
#'root', 'quantGroup#com', '172.24.17.119', 15307
"name":"root",
"pwd":"quantGroup#com",
"host":"172.24.17.119",
"port":"15307"
},
"220":{
"name":"recommender_user_rw",
"pwd":"rkwnPEG4",
"host":"172.30.220.11",
"port":"15307"
},
"saas5":{
"name":"root",
"pwd":"123456",
"host":"172.29.2.5",
"port":"3306"
},
"saas16":{
"name":"root",
"pwd":"root",
"host":"172.29.5.16",
"port":"3306"
},
"27":{
"name":"real_data_test",
"pwd":"qpRBSarm$Tv*YO!n",
"host":"172.30.5.27",
"port":"8066"
}
}
import pandas as pd
from sqlalchemy import create_engine
from databaseConn import *
#conn_db_48=create_engine('mysql://qa:{}@172.17.5.48:31393'.format('qatest'))
#conn_db_119 = create_engine('mysql://{}:{}@{}:{}'.format('root', 'quantGroup#com', '172.24.17.119', 15307))
#conn_db_220 = create_engine('mysql://recommender_user_rw:rkwnPEG4@172.30.220.11:15307')
#conn_db_27 = create_engine('mysql://real_data_test:{}@172.30.5.27:8066'.format('qpRBSarm$Tv*YO!n'))
# 172.29.2.5:3306 user: root password: 123456, saas phase 1
#conn_db_2_5 = create_engine('mysql://root:123456@172.29.2.5:3306')
# conn_db_5_16 = create_engine('mysql://root:root@172.29.5.16:3306')
def mysqlInfo(namespace):
"""
:param namespace:
:return: 获取namespace对应的mysql信息,对大数据的mysql不适用,此时需要自定义mysql_info
"""
url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}&serviceType=base&serviceName=mysql".format(namespace)
result=mysql_info.get(str(namespace)) or {}
if result:
return result
try:
resp=requests.get(url)
result['host']=jsonpath.jsonpath(resp.json(),'$..lanIp')[0]
result['port']=jsonpath.jsonpath(resp.json(),'$..nodePort')[0]
result['pwd']='qatest'
result['name']='qa'
return result
    except:
        raise Exception("no mysql info found for namespace [{}]".format(namespace))
def mysql_universal(namespace):
    dsn_template="mysql://{name}:{pwd}@{host}:{port}"
    info=mysqlInfo(namespace)
    #print(info)
    return create_engine(dsn_template.format(**info))
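# Note (assumption): passwords containing URL-special characters such as '#'
# or '$' (e.g. 'quantGroup#com' above) may need urllib.parse.quote_plus()
# before being embedded in the DSN string.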
def execmysl(namespace,sql):
    if 'select' not in sql or 'from' not in sql:
        raise Exception('not a valid sql statement as far as mysql is concerned',sql)
try:
df=pd.read_sql(sql,con=mysql_universal(namespace))
return df
except Exception as e:
        print('mysql execution error:')
traceback.print_exc(limit=2)
#print('mysql执行报错:',traceback.print_stack())
def concatSql(sql,**kwargs):
    '''
    :param sql:
    :param kwargs:
    :return: builds a WHERE clause for select statements only; conditions are AND-joined
    '''
if not kwargs:
return sql
temp=' '
for k,v in kwargs.items():
if isinstance(v,(list,tuple)):
if len(v)>1:
concatinfo=k+" in "+ '{}' +' and '
temp+=concatinfo.format(tuple(v))
else:
temp+=k+'='+"'"+str(v[0])+"'" +' and '
else:
temp+=k+'='+"'"+str(v)+"'" +' and '
    return sql+' where'+temp[:-5]
if __name__ == '__main__':
    # The call below deliberately trips the select-only guard and raises:
    # print(execmysl(119,'update 1'))
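    # Hedged usage sketch of concatSql against a hypothetical table t:
    # lists become IN clauses, scalars become quoted equality tests.
    print(concatSql('select * from t', sku_no=['1', '2'], status=1))
    # -> "select * from t where sku_no in ('1', '2') and status='1'"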
import pandas as pd
from sqlalchemy import create_engine
from collections import defaultdict
from urllib import parse
REDIS_CONFIG = {
'REDIS_NODES': [
{'host': '172.24.17.119', 'port': 6371},
{'host': '172.24.17.119', 'port': 6372},
{'host': '172.24.17.119', 'port': 6373},
{'host': '172.24.17.119', 'port': 6374},
{'host': '172.24.17.119', 'port': 6375},
{'host': '172.24.17.119', 'port': 6376}
],
'REDIS_EXPIRE_TIME': 26 * 3600,
'REDIS_MAX_CONNECTIONS': 50,
'REDIS_PASSWD': 'redis',
}
dapan_redis_config={
'REDIS_NODES': [
{'host': '172.29.2.5', 'port': 6371},
{'host': '172.29.2.5', 'port': 6372},
{'host': '172.29.2.5', 'port': 6373}
],
'REDIS_EXPIRE_TIME': 26 * 3600,
'REDIS_MAX_CONNECTIONS': 50,
'REDIS_PASSWD': '1234',
}
from rediscluster import RedisCluster
redis_db_conn_119 = RedisCluster(startup_nodes=REDIS_CONFIG.get('REDIS_NODES'),
max_connections=REDIS_CONFIG.get('REDIS_MAX_CONNECTIONS'),
password=REDIS_CONFIG.get('REDIS_PASSWD'), decode_responses=True)
# dapan_redis_db_conn = RedisCluster(startup_nodes=dapan_redis_config.get('REDIS_NODES'),
# max_connections=dapan_redis_config.get('REDIS_MAX_CONNECTIONS'),
# password=dapan_redis_config.get('REDIS_PASSWD'),
# decode_responses=True,
# skip_full_coverage_check=True)
def getRedisValue(key,conntype=None,oper='select'):
    '''
    :param key:
    :param conntype: selects which redis connection to use
    :param oper: 'delete' removes the matched keys; only delete and select are supported
    :return: select -> the values of every key matching the pattern
    '''
    if 'select' in key or 'from' in key:
        raise Exception('not a valid key as far as redis is concerned',key)
redis_value = defaultdict()
if conntype == 'dapan':
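        # NOTE: this branch needs the commented-out dapan_redis_db_conn above to be re-enabled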
redis_db_conn=dapan_redis_db_conn
else:
redis_db_conn=redis_db_conn_119
uuid_redis_result=redis_db_conn.keys(key)
    print('all keys matched by the pattern:',uuid_redis_result)
if uuid_redis_result and oper=='delete':
for i in uuid_redis_result:
redis_db_conn.delete(i)
return 'delete succ'
#print(uuid_redis_result)
if not uuid_redis_result:
print("[{key}]模糊没有匹配到数据,返回0".format(key=key))
return 0
#print(uuid_redis_result,key,redis_key_type)
for i in uuid_redis_result:
redis_key_type=redis_db_conn.type(i)
if redis_key_type == 'list':
redis_value[i]=redis_db_conn.lrange(i,0,100)
# uuid_redis_result = redis_db_conn.lrange(i)
elif redis_key_type in ('dict','hash'):
redis_value[i]=redis_db_conn.hgetall(i)
for k,v in redis_value[i].items():
print('redis key[{}]'.format(i),"===>",k,'===>',v)
# uuid_redis_result = redis_db_conn.hgetall(i)
elif redis_key_type=='string':
#print(i)
#print(redis_db_conn.get('search_online_feature:243717827731969_sku'))
redis_value[i]=redis_db_conn.get(i)
return redis_value
if __name__ == '__main__':
userUuid='00003a93-2a32-4501-b338-755b6cb1ec49'
t=getRedisValue("search_fm_offline_feature:cd11201d3e789c63_query_offline")
print(t)
## key pattern: search_fm_offline_feature:{md5(search keyword)}_query_offline
## the embedded segment is an md5 hash
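## Hedged example: the delete branch removes every key the pattern matches, e.g.
## getRedisValue('search_fm_offline_feature:*_query_offline', oper='delete')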
from databaseConn import *
from tools import *
from tools.publicFun import *
from tools.httprequest import *
from tools.listOperation import *
from databaseConn.mysqlOperation import *
from databaseConn.redisOperation import *
from recommend.publicSql import *
cur_dir=os.path.dirname(os.path.abspath(__file__))
file_path=os.path.join(cur_dir,'tempFile')
from recommend import *
from recommend.publicFunc import *
from recommend.searchTopic import *
def allAppStartTopic():
    '''
    :return: the top cid3, brand_name and jg_id values from the hot top-150 data
    '''
top_info=defaultdict(list)
    # top cid3 from the hot top-150
cid3_concat='jg_intention_score.category_id,jg_intention_score.category_level,jg_heat_rank.rank'
new_hot_cid150_sql=hot_jd150_sql.format(cid3_concat)
cid150_df=execmysl(119,new_hot_cid150_sql)
top_info['cid3']=cid150_df[(cid150_df['category_level']==3)].sort_values(by=['rank'])['category_id'].to_list()[:30]
print('top 30 cid3==>',top_info['cid3'])
    # top brand_name from the hot top-150
brandname_concat='jg_intention_score.brand_name,jg_heat_rank.rank'
new_hot_brandname150_sql=hot_jd150_sql.format(brandname_concat)
brandname_df=execmysl(119,new_hot_brandname150_sql)
top_info['brand_name']=brandname_df[(brandname_df['brand_name']!='')].sort_values(by=['rank'])['brand_name'].to_list()[:30]
print('top 30 brandname==>',top_info['brand_name'])
    # top jg_id from the hot top-150
jg_concat='jg_intention_score.jg_id,jg_heat_rank.rank'
new_hot_jg150_sql=hot_jd150_sql.format(jg_concat)
jg_df=execmysl(119,new_hot_jg150_sql)
temp_jg=jg_df.sort_values(by=['rank'])['jg_id'].to_list()[:30]
top_info['jg_id']=fixpos(temp_jg)
print('top 30 jgid==>',top_info['jg_id'])
return top_info
def AllClickTopic(sku_no):
clickresutl=defaultdict(list)
sku_info=skuinfo(sku_no)
cids=[]
print(sku_info)
cids.append(sku_info.get('cid1'))
cids.append(sku_info.get('cid2'))
cids.append(sku_info.get('cid3'))
    ### click recall for jingang (quick-entry) slots
jd_change_sql=concatSql(jd_hot_sql,**{"category_id":cids})
jd_click_result=normalClickTopic(sku_no,jd_change_sql)
clickresutl['jd_click']=jd_click_result
    ### click recall by category
cid_change_sql=concatSql(jg_purpost_sql,**{"jg_type":1})
cid_click_result=normalClickTopic(sku_no,cid_change_sql)
clickresutl['cid_click']=cid_click_result
    ## click recall by brand
    brandname_change_sql=concatSql(jg_purpost_sql,**{"jg_type":2})
    brandname_click_result=normalClickTopic(sku_no,brandname_change_sql)
clickresutl['brandname_click']=brandname_click_result
return clickresutl
def allSearchTopic():
result=defaultdict(list)
top10_sku,searchinfo=step1()
    ## fetch the seed data
seedData=getseed(top10_sku)
    ## derive cid2 and cid1 from the cid3 seed
cids_df=cidinfo(seedData.get('cid3')).to_dict(orient='records')[0]
seedData.update(**{'cid2':cids_df['c_id2']})
seedData.update(**{'cid1':cids_df['c_id1']})
jd_change_sql=jg_purpost_all_sql.format(seedData['cid3'],seedData['cid2'],seedData['cid1'],seedData['brand_name'])
df=execmysl(119,jd_change_sql)
conditions=concatCondition(seedData,contype='jg')
conditions1=concatCondition(seedData,contype='cid')
conditions2=concatCondition(seedData,contype='brandname')
result['jg_search']=normalSearchTopic(conditions,df)
result['cid_search']=normalSearchTopic(conditions1,df)
result['brandname_search']=normalSearchTopic(conditions2,df)
return result
def getoperate():
    '''
    :return: the fixed positions configured on the business side
    '''
result=requests.get('http://kdsp-api-test1.liangkebang.net/api/kdsp/index/ex/effective-config',headers={"qg-tenant-id":"560761"})
configinfo=jsonpath.jsonpath(result.json(),'$..config')[0]
print(result.json())
df=pd.DataFrame(configinfo)
temp_df=df[df["fixedPosition"]==2].sort_values(by='rank')
temp_df['rank']=temp_df['rank'].astype('int')
    #print('business jingang-slot config:',temp_df)
return temp_df[['id','rank']].to_dict(orient='records')
def fixpos(recall_list):
    '''
    :param recall_list:
    :return: the recall list with fixed-position ids removed and then re-inserted at their configured positions
    '''
pos=getoperate()
    ## if the recall list already contains fixed-position ids, drop them first
[recall_list.remove(i['id']) for i in pos if i['id'] in recall_list]
for i in pos:
recall_list.insert(i.get('rank')-1,i['id'])
return recall_list
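# Hedged sketch: with getoperate() returning [{'id': 30, 'rank': 1}],
# fixpos([10, 30, 20]) first drops the fixed id, then re-inserts it at
# rank 1, yielding [30, 10, 20].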
if __name__ == '__main__':
t=1
print(t)
from recommend import *
def dapanFill(num=300):
    '''
    :param num: how many rows to take from the dapan (market-wide) table
    :return:
    '''
temp=defaultdict()
df=execmysl(119,dapan_sql)
temp['recall_reason']='大盘补足'
temp['skus']=df['sku_no'].to_list()[:num]
return temp
def minPriceFill(skus,num=100):
    '''
    :param skus: dict, must include cid3 and brand_name, e.g. {'id': 154060057, 'sku_no': '37993171924993', 'price': 39.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2}
    :return:
    '''
temp=[]
concat_cid3_sql=concatSql(skuinfo_sql,**{"cid3":skus.get('cid3')})
concat_brandname_sql=concatSql(skuinfo_sql,**{"brand_name":skus.get('brand_name')})
sku_price=float(skus.get('price'))
brandname_df=execmysl(119,concat_brandname_sql)
brandname_df['price']=brandname_df['price'].astype('float')
brandname_df['diff_price']=abs(brandname_df['price']-sku_price)
brandname_min_price_sku=brandname_df.sort_values(by=['diff_price'])[['sku_no','diff_price','price']]#.to_list()[:num]
print("同品牌[{}]价格差最小的商品信息".format(skus.get('brand_name')),brandname_min_price_sku)
temp+=brandname_min_price_sku['sku_no'].to_list()[:num]
if len(temp)<num:
cid3_df=execmysl(119,concat_cid3_sql)
cid3_df['price']=cid3_df['price'].astype('float')
#t=cid3_df[cid3_df['sku_no']=='361165411589122']#['price']
if cid3_df.empty:
return temp
cid3_df['diff_price']=abs(cid3_df['price']-sku_price)
cid3_min_price_sku=cid3_df.sort_values(by=['diff_price'])[['sku_no','diff_price','price']]#.to_list()[:num]
print("同cid3[{}]价格差最小的商品信息".format(skus.get('cid3')),
cid3_min_price_sku)
temp+=cid3_min_price_sku['sku_no'].to_list()
return temp[:num]
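# Reading note: same-brand candidates are ranked by absolute price gap first;
# only when fewer than num are found is the same-cid3 pool appended, and the
# merged list is truncated to num.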
def skuinfo(sku):
change_sql=concatSql(skuinfo_sql,**{"sku_no":sku})
sku_df=execmysl(119,change_sql)
sku_info=sku_df[['cid1','cid2','cid3','brand_name']].to_dict(orient='records')[0]
return sku_info
def normalClickTopic(sku_no,change_sql):
sku_info=skuinfo(sku_no)
# cids=[]
# print(sku_info)
# cids.append(sku_info.get('cid1'))
# cids.append(sku_info.get('cid2'))
# cids.append(sku_info.get('cid3'))
# change_sql=concatSql(jd_hot_sql,**{"category_id":cids})
hot_df=execmysl(119,change_sql)
final_result=[]
if 'category_level' in change_sql:
cid3_condition="hot_df['category_level']==3"
cid2_condition="hot_df['category_level']==2"
cid1_condition="hot_df['category_level']==1"
no_cid123_condition="hot_df['category_id']==-1"
else:
cid3_condition="hot_df['cid3']==sku_info.get('cid3')"
cid2_condition="hot_df['cid3']==sku_info.get('cid2')"
cid1_condition="hot_df['cid3']==sku_info.get('cid1')"
no_cid123_condition="hot_df['cid3']==-1"
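    # Fallback cascade: cid3+brand first, then progressively broader matches
    # (cid2/cid1 with brand, the same levels without brand, then brand with no
    # category), widening until at least 30 distinct jg_ids are collected.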
condition1=hot_df[eval(cid3_condition) & (hot_df['brand_name']==sku_info.get('brand_name'))] \
.sort_values(ascending=False,by='final_score')
final_result+=condition1['jg_id'].to_list()
print('condition1:',final_result)
if len(set(final_result))<30:
condition2=hot_df[eval(cid2_condition) & (hot_df['brand_name']==sku_info.get('brand_name'))] \
.sort_values(ascending=False,by='final_score')
final_result+=condition2['jg_id'].to_list()
print('condition2:',final_result)
if len(set(final_result))<30:
condition3=hot_df[eval(cid1_condition) & (hot_df['brand_name']==sku_info.get('brand_name'))] \
.sort_values(ascending=False,by='final_score')
final_result+=condition3['jg_id'].to_list()
print('condition3:',final_result)
if len(set(final_result))<30:
condition4=hot_df[eval(cid3_condition)& (hot_df['brand_name']=='no_brand_name') ] \
.sort_values(ascending=False,by='final_score')
final_result+=condition4['jg_id'].to_list()
print('condition4:',final_result)
if len(set(final_result))<30:
condition5=hot_df[eval(cid2_condition)& (hot_df['brand_name']=='no_brand_name') ] \
.sort_values(ascending=False,by='final_score')
final_result+=condition5['jg_id'].to_list()
print('condition5:',final_result)
if len(set(final_result))<30:
condition6=hot_df[eval(cid1_condition) & (hot_df['brand_name']=='no_brand_name') ] \
.sort_values(ascending=False,by='final_score')
final_result+=condition6['jg_id'].to_list()
print('condition6:',final_result)
if len(set(final_result))<30:
condition7=hot_df[(hot_df['brand_name']==sku_info.get('brand_name')) & eval(no_cid123_condition) ] \
.sort_values(ascending=False,by='final_score')
final_result+=condition7['jg_id'].to_list()
print('condition7:',final_result)
if len(set(final_result))<30:
final_result+=jgDapanBu()
#print(final_result)
final_result_temp=list(set(final_result))
final_result_temp.sort(key=final_result.index)
    print('deduplicated result:',len(final_result_temp),final_result_temp)
return final_result_temp[:30]
def normalSearchTopic(conditions,df):
result=[]
# seed_cid3=seed_data['cid3']
# cid2=seed_data['cid2']
# cid1=seed_data['cid1']
# seed_brandname=seed_data['brand_name']
#df=execmysl(119,sql)
for k in conditions:
condition1=df[eval(k['condition'])].sort_values(by=[k['by']],ascending=False)['jg_id'].to_list()
result+=condition1
if len(set(result))>30:
break
return result
def concatCondition(seedData,contype='jg'):
if contype=='jg':
condition1="(df['cid3']=={})".format(seedData['cid3']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])
condition2="(df['cid2']=={})".format(seedData['cid2']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])
condition3="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])
condition4="(df['cid3']=={})".format(seedData['cid3']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')
condition5="(df['cid2']=={})".format(seedData['cid2']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')
condition6="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')
condition7="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])
conditions=[]
conditions.append({'condition':condition1,'by':'score_cid3'})
conditions.append({'condition':condition2,'by':'score_cid2'})
conditions.append({'condition':condition3,'by':'score_cid1'})
conditions.append({'condition':condition4,'by':'score_cid3'})
conditions.append({'condition':condition5,'by':'score_cid2'})
conditions.append({'condition':condition6,'by':'score_cid1'})
conditions.append({'condition':condition7,'by':'score_brand'})
return conditions
elif contype=='cid':
condition1="(df['cid3']=={})".format(seedData['cid3']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'] + \
"&" +"(df['jg_type']==1)"
)
condition2="(df['cid2']=={})".format(seedData['cid2']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])+"&" +"(df['jg_type']==1)"
condition3="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])+"&" +"(df['jg_type']==1)"
condition4="(df['cid3']=={})".format(seedData['cid3']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')+"&" +"(df['jg_type']==1)"
condition5="(df['cid2']=={})".format(seedData['cid2']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')+"&" +"(df['jg_type']==1)"
condition6="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')+"&" +"(df['jg_type']==1)"
condition7="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])+"&" +"(df['jg_type']==1)"
conditions=[]
conditions.append({'condition':condition1,'by':'score_cid3'})
conditions.append({'condition':condition2,'by':'score_cid2'})
conditions.append({'condition':condition3,'by':'score_cid1'})
conditions.append({'condition':condition4,'by':'score_cid3'})
conditions.append({'condition':condition5,'by':'score_cid2'})
conditions.append({'condition':condition6,'by':'score_cid1'})
conditions.append({'condition':condition7,'by':'score_brand'})
return conditions
elif contype=='brandname':
condition1="(df['cid3']=={})".format(seedData['cid3']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'] + \
"&" +"(df['jg_type']==2)"
)
condition2="(df['cid2']=={})".format(seedData['cid2']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])+"&" +"(df['jg_type']==2)"
condition3="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])+"&" +"(df['jg_type']==2)"
condition4="(df['cid3']=={})".format(seedData['cid3']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')+"&" +"(df['jg_type']==2)"
condition5="(df['cid2']=={})".format(seedData['cid2']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')+"&" +"(df['jg_type']==2)"
condition6="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format('no_brand_name')+"&" +"(df['jg_type']==2)"
condition7="(df['cid1']=={})".format(seedData['cid1']) + \
"&" +"(df['brand_name']=='{}')".format(seedData['brand_name'])+"&" +"(df['jg_type']==2)"
conditions=[]
conditions.append({'condition':condition1,'by':'score_cid3'})
conditions.append({'condition':condition2,'by':'score_cid2'})
conditions.append({'condition':condition3,'by':'score_cid1'})
conditions.append({'condition':condition4,'by':'score_cid3'})
conditions.append({'condition':condition5,'by':'score_cid2'})
conditions.append({'condition':condition6,'by':'score_cid1'})
conditions.append({'condition':condition7,'by':'score_brand'})
return conditions
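# Note: normalSearchTopic eval()s each condition string against a DataFrame
# bound to the local name df, so the expressions built above must reference df.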
def jgDapanBu():
df=execmysl(119,jg_dapan_sql)
print(df['jg_id'])
return df['jg_id'].to_list()
def cidinfo(cid3):
    sql="""
    select distinct c_id1,c_id2,c_id3,brand_name from kdsp.t_sku_info where status=3 and tenant_id=560761 and c_id3={}
    """.format(cid3)
df=execmysl('test1',sql)
return df
if __name__ == '__main__':
skus={'id': 154060057, 'sku_no': '37993171924993', 'price': 39.9, 'cid1': 1320, 'cid2': 1583,
'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2}
#print(minPriceFill(skus))
# sku_no='5187546'
# sku_info=skuinfo(sku_no)
# cids=[]
# print(sku_info)
# cids.append(sku_info.get('cid1'))
# cids.append(sku_info.get('cid2'))
# cids.append(sku_info.get('cid3'))
# change_sql=concatSql(jd_hot_sql,**{"category_id":cids})
# #change_sql=concatSql(jd_hot_sql,)
# t=normalClickTopic(sku_no,change_sql)
# print(t)
print(concatCondition(skus))
## dapan (market-wide) table
dapan_sql='''
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
offline_recommend.recommend_same_product
'''
## product table
skuinfo_sql='''
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
offline_recommend.recommend_product_info
'''
## similarity table
similar_redis='product_similarity:{skuno}'
## product correlation
correlation_redis='product_correlation:${skuNo}'
## current batch + previous batch
cur_batch_sql="""
select a.*,b.sku_no,b.recall_reason from
(select id,recall_batch_uuid from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
join nearline_recommend.recommend_product_record b on a.recall_batch_uuid=b.recall_batch_uuid
"""
#b.jg_id,b.category_id,b.category_level,b.final_score,a.rank,a.click_num
hot_jd150_sql="""
select distinct {} from offline_recommend.jg_heat_rank
join offline_recommend.jg_intention_score on jg_heat_rank.jg_id=jg_intention_score.jg_id
order by jg_heat_rank.rank desc
"""
## user-intention score table for recommendation slots
jd_hot_sql="""
select jg_id,category_id,category_level,brand_name,final_score from offline_recommend.jg_intention_score
"""
## jingang-slot dapan table (jingang heat rank)
jg_dapan_sql="""
select * from offline_recommend.jg_heat_rank order by rank desc limit 30
"""
## user-intention score table for recommendation slots
jg_purpost_sql="""
"""
jg_purpost_all_sql="""
select * from where cid3={} or cid2={} or cid1={} or brand_name='{}'
"""
from recommend import *
from recommend.publicFunc import *
def step1():
    '''
    :return: trigger a text search
    '''
res=sendsearch(isFirstPage=1)
top10_sku=res[0].get('skunos').split(',')[:10]
# print(top10_sku)
return top10_sku,res
def getseed(top10_sku):
    '''
    :param top10_sku:
    :return: from the top-10 skus, derive the cid3 seed, the brand_name seed and the average price
    '''
result={}
#top10_sku=['37993180305409', '37993297751553', '37993171924993', '275043759168001', '37993633292801', '37993473904641', '37993926891009', '37993482297345', '10982072256513', '37993239032321']
result['sku_nos']=top10_sku
sql=concatSql(skuinfo_sql,**{'sku_no':top10_sku})
print(sql)
df=execmysl(119,sql)
df['cid3']=df['cid3'].astype('string')
cid3=df.groupby(by=['cid3']).groups.__repr__()#['cid3']#.max()
brandname=df.groupby(by=['brand_name']).groups.__repr__()
result['cid3']=maxdict(**json.loads(cid3.replace("'",'"')))
result['brand_name']=maxdict(**json.loads(brandname.replace("'",'"')))
result['avg_price']=df['price'].mean()
result['skuinfo']=df.to_dict(orient='records')
return result
def similarskus(top10_sku):
    '''
    :param top10_sku:
    :return: similarity data for the top-10 skus
    '''
rediskeys=[similar_redis.format(skuno=i) for i in top10_sku]
result=defaultdict(list)
temp=dict(zip(top10_sku,rediskeys))
for k,v in temp.items():
temp[k]=getRedisValue(v)
return temp
def seedSkus(top10_sku):
    '''
    :param top10_sku:
    :return: when none of the skus has similarity data, fall back to the top-10 smallest-price-gap items within the same brand_name|cid3 for each sku
    '''
seeddata=getseed(top10_sku)
temp=[]
for ssku in seeddata.get('skuinfo'):
sku=minPriceFill(ssku,10)
temp+=sku
print("商品[{}]价格差最小的补足数据".format(ssku),sku)
return temp
if __name__ == '__main__':
# top10_sku=[1,2,3,4]
# print(similarskus(top10_sku))
aa={}
skus={'skuinfo': [{'id': 13573, 'sku_no': '10982072256513', 'price': 139.9,'cid1': 1320, 'cid2': 1583, 'cid3': '1592', 'brand_name': '蜀道香','brand_id': 'nan', 'source_type': 6},
{'id': 229065184, 'sku_no': '275043759168001', 'price': 25.0, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃','brand_id': 'nan', 'source_type': 1},
{'id': 154060057, 'sku_no': '37993171924993', 'price': 39.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
{'id': 154058134, 'sku_no': '37993180305409', 'price': 36.0, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
{'id': 55340, 'sku_no': '37993239032321', 'price': 21.9, 'cid1': 1320, 'cid2': 1583,'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
{'id': 534359815, 'sku_no': '37993297751553', 'price': 69.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '盐津铺子', 'brand_id': 2574.0, 'source_type': 2},
{'id': 337773081, 'sku_no': '37993473904641', 'price': 35.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
{'id': 154060077, 'sku_no': '37993633292801', 'price': 13.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
{'id': 56569, 'sku_no': '37993926891009', 'price': 19.0, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2}]}
    print('final result:',seedSkus(skus))
import json
from hashlib import md5
from search import *
from publicSql import *
import numpy as np
def md5Secret(keyword):
return md5(keyword.encode()).hexdigest()[8:-8]
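# Hedged example: md5Secret keeps the middle 16 hex chars of the digest;
# md5('abc').hexdigest() == '900150983cd24fb0d6963f7d28e17f72', so
# md5Secret('abc') returns '3cd24fb0d6963f7d'.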
def compartResult(keyword):
codes=[]
def getFeaturesCode():
filename='/Users/dm/Downloads/量化派/需求文档/hash_dic_key.txt'
codes=[]
with open(filename,'r',encoding='utf-8') as file:
temp=json.load(file)
for k in temp.keys():
            if k.startswith('query'):## only validate query features
codes.append(k.split('#')[0])
return list(set(codes)),temp
def getHashValue():
filename='/Users/dm/Downloads/量化派/需求文档/hash_dic_val.txt'
with open(filename,'r',encoding='utf-8') as file:
return json.load(file)
def skuinfos(skuno):
sql=concatSql(skuinfo_sql,**{"sku_no":skuno})
df=execmysl(119,sql)
print(sql,df)
if df.empty:
        raise Exception('skuno [{}] does not exist'.format(skuno))
cid1_redis=cid1_rediskey.format(df['cid1'].to_list()[0])
cid2_redis=cid2_rediskey.format(df['cid2'].to_list()[0])
cid3_redis=cid3_rediskey.format(df['cid3'].to_list()[0])
brand_name_redis=brandname_rediskey.format(md5Secret(df['brand_name'].to_list()[0]))
skuno_redis=sku_rediskey.format(skuno)
return df,cid1_redis,cid2_redis,cid3_redis,brand_name_redis,skuno_redis
def getSkuIndex(queryword,skuno):
df,cid1_redis,cid2_redis,cid3_redis,brand_name_redis,skuno_redis=skuinfos(skuno)
codes,keyfile=getFeaturesCode()
real_values=defaultdict()
queryword_redis=FM_rediskey.format(md5Secret(queryword))
cid1_cid2_cid3_brandname_sku=defaultdict()
FM_df=getRedisValue(queryword_redis).get(queryword_redis)
for i in codes:
if 'cid1' in i:
tv=json.loads(FM_df[cid1_redis])
ttt=jsonpath.jsonpath(tv,'$..{}'.format(i))
#print(i,'===',v)
real_values[i]=ttt[0] if ttt else 0
if 'cid2' in i :
tv=json.loads(FM_df[cid2_redis])
ttt=jsonpath.jsonpath(tv,'$..{}'.format(i))
#print(i,'===',v)
real_values[i]=ttt[0] if ttt else 0
if 'brand_name' in i :
tv=json.loads(FM_df[brand_name_redis])
ttt=jsonpath.jsonpath(tv,'$..{}'.format(i))
#print(i,'===',v)
real_values[i]=ttt[0] if ttt else 0
if 'sku' in i :
tv=json.loads(FM_df[skuno_redis])
ttt=jsonpath.jsonpath(tv,'$..{}'.format(i))
#print(i,'===',v)
real_values[i]=ttt[0] if ttt else 0
return dict(real_values)
def result():
ces_values=getSkuIndex('伊丽莎白雅顿','100002751095')
codes,keyfile=getFeaturesCode()
print(codes)
tt=defaultdict()
for i in codes:
t=keyfile[i]
for index,ii in enumerate(t):
tces_values= ces_values.get(i) or 0
if float(tces_values)<=ii :#and i=='query_brand_name_pv_click_rate_1d':
tt[i]=index-1 if index-1>0 else 0
#temp=index-1
                print(ii,i,"feature value",tces_values,'bucket result',tt[i],'--> bucket boundaries',t)
break
elif float(tces_values)>=t[-1]:
tt[i]=len(t)-2
                print(ii,i,"feature value",tces_values,'bucket result',tt[i],'--> bucket boundaries',t)
return tt
def devData():
a={"if_query_brand":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":0},"if_query_cid2":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":2},"if_query_cid3":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":4},"sale_price":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":12},"profit":{"fea_origin_val":"190.0","hash_dic_key":13,"hash_dic_val":27},"sale_count":{"fea_origin_val":"24","hash_dic_key":10,"hash_dic_val":57},"ranking_points":{"fea_origin_val":"1.8739182465997146","hash_dic_key":0,"hash_dic_val":72},"skuid_brand_name_click_times_user_24h":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":84},"skuid_cid1_click_times_user_24h":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":87},"skuid_brand_name_click_times_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":96},"skuid_cid3_click_times_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":100},"skuid_cid1_click_times_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":107},"query_brand_name_click_times_7d":{"fea_origin_val":"8","hash_dic_key":4,"hash_dic_val":132},"query_brand_name_click_users_7d":{"fea_origin_val":"3","hash_dic_key":1,"hash_dic_val":139},"query_brand_name_click_times_1d":{"fea_origin_val":"1","hash_dic_key":0,"hash_dic_val":151},"query_brand_name_click_users_1d":{"fea_origin_val":"1","hash_dic_key":0,"hash_dic_val":162},"query_cid3_click_times_7d":{"fea_origin_val":"7","hash_dic_key":2,"hash_dic_val":172},"query_cid3_click_users_7d":{"fea_origin_val":"2","hash_dic_key":1,"hash_dic_val":190},"query_cid3_click_times_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":207},"query_cid3_click_users_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":220},"query_cid2_click_times_7d":{"fea_origin_val":"7","hash_dic_key":1,"hash_dic_val":234},"query_cid2_click_users_7d":{"fea_origin_val":"2","hash_dic_key":1,"hash_dic_val":252},"query_cid2_click_times_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":270},"query_cid2_click_users_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":284},"query_cid1_click_times_7d":{"fea_origin_val":"7","hash_dic_key":1,"hash_dic_val":300},"query_cid1_click_users_7d":{"fea_origin_val":"2","hash_dic_key":0,"hash_dic_val":323},"query_cid1_click_times_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":336},"query_cid1_click_users_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":350},"query_skuid_click_times_7d":{"fea_origin_val":"2","hash_dic_key":1,"hash_dic_val":368},"query_skuid_click_users_7d":{"fea_origin_val":"1","hash_dic_key":0,"hash_dic_val":375},"query_skuid_click_times_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":384},"query_skuid_click_users_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":389},"query_brand_name_pv_click_rate_7d":{"fea_origin_val":"0.111111","hash_dic_key":15,"hash_dic_val":403},"query_brand_name_uv_click_rate_7d":{"fea_origin_val":"1.0","hash_dic_key":15,"hash_dic_val":424},"query_brand_name_pv_click_rate_1d":{"fea_origin_val":"1.0","hash_dic_key":10,"hash_dic_val":433},"query_brand_name_uv_click_rate_1d":{"fea_origin_val":"1.0","hash_dic_key":10,"hash_dic_val":440},"query_cid3_pv_click_rate_7d":{"fea_origin_val":"0.28","hash_dic_key":16,"hash_dic_val":455},"query_cid3_uv_click_rate_7d":{"fea_origin_val":"0.4","hash_dic_key":7,"hash_dic_val":466},"query_cid3_pv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":483},"query_cid3_uv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":497},"query_cid2_pv_click_ra
te_7d":{"fea_origin_val":"0.107692","hash_dic_key":17,"hash_dic_val":518},"query_cid2_uv_click_rate_7d":{"fea_origin_val":"0.4","hash_dic_key":4,"hash_dic_val":529},"query_cid2_pv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":547},"query_cid2_uv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":562},"query_cid1_pv_click_rate_7d":{"fea_origin_val":"0.090909","hash_dic_key":17,"hash_dic_val":582},"query_cid1_uv_click_rate_7d":{"fea_origin_val":"0.4","hash_dic_key":3,"hash_dic_val":596},"query_cid1_pv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":616},"query_cid1_uv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":631},"query_skuid_pv_click_rate_7d":{"fea_origin_val":"0.4","hash_dic_key":11,"hash_dic_val":648},"query_skuid_uv_click_rate_7d":{"fea_origin_val":"0.2","hash_dic_key":11,"hash_dic_val":658},"query_skuid_pv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":666},"query_skuid_uv_click_rate_1d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":672},"uv_click_rate_1h":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":677},"uv_click_rate_24h":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":683},"uv_click_rate_7d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":698},"uv_click_rate_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":725},"pv_click_rate_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":744},"click_rate_gender_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":763},"click_rate_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":772},"brand_name_click_rate_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":774},"cid3_click_rate_gender_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":792},"cid3_click_rate_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":798},"cid2_click_rate_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":805},"cid1_click_rate_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":815},"skuid_brand_name_totalclick_times_user_7d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":827},"skuid_cid1_totalclick_times_user_7d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":831},"skuid_totalclick_times_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":841},"skuid_cid1_totalclick_times_user_40d":{"fea_origin_val":"0","hash_dic_key":0,"hash_dic_val":844}}
return a
if __name__ == '__main__':
# codevaule=100000
# a= [0.0, 2.0, 6.0, 12.0, 22.0, 38.0, 58.0, 88.0, 131.0, 183.0, 270.0, 385.0, 495.0, 652.0, 918.0, 1475.0, 3600.0, 9050.0]
# # t=np.array(a)
# # print(pd.cut(t,bins=a,labels=False))
# #tindex=0
# for index, i in enumerate(a):
# if codevaule<i:
# tindex=index
# break
# elif float(codevaule)>=i:
# tindex=len(a)-1
#
# print(tindex)
    print('bucketed group data',result())
print("****"*100)
#print('--',result())
a={'query_cid1_click_users_1d': 0, 'query_skuid_uv_click_rate_7d': 11, 'query_cid3_pv_click_rate_1d': 0, 'query_cid3_click_times_1d': 0, 'query_cid1_pv_click_rate_7d': 17, 'query_cid2_click_users_1d': 0, 'query_brand_name_click_times_1d': 0, 'query_cid1_click_users_7d': 0, 'query_cid1_click_times_1d': 0, 'query_cid3_uv_click_rate_1d': 0, 'query_brand_name_click_users_7d': 1, 'query_cid2_uv_click_rate_1d': 0, 'query_cid3_pv_click_rate_7d': 16, 'query_cid3_click_times_7d': 2, 'query_skuid_click_users_1d': 0, 'query_cid2_uv_click_rate_7d': 4, 'query_skuid_pv_click_rate_7d': 11, 'query_cid1_pv_click_rate_1d': 0, 'query_skuid_click_times_7d': 1, 'query_cid3_click_users_7d': 1, 'query_skuid_uv_click_rate_1d': 0, 'query_cid2_click_times_7d': 1, 'query_skuid_pv_click_rate_1d': 0, 'query_brand_name_pv_click_rate_7d': 15, 'query_brand_name_uv_click_rate_7d': 15, 'query_skuid_click_users_7d': 0, 'query_cid1_uv_click_rate_7d': 3, 'query_cid1_uv_click_rate_1d': 0, 'query_cid2_pv_click_rate_7d': 17, 'query_cid2_click_times_1d': 0, 'query_brand_name_click_times_7d': 4, 'query_cid2_pv_click_rate_1d': 0, 'query_brand_name_click_users_1d': 0, 'query_cid3_uv_click_rate_7d': 7, 'query_brand_name_uv_click_rate_1d': 10, 'query_cid3_click_users_1d': 0, 'query_brand_name_pv_click_rate_1d': 10, 'query_cid1_click_times_7d': 1, 'query_cid2_click_users_7d': 1, 'query_skuid_click_times_1d': 0}
deva=devData()
base_values=getHashValue()
for k,v in a.items():
dv=deva[k]['hash_dic_val']==base_values[k+'_group#'+str(v)]
        print(k,'dev result:',dv,deva[k]['hash_dic_val'],' test result:',base_values[k+'_group#'+str(v)])
from databaseConn import *
from tools import *
from tools.fileOperation import *
from tools.listOperation import *
from databaseConn.redisOperation import *
from tools.publicFun import *
cur_dir=os.path.dirname(os.path.abspath(__file__))
file_path=os.path.join(cur_dir,'tempFile')
import os
import random
import pandas as pd
from search import *
def requestsend(uuid,deviceid,searchContent,selectedActivitie,page=1,searchtype='txt',env='pro'):
"""
:param uuid: 用户uuid
:param deviceid: 设备号id
:param searchContent: 文本搜索
:param page: 默认1,搜索页
:param selectedActivitie: 专题id
:param type: txt表示文本搜索,否则为专题搜索
:param env: 环境
:return:
"""
if env not in ['test','pro']:
        raise Exception('env [{}] must be one of: test|pro'.format(env))
baseurl="http://open-search-engine.ec-{env}.qg-rc.net".format(env=env) if env=='test' \
else "http://open-search-engine.qg-{env}.qgwebservice.com".format(env=env)
url=baseurl+"/search"
header = {
"XQG-USER-UUID": uuid,
"content-type": "application/json",
"XQG-DEVICE-ID": deviceid
}
body = {
"channel": random.sample(channel, 1)[0],
"userUuid": uuid,
"deviceId": deviceid,
"extraParams": {
"appVersion": "8.6.00",
"terminal": "ios"
},
        ## text search
# "searchContent":searchContent ,
        ## topic search
# "selectedActivities":[{
# "id": ''.join(random.sample([str(i) for i in range(40)],1)),
# "type":2
# }],
"flipInfo": {
"pageNo": page,
"pageSize": 20
}
}
    if searchtype == 'txt': ## text search
body['searchContent'] = searchContent
elif searchtype == 'label': ##
body['selectedActivities'] = [{
"id": selectedActivitie, # ''.join(random.sample(','.join('215,214,34,33,32,31,30'),1)),
"type": 2
}]
elif searchtype == 'capt':
body['selectedActivities'] = [{
"id": selectedActivitie, # ''.join(random.sample(','.join('215,214,34,33,32,31,30'),1)),
"type": 3
}]
elif searchtype == 'activity':
body['selectedActivities'] = [{
"id": selectedActivitie, # ''.join(random.sample(','.join('215,214,34,33,32,31,30'),1)),
"type": 1
}]
t = requests.post(url, json=body, headers=header)
    print('search keyword:',searchContent)
try:
spu_nos = jsonpath.jsonpath(t.json(), '$..skuNo') or ['null']
sku_names = jsonpath.jsonpath(t.json(), '$..skuName') or ['null']
page_size = jsonpath.jsonpath(t.json(), '$..totalPage')[0]
search_id = jsonpath.jsonpath(t.json(), '$..searchId')[0]
total_skus=jsonpath.jsonpath(t.json(), '$..totalCount')[0]
request_time = t.elapsed.total_seconds()
tttt=','.join(spu_nos)
#print(tttt,type(tttt))
# with open('sku_result_3.txt', 'a+') as file:
# file.write('搜索词:' + searchContent + ";result:" + ','.join(sku_names) + "\n")
return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
except:
        print('search api error:',searchContent,t.text)
def hotWord(filename,sheetname=0):
from tools.fileOperation import readRenameColums
filepath=os.path.join(data_file_path,filename)
df=readRenameColums(filepath,fcolums=['search_words'],sheetname=sheetname)
return df
def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=0,ishotwordrand=1):
    '''
    :param selectedActivitie: optional
    :param type: run a text search or a topic search; currently supports 'txt'|'label'|'capt'|'activity'
    :param num: combined with ishotwordrand: if ishotwordrand=1 a random search keyword is picked; if 0, keywords are taken in order
    :param isFirstPage: set to true to request only the first page
    :param sheetname: sheet holding the search keywords, default 0 (the first sheet)
    :return:
    '''
timenow=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
temp=[]
result_temp={}
searchContents=hotWord('线上环境搜索热词.xlsx',sheetname=sheetname)['search_words'].apply(lambda x:str(x).strip()).to_list()
if ishotwordrand:
random_value=random.Random().randint(0,len(searchContents)-1)
else:
random_value=num
from tools.publicFun import genUuidDeviceid
uuid,deviceid=genUuidDeviceid()
selectedActivitie='2'
flag=1
try:
page_size, spu_nos, \
request_time, search_id,sku_names,searchContent,total_skus=\
requestsend(uuid,deviceid,searchContents[random_value],selectedActivitie,flag,type)
result_temp["nowtime"]=str(timenow)
result_temp["deviceid"]=deviceid
result_temp["uuid"]=uuid
result_temp["searchcontent"]=searchContent
result_temp["pageno"]=flag
result_temp["costtime"]=request_time
result_temp["skunos"]=spu_nos
result_temp['totalSkus']=total_skus
temp.append(result_temp)
file_template=["nowtime","searchcontent","pageno","costtime","deviceid","uuid","skunos"]
flag+=1
while flag<=page_size and not isFirstPage:
result_temp={}
            page_size, spu_nos, \
            request_time, search_id,sku_names,searchContent,total_skus= \
                requestsend(uuid,deviceid,searchContents[random_value],selectedActivitie,flag,type)
result_temp["nowtime"]=timenow
result_temp["searchcontent"]=searchContent
result_temp["pageno"]=flag
result_temp["costtime"]=request_time
result_temp["deviceid"]=deviceid
result_temp["uuid"]=uuid
result_temp["skunos"]=spu_nos
result_temp['totalSkus']=total_skus
temp.append(result_temp)
flag+=1
#filename=genReportName('searchResult')
print(temp)
return temp
except:
traceback.print_exc(limit=2)
def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1):
ttemp=[]
for i in range(num):
        print('search round [{}]'.format(str(i)))
try:
temp=totalrun(num=i,isFirstPage=isFirstPage,sheetname=sheetname,ishotwordrand=ishotwordrand)
ttemp+=temp
#print(temp)
except:
traceback.print_exc(limit=2)
df=pd.DataFrame(ttemp)
reportname=filePath(file_path,'searchResult')
#df['skunos']=df['skunos'].astype('string')
df.to_excel(reportname,index=0,encoding = 'utf-8',float_format = str)
def analysis(filename=None):
#temp=defaultdict()
result=[]
filename=filename if filename \
else os.popen("cd {};ls -t |grep -v 'init' |head -1".format(file_path)).read(100)
filename_path=os.path.join(file_path,filename.strip())
    print('file to analyse:',filename_path)
df=readFile(filename_path)
df['searchcontent'].fillna('无效',inplace=True)
mean_costtime=str(round(df['costtime'].mean(),3)*1000)+'ms'
count_by=['uuid','deviceid','searchcontent']
uds=df.groupby(count_by)#['skunos']
#count_by_skus=count_by.append('skunos')
#df1=pd.DataFrame(spus,columns=count_by_skus)#['skunos']
for uuid,deviceid,searchcontent in uds.groups:
temp={}
temp['uuid']=uuid
temp['deviceid']=deviceid
temp['searchcontent']=searchcontent
tempskus=df[(df['uuid']==uuid) & (df['deviceid']==deviceid) \
&(df['searchcontent']==searchcontent)]['skunos'].to_list()#.__str__()
temp['costtime']=df[(df['uuid']==uuid) & (df['deviceid']==deviceid) \
&(df['searchcontent']==searchcontent)]['costtime'].to_list()
temp['totalSkus']=mergelist([str(i).split(',') for i in tempskus])
#print(tempskus)
temp['isrepeat']=0 if len(temp['totalSkus']) == len(list(set(temp['totalSkus']))) else 1
temp['curpage_countskus']=len(temp['totalSkus'])
#print(uuid,deviceid,searchcontent)
#temppp=1
temp['countskus']=df[(df['uuid']==uuid) & (df['deviceid']==deviceid) \
&(df['searchcontent']==searchcontent)]['totalSkus'].to_list()[0]
result.append(temp)
startans=pd.DataFrame(result)
fist_200=startans[(startans['countskus']<=200)]
fist_500=startans[(startans['countskus']>200) & (startans['countskus']<=500)]
fist_1000=startans[(startans['countskus']>500) & (startans['countskus']<=1000)]
    fist1_200_1=[i for i in mergelist(fist_200['costtime'].to_list()) if float(i)>=0.1]
    fist1_200_2=[i for i in mergelist(fist_200['costtime'].to_list()) if float(i)>=0.15]
    fist1_200_3=[i for i in mergelist(fist_200['costtime'].to_list()) if float(i)>=0.2]
    fist1_500_1=[i for i in mergelist(fist_500['costtime'].to_list()) if float(i)>=0.1]
    fist1_500_2=[i for i in mergelist(fist_500['costtime'].to_list()) if float(i)>=0.15]
    fist1_500_3=[i for i in mergelist(fist_500['costtime'].to_list()) if float(i)>=0.2]
    fist1_1000_1=[i for i in mergelist(fist_1000['costtime'].to_list()) if float(i)>=0.1]
    fist1_1000_2=[i for i in mergelist(fist_1000['costtime'].to_list()) if float(i)>=0.15]
    fist1_1000_3=[i for i in mergelist(fist_1000['costtime'].to_list()) if float(i)>=0.2]
    print('total requests:',startans.shape[0],'; mean cost:',mean_costtime)
    print("any duplicate spus:",1 if startans[startans['isrepeat']==1].shape[0] else 0)
    print("keywords with total spus <=200 -->",'count over 100ms',len(fist1_200_1),\
          ';count over 150ms',len(fist1_200_2),
          ';count over 200ms',len(fist1_200_3))
    print("keywords with total spus >200 & <=500 -->",'count over 100ms',len(fist1_500_1), \
          ';count over 150ms',len(fist1_500_2),
          ';count over 200ms',len(fist1_500_3))
    print("keywords with total spus >500 & <=1000 -->",'count over 100ms',len(fist1_1000_1), \
          ';count over 150ms',len(fist1_1000_2),
          ';count over 200ms',len(fist1_1000_3))
#return result
if __name__ == '__main__':
# t=hotWord('线上环境搜索热词.xlsx')['search_words'].apply(lambda x:str(x).strip())
# print(t)
#print(totalrun(isFirstPage=1))
isexec=0
ishotwordrand=0
if isexec:
allRun(1000,isFirstPage=1,sheetname=3,ishotwordrand=ishotwordrand)
else:
print(analysis())
# a=[['236473224274432', '7188063'],['236473224274432', '7188063']]
# print(list(itertools.chain.from_iterable(a)))
skuinfo_sql="""
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
offline_recommend.recommend_same_product
"""
FM_rediskey='search_fm_offline_feature:{}_query_offline'
cid1_rediskey='{}_cid1_offline'#.format(df['cid1'].to_list()[0])
cid2_rediskey='{}_cid2_offline'#.format(df['cid2'].to_list()[0])
cid3_rediskey='{}_cid3_offline'#.format(df['cid3'].to_list()[0])
brandname_rediskey='{}_brand_name_offline'
sku_rediskey='{}_sku_offline'
from databaseConn.mysqlOperation import *
from recommend.publicSql import *
from databaseConn import *
from tools import *
import pandas as pd
import numpy as np
def modifyPassword(name,namespace):
#conn_db_11=create_engine('mysql://qa:{}@172.17.5.11:30077'.format('qatest'))
#conn_db_11=create_engine('mysql://qa:{}@172.17.5.48:31393'.format('qatest'))
conn_db_11=mysql_universal(namespace)
import hashlib
pw=b'123456qwe'
get_salt="""
select salt from stms.t_sys_user where account='{name}'
""".format(name=name)
salt_df=pd.read_sql(get_salt,con=conn_db_11)
temp=salt_df.to_dict(orient='records')
if not temp:
        print('[{name}] does not exist'.format(name=name))
return 0
salt=temp[0]['salt'].encode()
#salt=b'UwKESe3cvf703Z30' #t_sys_user.salt
tt=hashlib.sha512(pw+salt).hexdigest()
sql="""
update stms.t_sys_user set password='{password}' where account='{name}'
""".format(password=tt,name=name)
try:
pd.read_sql(sql,con=conn_db_11)
except:
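        # pd.read_sql raises for statements that return no result set; the
        # UPDATE has already been sent to the server by the time it fails.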
print("this is update")
if __name__ == '__main__':
    modifyPassword('chao.dong','vcc3')
import os.path
from databaseConn import *
import pandas as pd
def genReportName(name=''):
    '''
    :param name:
    :return: the generated report file name
    '''
t=round(datetime.datetime.now().timestamp())
return name+'_'+str(t)+'.xlsx'
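# Hedged example: genReportName('searchResult') yields something like
# 'searchResult_1700000000.xlsx', the suffix being the current unix timestamp.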
def filePath(filepath,name=''):
filename=genReportName(name)
return os.path.join(filepath,filename)
def readFile(filename,sheetname=0):
    '''
    :param filename:
    :param sheetname:
    :return: read a csv or xlsx file
    '''
    ext=filename.split('.')[-1]
    if ext=='csv':
        df=pd.read_csv(filename, index_col=0)
    else:
        df=pd.read_excel(filename,sheet_name=sheetname)
return df
def deleteFile(filename):
if os.path.exists(filename):
os.system('rm {}'.format(filename))
return 'delete succ'
return '{} is not found'.format(filename)
def readRenameColums(filename,fcolums,sheetname=0):
'''
:param filename:
    :param fcolums: list of replacement column names
    :return: the df with renamed columns
'''
df=readFile(filename,sheetname)
try:
df=df.rename(columns=dict(zip(df.columns,fcolums[0:len(df.columns)])))
except Exception as e:
        print('the number of replacement column names [{}] is smaller than the number of columns in the file [{}], so renaming failed'.format(len(fcolums),len(df.columns)))
        #print('=== api',traceback.print_exc())
        ## capture the error stack trace
        print("exception traceback:")
        print(traceback.format_exc(limit=2))
return df
if __name__ == '__main__':
filename="/Users/dm/Downloads/量化派/测试用例/召回测试耗时的数据.xlsx"
df=readRenameColums(filename,['id'])
print(df)
import requests
from search.abSearch import totalrun
from tools import *
from tools.publicFun import genUuidDeviceid
from tools.fileOperation import *
def sendhttp(url,header=None,body=None,methodtype='get'):
if methodtype=='get':
rep=requests.request(url=url,method=methodtype,headers=header,params=body)
else:
rep=requests.request(url=url,method=methodtype,headers=header,data=body)
return rep.json()
def sendfeed(uuid):
    '''
    :param uuid:
    :return: trigger feed-stream recall, i.e. the appstart topic
    '''
uuid,deviced=genUuidDeviceid()
url="http://aws-online-recommend-parent.ec-test.qg-rc.net/recommend/hodgepodge_stream?deviceId={}" \
"&userUuid={}&unionItemType=product_jd&pageSize=20&pageType=2&ip=192.168.28.142".format(uuid,deviced)
t=sendhttp(url)
return t
def sendsearch(isFirstPage=1):
'''
:param isFirstPage:
:return: 触发搜索召回
'''
t=totalrun(isFirstPage=isFirstPage)
return t
def sendhit(skuno,namespace):
    '''
    :param skuno:
    :param namespace:
    :return: trigger similar-item recall, i.e. the click topic
    '''
k={'sku_no':skuno}
sql=concatSql(skuinfo_sql,**k)
skuinfo=execmysl(namespace,sql)
if skuinfo.empty:
        raise Exception('skuno [{}] does not exist'.format(skuno))
uuid,deviced=genUuidDeviceid()
skutype=skuinfo['source_type'].to_list()[0]
url="http://172.20.1.131:23060/recommend/similarity_products?sourceId={}&appVersion=8.7.00" \
"&gid={}&sourceType={}&ip=192.168.29.228" \
"&channel=159913&userUuid={}&pageSize=20" \
"&parentPageType=&terminal=MINI-APP".format(skuno,deviced,skutype,uuid)
t=sendhttp(url)
return t
if __name__ == '__main__':
print(sendsearch(isFirstPage=1))
from databaseConn import *
def listCross(a,b):
'''
:param a:list1
:param b:list2
:return:list交叉排序
'''
#print(a,b)
temp=[]
flen=len(b) if len(a)>len(b) else len(a)
print(flen)
for i in range(flen):
temp.append(a.pop(0))
temp.append(b.pop(0))
temp+=a
temp+=b
return temp
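# Hedged example: listCross([1, 3, 5], [2, 4]) interleaves pairwise and then
# appends the leftover tail, yielding [1, 2, 3, 4, 5].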
def removeRepeat(a):
'''
:param a:list
:return:按顺序去掉重复词
'''
temp=list(set(a))
temp.sort(key=a.index)
return temp
def mergelist(a):
'''
:param a:list
:return: 嵌套list合并成一个
'''
t=itertools.chain.from_iterable(a)
return list(t)
if __name__ == '__main__':
a=[1,2,4,2,1,5,8]
print(removeRepeat(a))
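    print(mergelist([[1, 2], [3, 4], [5]]))  # hedged example: flattens to [1, 2, 3, 4, 5]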
from tools import *
def genUuidDeviceid():
"""
deviced必须随机生成,不然无法统计spu总数。故就算uuid是相同的,deviceid也不能相同
:return:
"""
uuid,deviced=codeuuid.uuid4().urn.split(':')[-1],codeuuid.uuid4().urn.split(':')[-1]
uuid='c1d7ff4e-ee78-48de-8b8d-50c2af29c3ff'
deviced='guiqiuyue_'+str(deviced)
return uuid,deviced
def maxdict(**kwargs):
return max(kwargs,key=lambda x:len(kwargs[x]))
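# Hedged example: maxdict(a=[1], b=[1, 2]) returns 'b', the key whose value
# list is longest.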
def strTodict(k):
if isinstance(k,str):
return json.loads(k.replace("'",'"'))
    raise Exception('[{}] cannot be converted to dict'.format(k))
def listTodict(k):
'''
:param k: [dict]
:return:
'''
temp=defaultdict()
for i in k:
for k1,v1 in i.items():
            if k1 in temp:
                raise Exception("duplicate dict key [{}] in the list; type conversion failed".format(k1))
temp[k1]=v1
return dict(temp)
def timenow():
return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
if __name__ == '__main__':
t=[{'15':[1]},{'1590': [1]}, {'1592': [0]},{'1592':[1]}]
print(listTodict(t))