Commit 7f7db97f authored by 桂秋月's avatar 桂秋月

大数据脚本整理,包括搜索,推荐

parents
Pipeline #1610 failed with stages
File added
# Default ignored files
/shelf/
/workspace.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/bigdata.iml" filepath="$PROJECT_DIR$/bigdata.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RunConfigurationProducerService">
<option name="ignoredProducers">
<set>
<option value="com.android.tools.idea.compose.preview.runconfiguration.ComposePreviewRunConfigurationProducer" />
</set>
</option>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
import jsonpath,requests,os,re,json,sys,traceback,datetime,random,time,itertools
from collections import defaultdict
import uuid as codeuuid
cur_dir=os.path.dirname(os.path.abspath(__file__))
project_dir=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_file_path=os.path.join(project_dir,'dataFile')
timenow = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
#sys.path.append(os.path.abspath(__file__))
# Channel ids used by downstream scripts; exact semantics not visible here — TODO confirm.
channel = [214, 217, 1]
# Static connection details for known MySQL instances, keyed by a short alias.
# Consulted first by mysqlInfo() before falling back to the holmes service.
# NOTE(review): plaintext credentials are committed in source — consider env/config.
mysql_info={
"119":{
#'root', 'quantGroup#com', '172.24.17.119', 15307
"name":"root",
"pwd":"quantGroup#com",
"host":"172.24.17.119",
"port":"15307"
},
"220":{
"name":"recommender_user_rw",
"pwd":"rkwnPEG4",
"host":"172.30.220.11",
"port":"15307"
},
"saas5":{
"name":"root",
"pwd":"123456",
"host":"172.29.2.5",
"port":"3306"
},
"saas16":{
"name":"root",
"pwd":"root",
"host":"172.29.5.16",
"port":"3306"
},
"27":{
"name":"real_data_test",
"pwd":"qpRBSarm$Tv*YO!n",
"host":"172.30.5.27",
"port":"8066"
}
}
import pandas as pd
from sqlalchemy import create_engine
from databaseConn import *
#conn_db_48=create_engine('mysql://qa:{}@172.17.5.48:31393'.format('qatest'))
#conn_db_119 = create_engine('mysql://{}:{}@{}:{}'.format('root', 'quantGroup#com', '172.24.17.119', 15307))
#conn_db_220 = create_engine('mysql://recommender_user_rw:rkwnPEG4@172.30.220.11:15307')
#conn_db_27 = create_engine('mysql://real_data_test:{}@172.30.5.27:8066'.format('qpRBSarm$Tv*YO!n'))
# 172.29.2.5:3306 用户名:root 密码:123456 saas一期
#conn_db_2_5 = create_engine('mysql://root:123456@172.29.2.5:3306')
# conn_db_5_16 = create_engine('mysql://root:root@172.29.5.16:3306')
def mysqlInfo(namespace):
    """
    Resolve MySQL connection info for a namespace.

    Checks the static ``mysql_info`` table first; otherwise asks the holmes
    service for the namespace's mysql endpoint.  Not applicable to the
    big-data MySQL instances — extend ``mysql_info`` for those.

    :param namespace: namespace name or known alias (e.g. 119, "saas5")
    :return: dict with keys name / pwd / host / port
    :raises Exception: when neither the static table nor holmes has the info
    """
    url = "https://holmes.liangkebang.com/k8s/service/detail?namespace={}&serviceType=base&serviceName=mysql".format(namespace)
    result = mysql_info.get(str(namespace)) or {}
    if result:
        return result
    try:
        # parse the response body once and reuse it for both jsonpath lookups
        resp_json = requests.get(url).json()
        result['host'] = jsonpath.jsonpath(resp_json, '$..lanIp')[0]
        result['port'] = jsonpath.jsonpath(resp_json, '$..nodePort')[0]
        result['pwd'] = 'qatest'
        result['name'] = 'qa'
        return result
    except Exception as e:
        # BUG FIX: the old bare `except:` hid the real cause; chain it instead
        raise Exception("该namespace[{}]找不到对应的mysql信息".format(namespace)) from e
def mysql_universal(namespace):
    """Build a SQLAlchemy engine for the MySQL instance resolved via mysqlInfo()."""
    info = mysqlInfo(namespace)
    dsn = "mysql://{name}:{pwd}@{host}:{port}".format(**info)
    return create_engine(dsn)
def execmysl(namespace, sql):
    """
    Run a read-only query and return the result as a DataFrame.

    :param namespace: namespace/alias understood by mysql_universal()
    :param sql: a SELECT statement (guarded by a crude keyword check)
    :return: pandas DataFrame, or None when execution fails (error printed)
    :raises Exception: when sql does not look like a SELECT
    """
    # BUG FIX: the guard was case-sensitive and rejected 'SELECT ... FROM ...'
    lowered = sql.lower()
    if 'select' not in lowered or 'from' not in lowered:
        raise Exception('在mysql看来不是有效的sql', sql)
    try:
        return pd.read_sql(sql, con=mysql_universal(namespace))
    except Exception:
        # best-effort: report the error and fall through to an implicit None
        print('mysql执行报错:')
        traceback.print_exc(limit=2)
def concatSql(sql, **kwargs):
    """
    Append a WHERE clause to a SELECT statement.

    Keyword arguments become AND-joined equality conditions; a list/tuple
    value with more than one element becomes an IN (...) condition, and a
    one-element list/tuple collapses to equality with its single element.

    :param sql: base SELECT statement
    :param kwargs: column -> value(s)
    :return: sql unchanged when no kwargs, otherwise sql + ' where ...'
    """
    if not kwargs:
        return sql
    clause = ' '
    for column, value in kwargs.items():
        if isinstance(value, (list, tuple)) and len(value) > 1:
            clause += "{} in {} and ".format(column, tuple(value))
        else:
            scalar = value[0] if isinstance(value, (list, tuple)) else value
            clause += "{}='{}' and ".format(column, scalar)
    # drop the trailing "and " left by the last condition
    return sql + ' where ' + clause[:-4]
if __name__ == '__main__':
    # Manual smoke test.  NOTE(review): 'update 1' fails execmysl's
    # select-only guard and raises — confirm whether that is the intent.
    print(execmysl(119,'update 1'))
\ No newline at end of file
import pandas as pd
from sqlalchemy import create_engine
from collections import defaultdict
from urllib import parse
# Node list / limits / password for the 172.24.17.119 redis cluster.
# NOTE(review): passwords are committed in plaintext.
REDIS_CONFIG = {
    'REDIS_NODES': [
        {'host': '172.24.17.119', 'port': 6371},
        {'host': '172.24.17.119', 'port': 6372},
        {'host': '172.24.17.119', 'port': 6373},
        {'host': '172.24.17.119', 'port': 6374},
        {'host': '172.24.17.119', 'port': 6375},
        {'host': '172.24.17.119', 'port': 6376}
    ],
    'REDIS_EXPIRE_TIME': 26 * 3600,
    'REDIS_MAX_CONNECTIONS': 50,
    'REDIS_PASSWD': 'redis',
}
# Same shape for the "dapan" (market-overview) cluster on 172.29.2.5.
dapan_redis_config={
    'REDIS_NODES': [
        {'host': '172.29.2.5', 'port': 6371},
        {'host': '172.29.2.5', 'port': 6372},
        {'host': '172.29.2.5', 'port': 6373}
    ],
    'REDIS_EXPIRE_TIME': 26 * 3600,
    'REDIS_MAX_CONNECTIONS': 50,
    'REDIS_PASSWD': '1234',
}
from rediscluster import RedisCluster
# Module-level cluster connection, created eagerly at import time;
# decode_responses=True so values come back as str, not bytes.
redis_db_conn_119 = RedisCluster(startup_nodes=REDIS_CONFIG.get('REDIS_NODES'),
                                 max_connections=REDIS_CONFIG.get('REDIS_MAX_CONNECTIONS'),
                                 password=REDIS_CONFIG.get('REDIS_PASSWD'), decode_responses=True)
# Disabled dapan cluster connection.  While this stays commented out,
# getRedisValue(conntype='dapan') raises NameError on dapan_redis_db_conn.
# dapan_redis_db_conn = RedisCluster(startup_nodes=dapan_redis_config.get('REDIS_NODES'),
#                                    max_connections=dapan_redis_config.get('REDIS_MAX_CONNECTIONS'),
#                                    password=dapan_redis_config.get('REDIS_PASSWD'),
#                                    decode_responses=True,
#                                    skip_full_coverage_check=True)
def getRedisValue(key,conntype=None,oper='select'):
    '''
    Fetch (or delete) every redis key matching a pattern.

    :param key: key pattern passed to KEYS (may contain glob wildcards)
    :param conntype: selects the connection; 'dapan' picks the market-overview
        cluster — NOTE(review): dapan_redis_db_conn is commented out at module
        level, so that branch raises NameError until it is re-enabled.
    :param oper: 'delete' removes every matched key; anything else reads them.
        Only delete/select are supported.
    :return: 'delete succ' after a delete, 0 when nothing matched, otherwise
        a dict of key -> value (list keys return at most the first 101 items)
    '''
    # guard against a SQL statement accidentally passed as a redis key
    if 'select' in key or 'from' in key:
        raise Exception('在redis看来不是有效的key',key)
    redis_value = defaultdict()
    if conntype == 'dapan':
        redis_db_conn=dapan_redis_db_conn
    else:
        redis_db_conn=redis_db_conn_119
    uuid_redis_result=redis_db_conn.keys(key)
    print('模糊匹配到的所有key:',uuid_redis_result)
    if uuid_redis_result and oper=='delete':
        for i in uuid_redis_result:
            redis_db_conn.delete(i)
        return 'delete succ'
    #print(uuid_redis_result)
    if not uuid_redis_result:
        print("[{key}]模糊没有匹配到数据,返回0".format(key=key))
        return 0
    #print(uuid_redis_result,key,redis_key_type)
    for i in uuid_redis_result:
        # dispatch on the redis type of each matched key
        redis_key_type=redis_db_conn.type(i)
        if redis_key_type == 'list':
            # LRANGE 0..100 (inclusive) — first 101 elements
            redis_value[i]=redis_db_conn.lrange(i,0,100)
            # uuid_redis_result = redis_db_conn.lrange(i)
        elif redis_key_type in ('dict','hash'):
            redis_value[i]=redis_db_conn.hgetall(i)
            for k,v in redis_value[i].items():
                print('redis key[{}]'.format(i),"===>",k,'===>',v)
            # uuid_redis_result = redis_db_conn.hgetall(i)
        elif redis_key_type=='string':
            #print(i)
            #print(redis_db_conn.get('search_online_feature:243717827731969_sku'))
            redis_value[i]=redis_db_conn.get(i)
    return redis_value
if __name__ == '__main__':
    # Manual check: dump the offline FM feature for one query hash.
    userUuid='00003a93-2a32-4501-b338-755b6cb1ec49'
    t=getRedisValue("search_fm_offline_feature:cd11201d3e789c63_query_offline")
    print(t)
## key template: search_fm_offline_feature:{md5(query)}_query_offline
## internally the query text is md5-hashed before lookup
from databaseConn import *
from tools import *
from tools.publicFun import *
from tools.httprequest import *
from tools.listOperation import *
from databaseConn.mysqlOperation import *
from databaseConn.redisOperation import *
from recommend.publicSql import *
cur_dir=os.path.dirname(os.path.abspath(__file__))
file_path=os.path.join(cur_dir,'tempFile')
from recommend import *
from recommend.publicFunc import *
from recommend.searchTopic import *
def allAppStartTopic():
    '''
    Collect app-start recall seeds: the top-30 cid3, brand_name and jg_id
    derived from the top-150 heat ranking.

    :return: defaultdict(list) with keys cid3 / brand_name / jg_id
    '''
    top_info=defaultdict(list)
    # top cid3 by heat rank (only category_level == 3 rows)
    cid3_concat='jg_intention_score.category_id,jg_intention_score.category_level,jg_heat_rank.rank'
    new_hot_cid150_sql=hot_jd150_sql.format(cid3_concat)
    cid150_df=execmysl(119,new_hot_cid150_sql)
    top_info['cid3']=cid150_df[(cid150_df['category_level']==3)].sort_values(by=['rank'])['category_id'].to_list()[:30]
    print('top 30 cid3==>',top_info['cid3'])
    # top brand names (empty names excluded)
    brandname_concat='jg_intention_score.brand_name,jg_heat_rank.rank'
    new_hot_brandname150_sql=hot_jd150_sql.format(brandname_concat)
    brandname_df=execmysl(119,new_hot_brandname150_sql)
    top_info['brand_name']=brandname_df[(brandname_df['brand_name']!='')].sort_values(by=['rank'])['brand_name'].to_list()[:30]
    print('top 30 brandname==>',top_info['brand_name'])
    # top jg ids, with operator fixed positions woven in by fixpos()
    jg_concat='jg_intention_score.jg_id,jg_heat_rank.rank'
    new_hot_jg150_sql=hot_jd150_sql.format(jg_concat)
    jg_df=execmysl(119,new_hot_jg150_sql)
    temp_jg=jg_df.sort_values(by=['rank'])['jg_id'].to_list()[:30]
    top_info['jg_id']=fixpos(temp_jg)
    print('top 30 jgid==>',top_info['jg_id'])
    return top_info
def AllClickTopic(sku_no):
    """
    Collect click-recall results for a sku across the three recall channels.

    :param sku_no: sku number used as the click seed
    :return: defaultdict(list) with keys jd_click / cid_click / brandname_click
    """
    clickresutl = defaultdict(list)
    sku_info = skuinfo(sku_no)
    print(sku_info)
    cids = [sku_info.get('cid1'), sku_info.get('cid2'), sku_info.get('cid3')]
    # jingang-slot click recall, restricted to the sku's category ids
    jd_change_sql = concatSql(jd_hot_sql, **{"category_id": cids})
    clickresutl['jd_click'] = normalClickTopic(sku_no, jd_change_sql)
    # category click recall (jg_type = 1)
    cid_change_sql = concatSql(jg_purpost_sql, **{"jg_type": 1})
    clickresutl['cid_click'] = normalClickTopic(sku_no, cid_change_sql)
    # brand click recall (jg_type = 2)
    brandname_change_sql = concatSql(jg_purpost_sql, **{"jg_type": 2})
    # BUG FIX: the brand channel previously reused cid_change_sql (jg_type=1)
    clickresutl['brandname_click'] = normalClickTopic(sku_no, brandname_change_sql)
    return clickresutl
def allSearchTopic():
    '''
    Search-recall verification: fire a search, build seed data (cid/brand/price)
    from the top-10 results, and compute the jg/cid/brandname recall lists.

    :return: defaultdict(list) with keys jg_search / cid_search / brandname_search
    '''
    result=defaultdict(list)
    top10_sku,searchinfo=step1()
    # seed data derived from the top-10 skus
    seedData=getseed(top10_sku)
    # derive cid2/cid1 from the cid3 seed
    cids_df=cidinfo(seedData.get('cid3')).to_dict(orient='records')[0]
    seedData.update(**{'cid2':cids_df['c_id2']})
    seedData.update(**{'cid1':cids_df['c_id1']})
    jd_change_sql=jg_purpost_all_sql.format(seedData['cid3'],seedData['cid2'],seedData['cid1'],seedData['brand_name'])
    df=execmysl(119,jd_change_sql)
    # one condition set per recall channel, all evaluated over the same frame
    conditions=concatCondition(seedData,contype='jg')
    conditions1=concatCondition(seedData,contype='cid')
    conditions2=concatCondition(seedData,contype='brandname')
    result['jg_search']=normalSearchTopic(conditions,df)
    result['cid_search']=normalSearchTopic(conditions1,df)
    result['brandname_search']=normalSearchTopic(conditions2,df)
    return result
def getoperate():
    """
    Fetch operator-configured fixed slots from the kdsp config service.

    :return: list of {'id': ..., 'rank': ...} records for entries with
        fixedPosition == 2, ordered by rank
    """
    resp = requests.get(
        'http://kdsp-api-test1.liangkebang.net/api/kdsp/index/ex/effective-config',
        headers={"qg-tenant-id": "560761"},
    )
    payload = resp.json()
    config = jsonpath.jsonpath(payload, '$..config')[0]
    print(payload)
    frame = pd.DataFrame(config)
    fixed = frame[frame["fixedPosition"] == 2].sort_values(by='rank')
    fixed['rank'] = fixed['rank'].astype('int')
    #print('获取业务金刚位数据:',fixed)
    return fixed[['id', 'rank']].to_dict(orient='records')
def fixpos(recall_list):
    """
    Weave operator fixed positions into a recall list (in place).

    Fixed ids already present in the recall list are removed first so they
    are not duplicated, then each fixed id is inserted at its 1-based rank.

    :param recall_list: list of recall ids; mutated in place
    :return: the same list with fixed positions applied
    """
    pos = getoperate()
    # FIX: a list comprehension was used purely for its side effects;
    # a plain loop states the intent
    for item in pos:
        if item['id'] in recall_list:
            recall_list.remove(item['id'])
    for item in pos:
        recall_list.insert(item.get('rank') - 1, item['id'])
    return recall_list
if __name__ == '__main__':
    # placeholder smoke test
    t=1
    print(t)
This diff is collapsed.
## Market-overview (dapan) product table
dapan_sql='''
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
offline_recommend.recommend_same_product
'''
## Product info table
skuinfo_sql='''
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
offline_recommend.recommend_product_info
'''
## Similarity redis key template
similar_redis='product_similarity:{skuno}'
## Product correlation redis key template
## NOTE(review): str.format keeps the literal '$' in '${skuNo}' — confirm
## whether the '$' belongs in the real key
correlation_redis='product_correlation:${skuNo}'
## Current + previous recall batch for one user
cur_batch_sql="""
select a.*,b.sku_no,b.recall_reason from
(select id,recall_batch_uuid from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
join nearline_recommend.recommend_product_record b on a.recall_batch_uuid=b.recall_batch_uuid
"""
#b.jg_id,b.category_id,b.category_level,b.final_score,a.rank,a.click_num
## Top-150 heat ranking joined with intention scores; '{}' is the column list
hot_jd150_sql="""
select distinct {} from offline_recommend.jg_heat_rank
join offline_recommend.jg_intention_score on jg_heat_rank.jg_id=jg_intention_score.jg_id
order by jg_heat_rank.rank desc
"""
## Recommendation-slot user intention score table
jd_hot_sql="""
select jg_id,category_id,category_level,brand_name,final_score from offline_recommend.jg_intention_score
"""
## Jingang-slot heat ranking, top 30
jg_dapan_sql="""
select * from offline_recommend.jg_heat_rank order by rank desc limit 30
"""
## Recommendation-slot intention score sql
## NOTE(review): deliberately(?) empty — concatSql() on this yields only a WHERE clause
jg_purpost_sql="""
"""
## NOTE(review): missing a table name after FROM — this statement cannot execute as-is
jg_purpost_all_sql="""
select * from where cid3={} or cid2={} or cid1={} or brand_name='{}'
"""
from recommend import *
from recommend.publicFunc import *
def step1():
    """
    Trigger a text search.

    :return: (list of the first 10 sku numbers, raw search response)
    """
    response = sendsearch(isFirstPage=1)
    top10 = response[0].get('skunos').split(',')[:10]
    return top10, response
def getseed(top10_sku):
    '''
    Derive recall seeds from the top-10 skus: the dominant cid3, the dominant
    brand_name, and the average price.

    :param top10_sku: list of sku numbers
    :return: dict with keys sku_nos / cid3 / brand_name / avg_price / skuinfo
    '''
    result={}
    #top10_sku=['37993180305409', '37993297751553', '37993171924993', '275043759168001', '37993633292801', '37993473904641', '37993926891009', '37993482297345', '10982072256513', '37993239032321']
    result['sku_nos']=top10_sku
    sql=concatSql(skuinfo_sql,**{'sku_no':top10_sku})
    print(sql)
    df=execmysl(119,sql)
    df['cid3']=df['cid3'].astype('string')
    # groupby(...).groups repr looks like "{'1590': [...], ...}"; after swapping
    # single quotes for double quotes it parses as JSON, and maxdict() then
    # picks the key whose member list is longest (the dominant group)
    cid3=df.groupby(by=['cid3']).groups.__repr__()#['cid3']#.max()
    brandname=df.groupby(by=['brand_name']).groups.__repr__()
    result['cid3']=maxdict(**json.loads(cid3.replace("'",'"')))
    result['brand_name']=maxdict(**json.loads(brandname.replace("'",'"')))
    result['avg_price']=df['price'].mean()
    result['skuinfo']=df.to_dict(orient='records')
    return result
def similarskus(top10_sku):
    """
    Look up the similarity redis entry for each of the top-10 skus.

    :param top10_sku: list of sku numbers
    :return: dict mapping sku_no -> value stored at product_similarity:{sku_no}
    """
    # FIX: removed the unused `result = defaultdict(list)` local
    similar = {}
    for sku in top10_sku:
        similar[sku] = getRedisValue(similar_redis.format(skuno=sku))
    return similar
def seedSkus(top10_sku):
    """
    Fallback seed generation: when no sku has similarity data, collect for
    each sku the top-10 fills with the smallest price gap within the same
    brand_name/cid3.

    :param top10_sku: list of sku numbers
    :return: flat list of fill skus across all seed skus
    """
    seed = getseed(top10_sku)
    filled = []
    for info in seed.get('skuinfo'):
        closest = minPriceFill(info, 10)
        filled += closest
        print("商品[{}]价格差最小的补足数据".format(info), closest)
    return filled
if __name__ == '__main__':
    # top10_sku=[1,2,3,4]
    # print(similarskus(top10_sku))
    aa={}
    # Sample seed payload.  NOTE(review): seedSkus() passes its argument to
    # getseed(), which expects a list of sku numbers, but a dict is given
    # here — confirm this demo still runs as intended.
    skus={'skuinfo': [{'id': 13573, 'sku_no': '10982072256513', 'price': 139.9,'cid1': 1320, 'cid2': 1583, 'cid3': '1592', 'brand_name': '蜀道香','brand_id': 'nan', 'source_type': 6},
    {'id': 229065184, 'sku_no': '275043759168001', 'price': 25.0, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃','brand_id': 'nan', 'source_type': 1},
    {'id': 154060057, 'sku_no': '37993171924993', 'price': 39.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
    {'id': 154058134, 'sku_no': '37993180305409', 'price': 36.0, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
    {'id': 55340, 'sku_no': '37993239032321', 'price': 21.9, 'cid1': 1320, 'cid2': 1583,'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
    {'id': 534359815, 'sku_no': '37993297751553', 'price': 69.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '盐津铺子', 'brand_id': 2574.0, 'source_type': 2},
    {'id': 337773081, 'sku_no': '37993473904641', 'price': 35.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
    {'id': 154060077, 'sku_no': '37993633292801', 'price': 13.9, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2},
    {'id': 56569, 'sku_no': '37993926891009', 'price': 19.0, 'cid1': 1320, 'cid2': 1583, 'cid3': '1590', 'brand_name': '口水娃', 'brand_id': 10590.0, 'source_type': 2}]}
    print('最终结果:',seedSkus(skus))
\ No newline at end of file
This diff is collapsed.
from databaseConn import *
from tools import *
from tools.fileOperation import *
from tools.listOperation import *
from databaseConn.redisOperation import *
from tools.publicFun import *
cur_dir=os.path.dirname(os.path.abspath(__file__))
file_path=os.path.join(cur_dir,'tempFile')
\ No newline at end of file
This diff is collapsed.
## Product table used by the search scripts
skuinfo_sql="""
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
offline_recommend.recommend_same_product
"""
## Offline-feature redis key templates; the '{}' placeholder takes the
## md5 of the query text / a cid / a brand name / a sku number respectively
FM_rediskey='search_fm_offline_feature:{}_query_offline'
cid1_rediskey='{}_cid1_offline'#.format(df['cid1'].to_list()[0])
cid2_rediskey='{}_cid2_offline'#.format(df['cid2'].to_list()[0])
cid3_rediskey='{}_cid3_offline'#.format(df['cid3'].to_list()[0])
brandname_rediskey='{}_brand_name_offline'
sku_rediskey='{}_sku_offline'
from databaseConn.mysqlOperation import *
from recommend.publicSql import *
from databaseConn import *
\ No newline at end of file
from tools import *
import pandas as pd
import numpy as np
def modifyPasseord(name, namespace):
    """
    Reset a stms user's password to '123456qwe'.

    Reads the account's salt, computes sha512(password + salt) and writes it
    back to stms.t_sys_user.

    :param name: account name in stms.t_sys_user
    :param namespace: namespace/alias understood by mysql_universal()
    :return: 0 when the account does not exist, otherwise None
    """
    import hashlib
    conn = mysql_universal(namespace)
    pw = b'123456qwe'
    get_salt = """
    select salt from stms.t_sys_user where account='{name}'
    """.format(name=name)
    salt_df = pd.read_sql(get_salt, con=conn)
    rows = salt_df.to_dict(orient='records')
    if not rows:
        # BUG FIX: the placeholder lacked braces, so the name was never shown
        print('[{name}] is not exist'.format(name=name))
        return 0
    salt = rows[0]['salt'].encode()
    #salt=b'UwKESe3cvf703Z30' #t_sys_user.salt
    hashed = hashlib.sha512(pw + salt).hexdigest()
    sql = """
    update stms.t_sys_user set password='{password}' where account='{name}'
    """.format(password=hashed, name=name)
    try:
        # read_sql is (ab)used to execute the UPDATE; it raises because an
        # UPDATE returns no result set, so the exception is expected here
        pd.read_sql(sql, con=conn)
    except Exception:
        print("this is update")
if __name__ == '__main__':
    # Manual run: reset a known account's password in namespace vcc3.
    modifyPasseord('chao.dong','vcc3')
\ No newline at end of file
import os.path
from databaseConn import *
import pandas as pd
def genReportName(name=''):
    """
    Build a report file name: <name>_<unix-seconds>.xlsx.

    :param name: optional base name prefix
    :return: generated .xlsx file name
    """
    stamp = round(datetime.datetime.now().timestamp())
    return '{}_{}.xlsx'.format(name, stamp)
def filePath(filepath, name=''):
    """Join a directory with a freshly generated report file name."""
    return os.path.join(filepath, genReportName(name))
def readFile(filename, sheetname=0):
    """
    Read a .csv or .xlsx file into a DataFrame.

    :param filename: file path; the extension decides the parser
    :param sheetname: sheet index/name for Excel files (ignored for csv)
    :return: DataFrame (first column becomes the index for csv files)
    """
    ext = filename.split('.')[-1]
    if ext == 'csv':
        # BUG FIX: read_csv has no sheet argument; the old sheetname=...
        # keyword raised TypeError on every csv read
        df = pd.read_csv(filename, index_col=0)
    else:
        df = pd.read_excel(filename, sheet_name=sheetname)
    return df
def deleteFile(filename):
    """
    Delete a file if it exists.

    :param filename: path to remove
    :return: 'delete succ' on removal, otherwise '<filename> is not found'
    """
    if os.path.exists(filename):
        # BUG FIX (portability/safety): use os.remove instead of shelling out
        # to `rm {}`, which broke on spaces/shell metacharacters in the path
        os.remove(filename)
        return 'delete succ'
    return '{} is not found'.format(filename)
def readRenameColums(filename, fcolums, sheetname=0):
    """
    Read a workbook and rename its columns positionally.

    :param filename: path understood by readFile()
    :param fcolums: list of replacement column names (extra names are ignored;
        when shorter than the sheet, only the leading columns are renamed)
    :param sheetname: sheet index/name for Excel files
    :return: DataFrame with renamed columns
    """
    df = readFile(filename, sheetname)
    try:
        df = df.rename(columns=dict(zip(df.columns, fcolums[0:len(df.columns)])))
    except Exception:
        print('重命名的列名数[{}]小于文档中的列名数[{}],导致无法重命名'.format(len(fcolums), len(df.columns)))
        print("异常的堆栈信息:")
        # BUG FIX: format_exc returns a string — it was computed and discarded
        print(traceback.format_exc(limit=2))
    return df
if __name__ == '__main__':
    # Manual check against a local workbook.
    filename="/Users/dm/Downloads/量化派/测试用例/召回测试耗时的数据.xlsx"
    df=readRenameColums(filename,['id'])
    print(df)
import requests
from search.abSearch import totalrun
from tools import *
from tools.publicFun import genUuidDeviceid
from tools.fileOperation import *
def sendhttp(url, header=None, body=None, methodtype='get'):
    """
    Fire an HTTP request and return the parsed JSON body.

    GET sends the body as query parameters; any other method sends it as
    form data.
    """
    kwargs = {'params': body} if methodtype == 'get' else {'data': body}
    resp = requests.request(url=url, method=methodtype, headers=header, **kwargs)
    return resp.json()
def sendfeed(uuid):
    """
    Trigger feed-stream recall (appstart topic) for a user.

    :param uuid: user uuid; when falsy, a fresh one is generated
    :return: parsed JSON response
    """
    gen_uuid, deviced = genUuidDeviceid()
    # BUG FIX: the uuid parameter was unconditionally overwritten before
    uuid = uuid or gen_uuid
    # BUG FIX: uuid and deviceId were swapped in the query string arguments
    url = "http://aws-online-recommend-parent.ec-test.qg-rc.net/recommend/hodgepodge_stream?deviceId={}" \
          "&userUuid={}&unionItemType=product_jd&pageSize=20&pageType=2&ip=192.168.28.142".format(deviced, uuid)
    return sendhttp(url)
def sendsearch(isFirstPage=1):
    """
    Trigger search recall.

    Thin wrapper around abSearch.totalrun.
    """
    return totalrun(isFirstPage=isFirstPage)
def sendhit(skuno, namespace):
    """
    Trigger similar-product recall (click topic) for a sku.

    :param skuno: sku number to click
    :param namespace: namespace/alias for the sku lookup
    :return: parsed JSON response
    :raises Exception: when the sku does not exist
    """
    lookup_sql = concatSql(skuinfo_sql, **{'sku_no': skuno})
    skuinfo = execmysl(namespace, lookup_sql)
    if skuinfo.empty:
        raise Exception('skuno[{}]不存在'.format(skuno))
    uuid, deviced = genUuidDeviceid()
    skutype = skuinfo['source_type'].to_list()[0]
    url = "http://172.20.1.131:23060/recommend/similarity_products?sourceId={}&appVersion=8.7.00" \
          "&gid={}&sourceType={}&ip=192.168.29.228" \
          "&channel=159913&userUuid={}&pageSize=20" \
          "&parentPageType=&terminal=MINI-APP".format(skuno, deviced, skutype, uuid)
    return sendhttp(url)
if __name__ == '__main__':
    # Manual smoke test: fire one first-page search.
    print(sendsearch(isFirstPage=1))
from databaseConn import *
def listCross(a, b):
    """
    Interleave two lists: a[0], b[0], a[1], b[1], ..., followed by the
    leftover tail of the longer list.

    FIX: a stray debug print was removed and the inputs are no longer
    emptied in place (the old version pop()ed both lists).

    :param a: first list
    :param b: second list
    :return: new interleaved list; inputs are left untouched
    """
    shorter = min(len(a), len(b))
    merged = []
    for i in range(shorter):
        merged.append(a[i])
        merged.append(b[i])
    merged.extend(a[shorter:])
    merged.extend(b[shorter:])
    return merged
def removeRepeat(a):
    """
    Drop duplicate elements while keeping first-occurrence order.

    :param a: list of hashable items
    :return: deduplicated list
    """
    # dict keys are unique and preserve insertion order
    return list(dict.fromkeys(a))
def mergelist(a):
    """
    Flatten one level of nesting into a single list.

    :param a: iterable of iterables
    :return: flat list
    """
    return [item for sub in a for item in sub]
if __name__ == '__main__':
    # quick check of order-preserving dedup
    a=[1,2,4,2,1,5,8]
    print(removeRepeat(a))
from tools import *
def genUuidDeviceid():
    """
    Produce a (user uuid, device id) pair.

    The device id must be random on every call, otherwise spu totals cannot
    be counted — so even with a shared uuid the deviceid always differs.
    NOTE(review): the uuid is pinned to a fixed value below, which looks like
    a debugging leftover — confirm before relying on uuid randomness.
    """
    random_uuid = codeuuid.uuid4().urn.split(':')[-1]
    deviced = codeuuid.uuid4().urn.split(':')[-1]
    uuid = 'c1d7ff4e-ee78-48de-8b8d-50c2af29c3ff'
    return uuid, 'guiqiuyue_' + str(deviced)
def maxdict(**kwargs):
    """Return the key whose value is longest (first one wins on ties)."""
    return max(kwargs.keys(), key=lambda name: len(kwargs[name]))
def strTodict(k):
    """
    Parse a single-quoted dict repr string into a dict.

    :param k: string like "{'a': 1}"
    :return: parsed dict
    :raises Exception: when k is not a string
    """
    if not isinstance(k, str):
        raise Exception('[{}]不支持转为dict'.format(k))
    return json.loads(k.replace("'", '"'))
def listTodict(k):
    """
    Merge a list of dicts into a single dict.

    :param k: iterable of dicts, e.g. [{'a': 1}, {'b': 2}]
    :return: merged dict
    :raises Exception: when the same key occurs in more than one element
    """
    merged = {}
    for entry in k:
        for key, value in entry.items():
            # BUG FIX: the old `defaultdict()` (no factory) raised KeyError on
            # the very first `temp[k1]` lookup; membership is tested with `in`
            if key in merged:
                raise Exception("列表中的dict的key[{}]有重复数据,转换类型失败".format(key))
            merged[key] = value
    return merged
def timenow():
    """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
if __name__ == '__main__':
    # quick check of the dict-merge helper (last entry duplicates '1592')
    t=[{'15':[1]},{'1590': [1]}, {'1592': [0]},{'1592':[1]}]
    print(listTodict(t))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment