Commit 336b7cd5 authored by 桂秋月

Modify the search script

parent e36447d4
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="Git" />
  </component>
</project>
\ No newline at end of file
@@ -2,8 +2,10 @@
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
\ No newline at end of file
import jsonpath,requests,os,re,json,sys,traceback,datetime,random,time,itertools
+from pymongo import MongoClient
from collections import defaultdict
import uuid as codeuuid
cur_dir=os.path.dirname(os.path.abspath(__file__))
@@ -44,5 +45,15 @@ mysql_info={
    "pwd":"Qa6pxybKE8KgsUGV",
    "host":"172.30.5.27",
    "port":"7438"
-    }
+    },
+    "nearly":{
+        "name":"nearlinerds",
+        "pwd":"3^4f=GZeA$",
+        "host":"recommend-nearline-prod-5-7.cr22rydq7l0o.rds.cn-north-1.amazonaws.com.cn",
+        "port":"3306"
+    }
}
+mongo_info={
+}
\ No newline at end of file
from databaseConn import *
from databaseConn.mysqlOperation import mysqlInfo
def mongo_universal(namespace):
    # build the mongodb connection uri from the connection info returned by mysqlInfo
    mongo_uri_template="mongodb://{name}:{pwd}@{host}:{port}"
    conn_info=mysqlInfo(namespace,dbtype='mongodb')
    #print(conn_info)
    temp=mongo_uri_template.format(**conn_info)
    print("mongodb connection info:",temp)
    return temp
def mongoClient(namespace,db="app_vcc2"):
    '''
    :param namespace:
    :param db:
    :return: a client already connected to the given db
    '''
    temp=mongo_universal(namespace)
    # item access picks the database whose name is held in a variable
    return MongoClient(temp)[db]
def tableSearch(namespace,tablename,**kwargs):
    '''
    :param namespace: namespace of the test environment
    :param tablename: collection name
    :param kwargs: must contain the db name and the query rules; the query is treated as a list by default.
        eg:{"db":"app_vcc2","query":[{"event":"loginSuccess"#,"userId":"70549608"},{"_id":1}]}
    :return: the find() result
    '''
    temp=''
    db=mongoClient(namespace,kwargs.get('db'))
    tables=[i['name'] for i in db.list_collections()]
    if tablename not in tables:
        raise Exception('table[{}] is not in database[{}], all tables are:'.format(tablename,kwargs.get('db')),tables)
    #print("db result",db,kwargs.get('query'))
    # build a "db.<collection>.find(filter,projection)" expression and evaluate it
    change_result="db.{tablename}.find(".format(tablename=tablename)
    for i in kwargs.get('query'):
        temp+=str(i)+','
    change_result+=temp[:-1]+")"
    print("concatenated mongo query:",change_result)
    return eval(change_result)
if __name__ == '__main__':
    key={"db":"ocrDb","query":[{
        #,"userId":"70549608"
    },{"_id":1}]}
    temp=tableSearch('qa','system.version',**key)
    result=[i for i in temp]
    print(len(result))
    for i in result:   # the cursor is already exhausted by the list comprehension above
        print('===',i)
    print(temp)
\ No newline at end of file
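For reference, the same lookup can be expressed through pymongo's collection API directly, without building a query string and calling eval. This is only a minimal sketch of that alternative, reusing the mongoClient helper and the ocrDb/system.version example from the script above; tableSearchDirect is a hypothetical name, not part of the commit:

def tableSearchDirect(namespace, tablename, db="app_vcc2", query_filter=None, projection=None):
    # same existence check as tableSearch, but filter/projection dicts are passed to find() as-is
    database = mongoClient(namespace, db)
    tables = [c['name'] for c in database.list_collections()]
    if tablename not in tables:
        raise Exception('table[{}] is not in database[{}], all tables are:'.format(tablename, db), tables)
    return database[tablename].find(query_filter or {}, projection)

# usage, mirroring the __main__ block above:
# for doc in tableSearchDirect('qa', 'system.version', db='ocrDb', projection={"_id": 1}):
#     print(doc)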
@@ -9,12 +9,14 @@ from databaseConn import *
#conn_db_2_5 = create_engine('mysql://root:123456@172.29.2.5:3306')
# conn_db_5_16 = create_engine('mysql://root:root@172.29.5.16:3306')
-def mysqlInfo(namespace):
+def mysqlInfo(namespace,dbtype='mysql'):
    """
    :param namespace:
+    :param dbtype: type of service to look up; currently mysql and mongodb are supported
    :return: the mysql info for the given namespace; not applicable to the big-data mysql, for which a custom mysql_info has to be supplied
    """
-    url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}&serviceType=base&serviceName=mysql".format(namespace)
+    url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}" \
+        "&serviceType=base&serviceName={dbtype}".format(namespace,dbtype=dbtype)
    result=mysql_info.get(str(namespace)) or {}
    if result:
        return result
......
from recommend import *
from recommend.publicFunc import skuinfo,dapanBu
-def preconditions(uuid):
+def preconditions(uuid,sku_no):
    '''
    :param uuid:
    :return: check whether the previous batch came from the add-to-cart topic; if it did, do not trigger, otherwise trigger
    '''
-    back_batch_redis=""
-    return True
+    try:
+        eventtype='orderTopic'
+        # redis key of the previous recall batch for this user/sku
+        back_batch_change_redis=back_batch_redis.format(uuid=uuid,sku_no=sku_no)
+        ttt=getRedisValue(back_batch_change_redis).get(back_batch_change_redis)
+        cur_batch_change_sql=concatSql(cur_batch_sql,**{"recall_batch_uuid":ttt})
+        cur_batch_df=execmysl(119,cur_batch_change_sql)
+        is_order_topic=0 if cur_batch_df.empty or not cur_batch_df['event_type'].to_list()[0]==eventtype else 1
+        return is_order_topic
+    except Exception:
+        return 0
def recall_J1(skuno):
    '''
@@ -37,7 +45,6 @@ def recall_J1(skuno):
    return temp
def supply_J1(skuno):
    sku_info=skuinfo(skuno)
    dapan_change_sql=concatSql(dapan_sql,**{"cid3":sku_info.get('cid3')})
@@ -57,4 +64,6 @@ def recall_J3():
if __name__ == '__main__':
-    print(recall_J1('12312'))
+    #print(recall_J1('12312'))
+    t=preconditions(1,2)
+    print(t)
@@ -5,7 +5,7 @@ select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
'''
##product table
skuinfo_sql='''
-select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
+select id,sku_no,sku_name,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
offline_recommend.recommend_product_info
'''
##similarity table
@@ -15,12 +15,18 @@ similar_redis='product_similarity:{skuno}'
correlation_redis='product_correlation:${skuNo}'
##current batch + previous batch
-cur_batch_sql="""
+nearly_batch_sql="""
select a.*,b.sku_no,b.recall_reason from
-(select id,recall_batch_uuid from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
+(select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
join nearline_recommend.recommend_product_record b on a.recall_batch_uuid=b.recall_batch_uuid
"""
+cur_batch_sql="""
+select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch
+"""
#b.jg_id,b.category_id,b.category_level,b.final_score,a.rank,a.click_num
hot_jd150_sql="""
select distinct {} from offline_recommend.jg_heat_rank
......
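The SQL constants above are filled in at runtime through the project's concatSql helper (used elsewhere in this commit as concatSql(sql, **conditions)). That helper is not part of this diff, so the sketch below is only an assumption about the kind of kwargs-to-where-clause concatenation it performs; concat_sql_sketch is a hypothetical stand-in:

def concat_sql_sketch(base_sql, **conditions):
    # assumed behaviour: append one equality filter per keyword argument
    if not conditions:
        return base_sql
    clauses = ["{}='{}'".format(column, value) for column, value in conditions.items()]
    return base_sql.rstrip() + " where " + " and ".join(clauses)

# e.g. concat_sql_sketch(cur_batch_sql, recall_batch_uuid='abc-123')
# -> "select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch where recall_batch_uuid='abc-123'"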
No preview for this file type
@@ -5,4 +5,4 @@ from tools.listOperation import *
from databaseConn.redisOperation import *
from tools.publicFun import *
cur_dir=os.path.dirname(os.path.abspath(__file__))
file_path=os.path.join(cur_dir,'tempFile')
\ No newline at end of file
@@ -101,18 +101,33 @@ def requestsend(uuid,deviceid,searchContent,selectedActivitie,page=1,searchtype=
    t = requests.post(url, json=body, headers=header)
    print('search term:',searchContent)
    try:
+        sku_nos=[]
+        sku_template={"sku_no":"","cid3":"","cid2":"","cid1":"","brandName":""}
        spu_nos = jsonpath.jsonpath(t.json(), '$..skuNo') or ['null']
+        cid3 = jsonpath.jsonpath(t.json(), '$..cid3') or ['null']
+        cid2 = jsonpath.jsonpath(t.json(), '$..cid2') or ['null']
+        cid1 = jsonpath.jsonpath(t.json(), '$..cid1') or ['null']
+        brandName = jsonpath.jsonpath(t.json(), '$..brandName') or ['null']
+        for index,i in enumerate(spu_nos):
+            record=dict(sku_template)   # copy the template so every sku gets its own dict
+            record["sku_no"]=i
+            record["cid3"]=cid3[index]
+            record["cid2"]=cid2[index]
+            record["cid1"]=cid1[index]
+            record["brandName"]=brandName[index]
+            sku_nos.append(record)
        sku_names = jsonpath.jsonpath(t.json(), '$..skuName') or ['null']
        page_size = jsonpath.jsonpath(t.json(), '$..totalPage')[0]
        search_id = jsonpath.jsonpath(t.json(), '$..searchId')[0]
        total_skus=jsonpath.jsonpath(t.json(), '$..totalCount')[0]
        request_time = t.elapsed.total_seconds()
-        tttt=','.join(spu_nos)
-        #print(tttt,type(tttt))
+        #tttt=','.join(spu_nos)
+        print(sku_nos)
        # with open('sku_result_3.txt', 'a+') as file:
        #     file.write('search term:' + searchContent + ";result:" + ','.join(sku_names) + "\n")
-        return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
+        return page_size, sku_nos, str(request_time), search_id,','.join(sku_names),searchContent,total_skus
+        #return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
    except:
+        traceback.print_exc(limit=2)
        print('search API error:',searchContent,t.text)
@@ -179,7 +194,9 @@ def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=
        result_temp["searchcontent"]=searchContent
        result_temp["pageno"]=flag
        result_temp["costtime"]=request_time
-        result_temp["skunos"]=spu_nos
+        #result_temp["skunos"]=spu_nos
+        for index,i in enumerate(spu_nos):
+            result_temp["skunos"+str(index)]=i
        result_temp['totalSkus']=total_skus
        temp.append(result_temp)
        #file_template=["nowtime","searchcontent","pageno","costtime","deviceid","uuid","skunos"]
@@ -206,11 +223,14 @@ def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=
        traceback.print_exc(limit=2)
-def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1):
+def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1,issleep=1):
    ttemp=[]
+    num=int(num)
    for i in range(num):
        print('search round [{}]'.format(str(i)))
        try:
+            if issleep:
+                time.sleep(1)
            temp=totalrun(num=i,isFirstPage=isFirstPage,sheetname=sheetname,ishotwordrand=ishotwordrand)
            ttemp+=temp
            #print(temp)
@@ -309,11 +329,14 @@ if __name__ == '__main__':
    # t=hotWord('线上环境搜索热词.xlsx')['search_words'].apply(lambda x:str(x).strip())
    # print(t)
    #print(totalrun(isFirstPage=1))
-    isexec=sys.argv[2]
+    isexec=sys.argv[2] ## decide whether to call the search API or analyse the result file
    ishotwordrand=0
+    sheetname=sys.argv[3] ## xlsx sheet with the hot words
+    issleep=int(sys.argv[4]) ## whether to sleep between the looped API calls (0/1)
+    runnum=sys.argv[5] ## total number of API calls
    print(sys.argv)
    if isexec and isexec!='0':
-        allRun(1000,isFirstPage=1,sheetname=sys.argv[3],ishotwordrand=ishotwordrand)
+        allRun(runnum,isFirstPage=1,sheetname=sheetname,ishotwordrand=ishotwordrand,issleep=issleep)
    else:
        print(analysis())
    # a=[['236473224274432', '7188063'],['236473224274432', '7188063']]
......
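Putting the new sys.argv handling together, the command line the script now expects appears to be the following. This is inferred from the argv indices read in __main__ above; the script filename and the value of sys.argv[1] are not shown in this diff and are only placeholders:

# hypothetical invocation (filename and <argv1> are placeholders):
#   python search.py <argv1> 1 Sheet1 1 100
# which maps onto the reads in __main__ as:
#   sys.argv[2] = '1'      -> isexec: non-zero string means "call the search API", '0' means "analyse the file"
#   sys.argv[3] = 'Sheet1' -> sheetname: xlsx sheet the hot words are read from
#   sys.argv[4] = '1'      -> issleep: sleep one second between calls when non-zero
#   sys.argv[5] = '100'    -> runnum: number of search rounds, cast to int inside allRun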
@@ -8,4 +8,24 @@ cid1_rediskey='{}_cid1_offline'#.format(df['cid1'].to_list()[0])
cid2_rediskey='{}_cid2_offline'#.format(df['cid2'].to_list()[0])
cid3_rediskey='{}_cid3_offline'#.format(df['cid3'].to_list()[0])
brandname_rediskey='{}_brand_name_offline'
sku_rediskey='{}_sku_offline'
+##search-term results per user
+user_search_result_hql="""
+select response_search_id,request_search_content,count(1)
+-- request_user_uuid,request_search_content,response_product_result
+from nrt_search.search_record_info
+where dt>=date_sub(current_date(),1) and dt<current_date()
+and request_search_content is not null
+and length(request_search_content)>0
+group by response_search_id,request_search_content
+"""
+##click actions from user searches
+user_search_hit_hsql="""
+select unique_id,
+device_id, uuid, skuid, source_type, click_time, page_type
+from data_science.daily_product_click_expose_info
+where dt>=date_sub(current_date(),1) and dt<current_date()
+and page_type = 100
+"""
\ No newline at end of file
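A minimal sketch of executing the two HQL constants above. The commit does not show how the project runs Hive queries, so the connection details below (pyhive, host, port, user) are assumptions rather than the project's actual helper:

from pyhive import hive

def run_hql(sql, host='hive-host.example', port=10000, username='qa'):
    # hypothetical Hive connection; swap in the project's real execution helper
    conn = hive.Connection(host=host, port=port, username=username)
    try:
        cursor = conn.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        conn.close()

# search_rows = run_hql(user_search_result_hql)
# click_rows  = run_hql(user_search_hit_hsql)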
from search import *
def getsearchinfo(filename):
    filepath=os.path.join(data_file_path,filename)
    df=readFile(filepath)
    return df
def skusinfo(skus):
    skuinfo_change_info=concatSql(skuinfo_sql,**{"sku_no":skus})
    df=execmysl(119,skuinfo_change_info)
    print(df[['sku_no','sku_name']])
    return df[['sku_no','sku_name']]
def dealCustomSearch(word,isrigth=1):
    '''
    :param word: custom search term; use with care, only suited to the case where the search result is empty
    :param isrigth:
    :return:
    '''
    if isrigth:
        # sku_name is assumed to be the column the custom word is matched against
        sku_change_info=skuinfo_sql+" where sku_name like '%{}%'".format(word)
    else:
        sku_change_info=skuinfo_sql+" where sku_name not like '%{}%'".format(word)
    return sku_change_info
def compareresult(skuname,word):
    pass
def score(filename):
    search_df=getsearchinfo(filename)
    skunames=[]
    for search in search_df.itertuples():
        search_result_skus=getattr(search,'seaech_result')
        skunames.append(skusinfo(search_result_skus)['sku_name'].to_list())
    search_df['skunames']=skunames
    return search_df
if __name__ == '__main__':
    filename="searchinfo.xlsx"
    t=score(filename)
    print(t)
    #print(t[['skunames',"search_word"]])
+import copy
from databaseConn import *
def listCross(a,b):
    '''
@@ -33,6 +35,24 @@ def mergelist(a):
    t=itertools.chain.from_iterable(a)
    return list(t)
+def listGroup(tt,size=3):
+    '''
+    :param tt:
+    :param size: unused, reserved value
+    :return: split the list into groups of increasing size (1, 2, 3, ...); note that tt is consumed in place
+    '''
+    temp=[]
+    i=1
+    while True:
+        temp.append(tt[:i])
+        del tt[:i]   # drop the elements that were just grouped
+        i+=1
+        if not tt:
+            break
+    return temp
if __name__ == '__main__':
-    a=[1,2,4,2,1,5,8]
-    print(removeRepeat(a))
+    a=[1,2,4,3,5,6,7,8]
+    print(listGroup(a))
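For clarity, the grouping that the __main__ example above produces (group sizes 1, 2, 3, ..., with whatever remains forming the final group):

a = [1, 2, 4, 3, 5, 6, 7, 8]
print(listGroup(a))   # -> [[1], [2, 4], [3, 5, 6], [7, 8]]
print(a)              # -> [] ; the input list is emptied in place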