Commit 336b7cd5 authored by 桂秋月's avatar 桂秋月

修改search脚本

parent e36447d4
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
......@@ -2,8 +2,10 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
import jsonpath,requests,os,re,json,sys,traceback,datetime,random,time,itertools
from pymongo import MongoClient
from collections import defaultdict
import uuid as codeuuid
cur_dir=os.path.dirname(os.path.abspath(__file__))
......@@ -44,5 +45,15 @@ mysql_info={
"pwd":"Qa6pxybKE8KgsUGV",
"host":"172.30.5.27",
"port":"7438"
},
"nearly":{
"name":"nearlinerds",
"pwd":"3^4f=GZeA$",
"host":"recommend-nearline-prod-5-7.cr22rydq7l0o.rds.cn-north-1.amazonaws.com.cn",
"port":"3306"
}
}
mongo_info={
}
\ No newline at end of file
from databaseConn import *
from databaseConn.mysqlOperation import mysqlInfo
def mongo_universal(namespace):
    '''
    Build the MongoDB connection URI for a namespace.

    :param namespace: namespace handed to ``mysqlInfo`` (called with ``dbtype='mongodb'``)
                      to look up the connection info
    :return: ``mongodb://name:pwd@host:port`` string with the user name and
             password percent-escaped
    :raises KeyError: if the looked-up info lacks ``name``, ``pwd``, ``host`` or ``port``
    '''
    from urllib.parse import quote_plus  # local import so the file's import block stays untouched

    uri_template="mongodb://{name}:{pwd}@{host}:{port}"
    conn_info=dict(mysqlInfo(namespace,dbtype='mongodb'))
    # pymongo requires reserved characters (':', '/', '@', '%', ...) in the
    # credentials to be percent-escaped, otherwise the URI is rejected.
    for field in ('name','pwd'):
        conn_info[field]=quote_plus(str(conn_info[field]))
    uri=uri_template.format(**conn_info)
    # Log the connection target with the password masked so it does not end up in console output.
    print("mongodb连接信息:",uri_template.format(**dict(conn_info,pwd='***')))
    return uri
def mongoClient(namespace,db="app_vcc2"):
    '''
    Connect to the MongoDB instance of a namespace and return one database.

    :param namespace: namespace used by ``mongo_universal`` to build the connection URI
    :param db: name of the database to open
    :return: the database object ``MongoClient(uri)[db]``
    '''
    uri=mongo_universal(namespace)
    # Subscript access is equivalent to the attribute access that used to be
    # built and run through eval(), but it never executes the database name as code.
    return MongoClient(uri)[db]
def tableSearch(namespace,tablename,**kwargs):
    '''
    Run ``find`` on one collection and return the result.

    :param namespace: namespace of the test environment
    :param tablename: collection name
    :param kwargs: must carry the database name and the query; ``query`` is a list
                   whose items are passed to ``find`` positionally (filter, projection, ...).
                   eg: {"db":"app_vcc2","query":[{"event":"loginSuccess"},{"_id":1}]}
                   A missing or empty ``query`` results in ``find()`` with no arguments.
    :return: the value returned by ``find``
    :raises Exception: if ``tablename`` is not a collection of the database
    '''
    db=mongoClient(namespace,kwargs.get('db'))
    tables=[i['name'] for i in db.list_collections()]
    if tablename not in tables:
        raise Exception('table[{}]不在databse[{}]中,所有的table如下:'.format(tablename,kwargs.get('db')),tables)
    query=kwargs.get('query') or []
    # The textual form is kept for logging only; it is no longer executed.
    change_result="db.{tablename}.find(".format(tablename=tablename)
    change_result+=','.join(str(i) for i in query)+")"
    print("mongo拼接结果:",change_result)
    # Call find() directly instead of eval()-ing the string: values whose str()
    # is not valid Python now work, and no query text is ever run as code.
    return db[tablename].find(*query)
if __name__ == '__main__':
    # Manual smoke run: empty filter (add e.g. "userId":"70549608" to narrow it),
    # projection limited to "_id".
    key={"db":"ocrDb","query":[{},{"_id":1}]}
    temp=tableSearch('qa','system.version',**key)
    result=list(temp)
    print(len(result))
    # Iterate the materialised list: the cursor was already consumed when
    # ``result`` was built, so looping over ``temp`` again would print nothing.
    for i in result:
        print('===',i)
    print(temp)
\ No newline at end of file
......@@ -9,12 +9,14 @@ from databaseConn import *
#conn_db_2_5 = create_engine('mysql://root:123456@172.29.2.5:3306')
# conn_db_5_16 = create_engine('mysql://root:root@172.29.5.16:3306')
def mysqlInfo(namespace):
def mysqlInfo(namespace,dbtype='mysql'):
"""
:param namespace:
:param dbtype:查询的类型,目前支持mysql和mongo
:return: 获取namespace对应的mysql信息,对大数据的mysql不适用,此时需要自定义mysql_info
"""
url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}&serviceType=base&serviceName=mysql".format(namespace)
url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}" \
"&serviceType=base&serviceName={dbtype}".format(namespace,dbtype=dbtype)
result=mysql_info.get(str(namespace)) or {}
if result:
return result
......
from recommend import *
from recommend.publicFunc import skuinfo,dapanBu
def preconditions(uuid):
def preconditions(uuid,sku_no):
'''
:param uuid:
:return: 判断上批次是否为加购topic,是则不触发,不是才触发
'''
back_batch_redis=""
return True
try:
eventtype= 'orderTopic'
back_batch_change_redis=back_batch_redis.format(uuid=uuid,sku_no=sku_no)
ttt=getRedisValue(back_batch_change_redis).get(back_batch_change_redis)
cur_btach_change_sql=concatSql(cur_batch_sql,**{"recall_batch_uuid":ttt})
cur_batch_df=execmysl(119,cur_batch_sql.format(uuid))
is_order_topic= 0 if cur_batch_df.empty or not cur_batch_df['event_type'].to_list()[0]==eventtype else 1
return is_order_topic
except:
return 0
def recall_J1(skuno):
'''
......@@ -37,7 +45,6 @@ def recall_J1(skuno):
return temp
def supply_J1(skuno):
sku_info=skuinfo(skuno)
dapan_change_sql=concatSql(dapan_sql,**{"cid3":sku_info.get('cid3')})
......@@ -57,4 +64,6 @@ def recall_J3():
if __name__ == '__main__':
print(recall_J1('12312'))
#print(recall_J1('12312'))
t=preconditions(1,2)
print(t)
......@@ -5,7 +5,7 @@ select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
'''
##商品表
skuinfo_sql='''
select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
select id,sku_no,sku_name,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
offline_recommend.recommend_product_info
'''
##相似表
......@@ -15,12 +15,18 @@ similar_redis='product_similarity:{skuno}'
correlation_redis='product_correlation:${skuNo}'
##本批次+上一批次
cur_batch_sql="""
nearly_batch_sql="""
select a.*,b.sku_no,b.recall_reason from
(select id,recall_batch_uuid from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
(select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
join nearline_recommend.recommend_product_record b on a.recall_batch_uuid=b.recall_batch_uuid
"""
cur_batch_sql="""
select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch
"""
#b.jg_id,b.category_id,b.category_level,b.final_score,a.rank,a.click_num
hot_jd150_sql="""
select distinct {} from offline_recommend.jg_heat_rank
......
No preview for this file type
......@@ -101,18 +101,33 @@ def requestsend(uuid,deviceid,searchContent,selectedActivitie,page=1,searchtype=
t = requests.post(url, json=body, headers=header)
print('搜索词:',searchContent)
try:
sku_nos=[]
sku_templat={"sku_no":"","cid3":"","cid2":"","cid1":"","brandName":""}
spu_nos = jsonpath.jsonpath(t.json(), '$..skuNo') or ['null']
cid3 = jsonpath.jsonpath(t.json(), '$..cid3') or ['null']
cid2 = jsonpath.jsonpath(t.json(), '$..cid1') or ['null']
cid1 = jsonpath.jsonpath(t.json(), '$..cid2') or ['null']
brandName = jsonpath.jsonpath(t.json(), '$..brandName') or ['null']
for index,i in enumerate(spu_nos):
sku_templat["sku_no"]=i
sku_templat["cid3"]=cid3[index]
sku_templat["cid2"]=cid2[index]
sku_templat["cid1"]=cid1[index]
sku_templat["brandName"]=brandName[index]
sku_nos.append(sku_templat)
sku_names = jsonpath.jsonpath(t.json(), '$..skuName') or ['null']
page_size = jsonpath.jsonpath(t.json(), '$..totalPage')[0]
search_id = jsonpath.jsonpath(t.json(), '$..searchId')[0]
total_skus=jsonpath.jsonpath(t.json(), '$..totalCount')[0]
request_time = t.elapsed.total_seconds()
tttt=','.join(spu_nos)
#print(tttt,type(tttt))
#tttt=','.join(spu_nos)
print(sku_nos)
# with open('sku_result_3.txt', 'a+') as file:
# file.write('搜索词:' + searchContent + ";result:" + ','.join(sku_names) + "\n")
return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
return page_size,sku_nos , str(request_time), search_id,','.join(sku_names),searchContent,total_skus
#return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
except:
traceback.print_exc(limit=2)
print('搜索接口报错:',searchContent,t.text)
......@@ -179,7 +194,9 @@ def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=
result_temp["searchcontent"]=searchContent
result_temp["pageno"]=flag
result_temp["costtime"]=request_time
result_temp["skunos"]=spu_nos
#result_temp["skunos"]=spu_nos
for index,i in enumerate(spu_nos):
result_temp["skunos"+str(index)]=i
result_temp['totalSkus']=total_skus
temp.append(result_temp)
#file_template=["nowtime","searchcontent","pageno","costtime","deviceid","uuid","skunos"]
......@@ -206,11 +223,14 @@ def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=
traceback.print_exc(limit=2)
def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1):
def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1,issleep=1):
ttemp=[]
num=int(num)
for i in range(num):
print('第[{}]次搜索'.format(str(i)))
try:
if issleep:
time.sleep(1)
temp=totalrun(num=i,isFirstPage=isFirstPage,sheetname=sheetname,ishotwordrand=ishotwordrand)
ttemp+=temp
#print(temp)
......@@ -309,11 +329,14 @@ if __name__ == '__main__':
# t=hotWord('线上环境搜索热词.xlsx')['search_words'].apply(lambda x:str(x).strip())
# print(t)
#print(totalrun(isFirstPage=1))
isexec=sys.argv[2]
isexec= sys.argv[2] ##判断执行接口还是分析文件
ishotwordrand=0
sheetname=sys.argv[3] ##xlsx工作簿
issleep=sys.argv[4] ##循环执行接口是否需要sleep
runnum=sys.argv[5] ##执行接口总数
print(sys.argv)
if isexec and isexec!='0':
allRun(1000,isFirstPage=1,sheetname=sys.argv[3],ishotwordrand=ishotwordrand)
allRun(runnum,isFirstPage=1,sheetname=sheetname,ishotwordrand=ishotwordrand,issleep=issleep)
else:
print(analysis())
# a=[['236473224274432', '7188063'],['236473224274432', '7188063']]
......
......@@ -9,3 +9,23 @@ cid2_rediskey='{}_cid2_offline'#.format(df['cid2'].to_list()[0])
cid3_rediskey='{}_cid3_offline'#.format(df['cid3'].to_list()[0])
brandname_rediskey='{}_brand_name_offline'
sku_rediskey='{}_sku_offline'
## Users' search-term results: number of rows per (search id, search content)
## over the previous day's partition, ignoring null/empty search content.
user_search_result_hql="""
select response_search_id,request_search_content,count(1)
-- request_user_uuid,request_search_content,response_product_result
from nrt_search.search_record_info
where dt>=date_sub(current_date(),1) and dt<current_date()
and request_search_content is not null
and length(request_search_content)>0
group by response_search_id,request_search_content
"""
## Click actions on user searches: rows of the previous day's partition
## restricted to page_type = 100.
user_search_hit_hsql="""
select unique_id,
device_id, uuid, skuid, source_type, click_time, page_type
from data_science.daily_product_click_expose_info
where dt>=date_sub(current_date(),1) and dt<current_date()
and page_type = 100
"""
\ No newline at end of file
from search import *
def getsearchinfo(filename):
    '''
    Load a search-info file from the data directory.

    :param filename: file name, resolved against ``data_file_path``
    :return: whatever ``readFile`` returns for the joined path
    '''
    return readFile(os.path.join(data_file_path,filename))
def skusinfo(skus):
    '''
    Query the product table for the given sku numbers.

    :param skus: value passed to ``concatSql`` as the ``sku_no`` condition
    :return: the ``sku_no`` and ``sku_name`` columns of the query result
    '''
    query=concatSql(skuinfo_sql,sku_no=skus)
    sku_names=execmysl(119,query)[['sku_no','sku_name']]
    print(sku_names)
    return sku_names
def dealCustomSearch(word,isrigth=1):
    '''
    Append a ``like`` / ``not like`` pattern for a custom search word to ``skuinfo_sql``.

    Use with care: per the original note this is only meant for search words
    whose search result is empty.

    NOTE(review): the pieces are concatenated without spaces, quotes, a column
    name or a WHERE keyword, so the returned text does not look like executable
    SQL on its own - confirm the intended predicate before using it.

    :param word: custom search word
    :param isrigth: truthy -> ``like``, falsy -> ``not like``
    :return: the concatenated SQL text
    '''
    operator='like' if isrigth else 'not like'
    return skuinfo_sql+operator+'%'+word+'%'
def compareresult(skuname,word):
    '''
    Placeholder, not implemented yet.

    :param skuname: unused
    :param word: unused
    :return: None
    '''
    return None
def score(filename):
    '''
    Attach the sku names of every search result to the search-info table.

    :param filename: search-info file, loaded through ``getsearchinfo``
    :return: the loaded table with an extra ``skunames`` column; each cell holds
             the list of ``sku_name`` values returned by ``skusinfo`` for that row
    '''
    search_df=getsearchinfo(filename)
    # Collect one list of names per row and assign the column once. Assigning
    # inside the loop replaced the whole column on every iteration and only
    # worked when the number of names happened to equal the number of rows.
    # NOTE(review): 'seaech_result' is kept verbatim because it must match the
    # column name in the input file - confirm whether that spelling is intended.
    names_per_row=[
        skusinfo(getattr(search,'seaech_result'))['sku_name'].to_list()
        for search in search_df.itertuples()
    ]
    search_df['skunames']=names_per_row
    return search_df
if __name__ == '__main__':
    # Score the default search export and print the resulting table.
    t=score("searchinfo.xlsx")
    print(t)
import copy
from databaseConn import *
def listCross(a,b):
'''
......@@ -33,6 +35,24 @@ def mergelist(a):
t=itertools.chain.from_iterable(a)
return list(t)
def listGroup(tt,size=3):
    '''
    Split a sequence into consecutive groups of increasing length (1, 2, 3, ...).

    The last group holds whatever is left and may be shorter than its nominal
    length. The input is not modified. An empty input yields an empty list.

    :param tt: sequence to split
    :param size: unused, reserved
    :return: list of groups, each group a list
    '''
    groups=[]
    start=0
    width=1
    # Walk the input with slices instead of removing items from it: this keeps
    # the caller's list intact and avoids the repeated linear-time removals.
    while start<len(tt):
        groups.append(list(tt[start:start+width]))
        start+=width
        width+=1
    return groups
if __name__ == '__main__':
a=[1,2,4,2,1,5,8]
print(removeRepeat(a))
a=[1,2,4,3,5,6,7,8]
print(listGroup(a))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment