修改search脚本

336b7cd5 · 桂秋月 · e36447d4 · 336b7cd5 · 336b7cd5 · 336b7cd5
Commit 336b7cd5 authored Apr 27, 2022 by 桂秋月
34 changed files
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/bigData.iml
+++ b/bigData.iml
@@ -2,8 +2,10 @@
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
\ No newline at end of file
--- a/dataFile/.~工作簿1.xlsx
+++ b/dataFile/.~工作簿1.xlsx
--- a/dataFile/searchinfo.xlsx
+++ b/dataFile/searchinfo.xlsx
--- a/dataFile/线上环境搜索热词.xlsx
+++ b/dataFile/线上环境搜索热词.xlsx
--- a/databaseConn/__init__.py
+++ b/databaseConn/__init__.py
 import jsonpath,requests,os,re,json,sys,traceback,datetime,random,time,itertools
+from pymongo import MongoClient
 from collections import defaultdict
 import uuid as codeuuid
 cur_dir=os.path.dirname(os.path.abspath(__file__))
@@ -44,5 +45,15 @@ mysql_info={
        "pwd":"Qa6pxybKE8KgsUGV",
        "host":"172.30.5.27",
        "port":"7438"
+    },
+    "nearly":{
+        "name":"nearlinerds",
+        "pwd":"3^4f=GZeA$",
+        "host":"recommend-nearline-prod-5-7.cr22rydq7l0o.rds.cn-north-1.amazonaws.com.cn",
+        "port":"3306"
    }
 }
+
+mongo_info={
+
+}
\ No newline at end of file
--- a/databaseConn/__pycache__/__init__.cpython-37.pyc
+++ b/databaseConn/__pycache__/__init__.cpython-37.pyc
--- a/databaseConn/__pycache__/mysqlOperation.cpython-37.pyc
+++ b/databaseConn/__pycache__/mysqlOperation.cpython-37.pyc
--- a/databaseConn/mongoOperation.py
+++ b/databaseConn/mongoOperation.py
 from databaseConn import *
+from databaseConn.mysqlOperation import mysqlInfo

+def mongo_universal(namespace):
+    '''
+    Build the MongoDB connection URI for a namespace.
+
+    :param namespace: environment namespace; forwarded to mysqlInfo with dbtype='mongodb'
+    :return: URI of the form mongodb://name:pwd@host:port with escaped credentials
+    :raises KeyError: when the connection info lacks name, pwd, host or port
+    '''
+    # Imported locally so the block carries its own dependency.
+    from urllib.parse import quote_plus
+    uri_template="mongodb://{name}:{pwd}@{host}:{port}"
+    conn_info=dict(mysqlInfo(namespace,dbtype='mongodb'))
+    # User name and password must be percent-escaped: a raw '@', ':', '/' or '%'
+    # inside a credential would otherwise corrupt the URI.
+    conn_info["name"]=quote_plus(str(conn_info["name"]))
+    conn_info["pwd"]=quote_plus(str(conn_info["pwd"]))
+    uri=uri_template.format(**conn_info)
+    # Log the target with the password masked so the credential never reaches stdout.
+    print("mongodb连接信息:",uri_template.format(**dict(conn_info,pwd="***")))
+    return uri
+
+def mongoClient(namespace,db="app_vcc2"):
+    '''
+    Connect to the MongoDB of a namespace and select one database.
+
+    :param namespace: environment namespace used to look up the connection info
+    :param db: name of the database to select
+    :return: handle of database ``db`` on a newly created MongoClient
+    '''
+    uri=mongo_universal(namespace)
+    # Subscript access replaces eval() on a formatted string: it selects the same
+    # database, cannot execute arbitrary code, and also accepts names that are
+    # not valid Python identifiers.
+    return MongoClient(uri)[db]
+
+def tableSearch(namespace,tablename,**kwargs):
+    '''
+    Run ``find`` on one collection and return its result.
+
+    :param namespace: namespace of the test environment
+    :param tablename: collection name
+    :param kwargs: must contain the database name ("db") and the query ("query");
+        "query" is a list whose items become the positional arguments of find().
+        eg: {"db":"app_vcc2","query":[{"event":"loginSuccess"},{"_id":1}]}
+    :return: result of find()
+    :raises Exception: when the collection does not exist in the database
+    '''
+    db=mongoClient(namespace,kwargs.get('db'))
+    tables=[i['name'] for i in db.list_collections()]
+    if tablename not in tables:
+        raise Exception('table[{}]不在databse[{}]中,所有的table如下:'.format(tablename,kwargs.get('db')),tables)
+    # A missing "query" now means find() without arguments instead of a TypeError.
+    query=kwargs.get('query') or []
+    # Same log line as before, kept for debugging only; it is no longer evaluated.
+    print("mongo拼接结果：","db.{}.find({})".format(tablename,",".join(map(str,query))))
+    # Call find() directly instead of eval() on the text above, so values whose
+    # repr() is not valid Python source still work and no text is executed.
+    return db[tablename].find(*query)
+
+
+
+
+if __name__ == '__main__':
+    # Manual smoke test: list the _id of every document in ocrDb.system.version.
+    key={"db":"ocrDb","query":[{},{"_id":1}]}
+    temp=tableSearch('qa','system.version',**key)
+    # The find() result can be consumed only once, so read it into a list and
+    # reuse the list; looping over the exhausted cursor again printed nothing.
+    result=list(temp)
+    print(len(result))
+    for i in result:
+        print('===',i)
+    print(temp)
\ No newline at end of file
--- a/databaseConn/mysqlOperation.py
+++ b/databaseConn/mysqlOperation.py
@@ -9,12 +9,14 @@ from databaseConn import *
 #conn_db_2_5 = create_engine('mysql://root:123456@172.29.2.5:3306')
 # conn_db_5_16 = create_engine('mysql://root:root@172.29.5.16:3306')

-def mysqlInfo(namespace):
+def mysqlInfo(namespace,dbtype='mysql'):
    """
    :param namespace:
+    :param  dbtype:查询的类型，目前支持mysql和mongo
    :return: 获取namespace对应的mysql信息，对大数据的mysql不适用，此时需要自定义mysql_info
    """
-    url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}&serviceType=base&serviceName=mysql".format(namespace)
+    url="https://holmes.liangkebang.com/k8s/service/detail?namespace={}" \
+        "&serviceType=base&serviceName={dbtype}".format(namespace,dbtype=dbtype)
    result=mysql_info.get(str(namespace)) or {}
    if result:
        return result

--- a/recommend/__pycache__/__init__.cpython-37.pyc
+++ b/recommend/__pycache__/__init__.cpython-37.pyc
--- a/recommend/__pycache__/publicFunc.cpython-37.pyc
+++ b/recommend/__pycache__/publicFunc.cpython-37.pyc
--- a/recommend/__pycache__/publicSql.cpython-37.pyc
+++ b/recommend/__pycache__/publicSql.cpython-37.pyc
--- a/recommend/orderTopic.py
+++ b/recommend/orderTopic.py
 from recommend import *
 from recommend.publicFunc import skuinfo,dapanBu

-def preconditions(uuid):
+def preconditions(uuid,sku_no):
    '''
    :param uuid:
    :return: 判断上批次是否为加购topic,是则不触发，不是才触发
    '''
-    back_batch_redis=""
-    return True
+    # Returns 1 when the batch whose uuid is stored in redis for (uuid, sku_no) has
+    # event_type 'orderTopic', otherwise 0; a failed lookup or query also yields 0.
+    eventtype='orderTopic'
+    try:
+        back_batch_change_redis=back_batch_redis.format(uuid=uuid,sku_no=sku_no)
+        batch_uuid=getRedisValue(back_batch_change_redis).get(back_batch_change_redis)
+        cur_batch_change_sql=concatSql(cur_batch_sql,**{"recall_batch_uuid":batch_uuid})
+        # Execute the SQL built for this recall_batch_uuid. The previous call ran
+        # cur_batch_sql.format(uuid), but cur_batch_sql has no placeholder, so the
+        # batch uuid read from redis was never applied to the query.
+        cur_batch_df=execmysl(119,cur_batch_change_sql)
+        if cur_batch_df.empty:
+            return 0
+        return 1 if cur_batch_df['event_type'].to_list()[0]==eventtype else 0
+    except Exception:
+        # Deliberate best effort: any failure counts as "not an order topic".
+        # Narrowed from a bare except so Ctrl-C and SystemExit still propagate.
+        return 0

 def recall_J1(skuno):
    '''
@@ -37,7 +45,6 @@ def recall_J1(skuno):
    return temp


-
 def supply_J1(skuno):
    sku_info=skuinfo(skuno)
    dapan_change_sql=concatSql(dapan_sql,**{"cid3":sku_info.get('cid3')})
@@ -57,4 +64,6 @@ def recall_J3():


 if __name__ == '__main__':
-    print(recall_J1('12312'))
+    #print(recall_J1('12312'))
+    t=preconditions(1,2)
+    print(t)
--- a/recommend/publicSql.py
+++ b/recommend/publicSql.py
@@ -5,7 +5,7 @@ select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id from
 '''
 ##商品表
 skuinfo_sql='''
-select id,sku_no,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
+select id,sku_no,sku_name,price,cid1,cid2,cid3,brand_name,brand_id,source_type from
    offline_recommend.recommend_product_info
 '''
 ##相似表
@@ -15,12 +15,18 @@ similar_redis='product_similarity:{skuno}'
 correlation_redis='product_correlation:${skuNo}'

 ##本批次+上一批次
-cur_batch_sql="""
+nearly_batch_sql="""
 select a.*,b.sku_no,b.recall_reason from 
-(select id,recall_batch_uuid from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
+(select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch where user_uuid='{}' order by id desc limit 2)a
 join nearline_recommend.recommend_product_record b on a.recall_batch_uuid=b.recall_batch_uuid
 """

+cur_batch_sql="""
+select id,recall_batch_uuid,event_type from nearline_recommend.recommend_product_batch 
+"""
+
+
+
 #b.jg_id,b.category_id,b.category_level,b.final_score,a.rank,a.click_num
 hot_jd150_sql="""
 select distinct {}  from offline_recommend.jg_heat_rank 

--- a/search/.DS_Store
+++ b/search/.DS_Store
--- a/search/__init__.py
+++ b/search/__init__.py
--- a/search/__pycache__/__init__.cpython-37.pyc
+++ b/search/__pycache__/__init__.cpython-37.pyc
--- a/search/__pycache__/abSearch_script.cpython-37.pyc
+++ b/search/__pycache__/abSearch_script.cpython-37.pyc
--- a/search/abSearch_script.py
+++ b/search/abSearch_script.py
@@ -101,18 +101,33 @@ def requestsend(uuid,deviceid,searchContent,selectedActivitie,page=1,searchtype=
    t = requests.post(url, json=body, headers=header)
    print('搜索词:',searchContent)
    try:
+        sku_nos=[]
+        sku_templat={"sku_no":"","cid3":"","cid2":"","cid1":"","brandName":""}
        spu_nos = jsonpath.jsonpath(t.json(), '$..skuNo') or ['null']
+        cid3  = jsonpath.jsonpath(t.json(), '$..cid3') or ['null']
+        cid2  = jsonpath.jsonpath(t.json(), '$..cid1') or ['null']
+        cid1  = jsonpath.jsonpath(t.json(), '$..cid2') or ['null']
+        brandName  = jsonpath.jsonpath(t.json(), '$..brandName') or ['null']
+        for index,i in enumerate(spu_nos):
+            sku_templat["sku_no"]=i
+            sku_templat["cid3"]=cid3[index]
+            sku_templat["cid2"]=cid2[index]
+            sku_templat["cid1"]=cid1[index]
+            sku_templat["brandName"]=brandName[index]
+            sku_nos.append(sku_templat)
        sku_names = jsonpath.jsonpath(t.json(), '$..skuName') or ['null']
        page_size = jsonpath.jsonpath(t.json(), '$..totalPage')[0]
        search_id = jsonpath.jsonpath(t.json(), '$..searchId')[0]
        total_skus=jsonpath.jsonpath(t.json(), '$..totalCount')[0]
        request_time = t.elapsed.total_seconds()
-        tttt=','.join(spu_nos)
-        #print(tttt,type(tttt))
+        #tttt=','.join(spu_nos)
+        print(sku_nos)
        # with open('sku_result_3.txt', 'a+') as file:
        #     file.write('搜索词:' + searchContent + ";result:" + ','.join(sku_names) + "\n")
-        return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
+        return page_size,sku_nos , str(request_time), search_id,','.join(sku_names),searchContent,total_skus
+        #return page_size, ','.join(spu_nos), str(request_time), search_id,','.join(sku_names),searchContent,total_skus
    except:
+        traceback.print_exc(limit=2)
        print('搜索接口报错:',searchContent,t.text)


@@ -179,7 +194,9 @@ def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=
        result_temp["searchcontent"]=searchContent
        result_temp["pageno"]=flag
        result_temp["costtime"]=request_time
-        result_temp["skunos"]=spu_nos
+        #result_temp["skunos"]=spu_nos
+        for index,i in enumerate(spu_nos):
+            result_temp["skunos"+str(index)]=i
        result_temp['totalSkus']=total_skus
        temp.append(result_temp)
        #file_template=["nowtime","searchcontent","pageno","costtime","deviceid","uuid","skunos"]
@@ -206,11 +223,14 @@ def totalrun(selectedActivitie=1, type='txt', num=0,isFirstPage=False,sheetname=
        traceback.print_exc(limit=2)


-def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1):
+def allRun(num,isFirstPage=0,sheetname=0,ishotwordrand=1,issleep=1):
    ttemp=[]
+    num=int(num)
    for i in range(num):
        print('第[{}]次搜索'.format(str(i)))
        try:
+            if issleep:
+                time.sleep(1)
            temp=totalrun(num=i,isFirstPage=isFirstPage,sheetname=sheetname,ishotwordrand=ishotwordrand)
            ttemp+=temp
            #print(temp)
@@ -309,11 +329,14 @@ if __name__ == '__main__':
    # t=hotWord('线上环境搜索热词.xlsx')['search_words'].apply(lambda x:str(x).strip())
    # print(t)
    #print(totalrun(isFirstPage=1))
-    isexec=sys.argv[2]
+    isexec= sys.argv[2] ##判断执行接口还是分析文件
    ishotwordrand=0
+    sheetname=sys.argv[3] ##xlsx工作簿
+    issleep=sys.argv[4]  ##循环执行接口是否需要sleep
+    runnum=sys.argv[5]  ##执行接口总数
    print(sys.argv)
    if isexec and isexec!='0':
-        allRun(1000,isFirstPage=1,sheetname=sys.argv[3],ishotwordrand=ishotwordrand)
+        allRun(runnum,isFirstPage=1,sheetname=sheetname,ishotwordrand=ishotwordrand,issleep=issleep)
    else:
        print(analysis())
    # a=[['236473224274432', '7188063'],['236473224274432', '7188063']]

--- a/search/publicSql.py
+++ b/search/publicSql.py
@@ -9,3 +9,23 @@ cid2_rediskey='{}_cid2_offline'#.format(df['cid2'].to_list()[0])
 cid3_rediskey='{}_cid3_offline'#.format(df['cid3'].to_list()[0])
 brandname_rediskey='{}_brand_name_offline'
 sku_rediskey='{}_sku_offline'
+
+
+## Results of users' search terms
+user_search_result_hql="""
+select response_search_id,request_search_content,count(1)
+-- request_user_uuid,request_search_content,response_product_result
+from nrt_search.search_record_info
+where  dt>=date_sub(current_date(),1) and dt<current_date()
+and request_search_content is not null 
+and length(request_search_content)>0
+group by response_search_id,request_search_content
+"""
+## Users' click actions on search
+user_search_hit_hsql="""
+select  unique_id, 
+            device_id, uuid, skuid, source_type, click_time, page_type
+    from data_science.daily_product_click_expose_info
+    where  dt>=date_sub(current_date(),1) and dt<current_date()
+    and page_type = 100
+"""
\ No newline at end of file
--- a/search/searchScore.py
+++ b/search/searchScore.py
+from search import *
+
+def getsearchinfo(filename):
+    '''
+    Load a search-info file from the data directory.
+
+    :param filename: file name, resolved against data_file_path
+    :return: whatever readFile returns for that path
+    '''
+    return readFile(os.path.join(data_file_path,filename))
+
+def skusinfo(skus):
+    '''
+    Query sku_no and sku_name for the given skus.
+
+    :param skus: value passed to concatSql as the "sku_no" condition
+    :return: the 'sku_no' and 'sku_name' columns of the query result
+    '''
+    query=concatSql(skuinfo_sql,**{"sku_no":skus})
+    selected=execmysl(119,query)[['sku_no','sku_name']]
+    print(selected)
+    return selected
+
+def dealCustomSearch(word,isrigth=1):
+    '''
+    Append a like / not like fragment for a custom search word to skuinfo_sql.
+    Use with care: only meant for search words whose search result is empty.
+
+    :param word: custom search word
+    :param isrigth: truthy selects 'like', falsy selects 'not like'
+    :return: skuinfo_sql followed by the operator and %word%
+    '''
+    # NOTE(review): the pieces are joined without WHERE, column name, spaces or
+    # quotes, so the result does not look like executable SQL - confirm the intent.
+    operator='like' if isrigth else 'not like'
+    return skuinfo_sql+operator+'%'+word+'%'
+
+def compareresult(skuname,word):
+    '''
+    Placeholder: not implemented yet, the body does nothing and returns None.
+
+    :param skuname: unused for now
+    :param word: unused for now
+    '''
+    pass
+
+
+
+def score(filename):
+    '''
+    Attach the sku names of every search result to the search-info table.
+
+    :param filename: search-info file, loaded through getsearchinfo
+    :return: the loaded table with an extra 'skunames' column; each cell holds the
+        list of sku names looked up for that row's 'seaech_result' value
+    '''
+    search_df=getsearchinfo(filename)
+    # Collect one list per row and assign the column once. Assigning inside the loop
+    # replaced the whole column on every row and failed whenever the number of
+    # names differed from the number of rows.
+    skunames=[]
+    for search in search_df.itertuples():
+        # NOTE(review): 'seaech_result' looks like a typo of 'search_result' -
+        # confirm against the column name in the workbook before renaming.
+        search_result_skus=getattr(search,'seaech_result')
+        skunames.append(skusinfo(search_result_skus)['sku_name'].to_list())
+    search_df['skunames']=skunames
+    return search_df
+
+if __name__ == '__main__':
+    # Score the default search-info workbook and print the resulting table.
+    print(score("searchinfo.xlsx"))
+
--- a/search/tempFile/.~searchResult_1650253997.xlsx
+++ b/search/tempFile/.~searchResult_1650253997.xlsx
--- a/search/tempFile/.~searchResult_1651029406.xlsx
+++ b/search/tempFile/.~searchResult_1651029406.xlsx
--- a/search/tempFile/.~searchResult_1651029474.xlsx
+++ b/search/tempFile/.~searchResult_1651029474.xlsx
--- a/search/tempFile/searchResult_1651028796.xlsx
+++ b/search/tempFile/searchResult_1651028796.xlsx
--- a/search/tempFile/searchResult_1651029085.xlsx
+++ b/search/tempFile/searchResult_1651029085.xlsx
--- a/search/tempFile/searchResult_1651029111.xlsx
+++ b/search/tempFile/searchResult_1651029111.xlsx
--- a/search/tempFile/searchResult_1651029279.xlsx
+++ b/search/tempFile/searchResult_1651029279.xlsx
--- a/search/tempFile/searchResult_1651029406.xlsx
+++ b/search/tempFile/searchResult_1651029406.xlsx
--- a/search/tempFile/searchResult_1651029474.xlsx
+++ b/search/tempFile/searchResult_1651029474.xlsx
--- a/tools/__pycache__/__init__.cpython-37.pyc
+++ b/tools/__pycache__/__init__.cpython-37.pyc
--- a/tools/__pycache__/httprequest.cpython-37.pyc
+++ b/tools/__pycache__/httprequest.cpython-37.pyc
--- a/tools/listOperation.py
+++ b/tools/listOperation.py
+import copy
+
 from databaseConn import *
 def listCross(a,b):
    '''
@@ -33,6 +35,24 @@ def mergelist(a):
    t=itertools.chain.from_iterable(a)
    return list(t)

+
+def listGroup(tt,size=3):
+    '''
+    Split ``tt`` into consecutive groups of sizes 1, 2, 3, ...; the last may be shorter.
+    :param tt: sequence to split; it is only sliced and is no longer emptied
+    :param size: unused, reserved for future use
+    :return: list of groups in order; an empty ``tt`` yields ``[[]]``
+    '''
+    groups=[]
+    start,width=0,1
+    while True:
+        groups.append(list(tt[start:start+width]))
+        start+=width
+        width+=1
+        if start>=len(tt):
+            return groups
+
+
 if __name__ == '__main__':
-    a=[1,2,4,2,1,5,8]
-    print(removeRepeat(a))
+    a=[1,2,4,3,5,6,7,8]
+    print(listGroup(a))