您的位置:首页 > 编程语言 > Python开发

python 微信机器人自动聊天+回怼表情包+Adidas公众号自动抽签 四、获取表情包中的文字并进行同类型回怼

2019-07-02 12:04 260 查看

前言

在前文自动回怼表情包中,只是做到了爬取表情包并存放在本地,当微信好友发送表情包给你的时候,随机抓取本地的一个表情包进行回复,这样太不灵活。

识别表情包中的文字

from __future__ import unicode_literals
from wxpy import *
import requests
import json
import re
import urllib.request  #urllib2
import threading
import glob
import random
import urllib
import base64
import os
from apscheduler.schedulers.blocking import BlockingScheduler
# Create the wxpy bot whose `register` decorator wires up the message handler below.
# NOTE(review): cache_path=True presumably caches the login session so the QR
# code does not have to be rescanned on every start — confirm against wxpy docs.
bot = Bot(cache_path=True)

@bot.register(except_self=False)
def print_others(msg):
    """Build the automatic reply for an incoming WeChat message.

    * ``Text`` messages are answered with the chatbot reply obtained from
      ``get_response``; when the chatbot reports that the daily quota is
      used up, an empty string is returned instead of the quota notice.
    * ``Picture`` messages are saved under ``./getImages/``, passed through
      ``getMessageByImage`` and the outcome is handed to ``searchImg``,
      whose result is returned as-is.

    Any other message type yields ``None``.
    NOTE(review): the return value is presumably what wxpy sends back as the
    reply — confirm against the wxpy ``register`` documentation.
    """
    print(msg)
    msg_type = msg.type  # avoid shadowing the builtin ``type``

    if msg_type == 'Text':
        # Chatbot auto-reply. Query the API exactly once: a second request
        # would double the quota usage and could return a different answer
        # than the one that was just checked.
        reply = get_response(msg.text)
        if reply == '亲爱的,当天请求次数已用完。':
            reply = ''
        return reply

    if msg_type == 'Picture':
        # Save the sticker the contact sent, then recognise the text in it.
        path = os.path.join('./getImages/', msg.file_name)
        msg.get_file(path)
        mes = getMessageByImage(msg.file_name)
        return searchImg(mes)

    return None

# Recognise the text in an image with the Baidu Cloud OCR API:
# https://cloud.baidu.com/doc/OCR/s/zjwvxzrw8/
def getMessageByImage(imageName):
    """Run OCR on a saved image and return the chatbot's answer to its text.

    Args:
        imageName: file name of an image stored in ``<current_path>/getImages/``.

    Returns:
        The result of ``get_response`` called with all recognised text
        fragments concatenated in the order the OCR service returned them.

    Raises:
        KeyError: if the token or OCR response lacks the expected fields
            (``access_token`` / ``words_result``).
    """
    # The access token is obtained from the Baidu account's client id and
    # secret (see http://ai.baidu.com/docs#/Auth/top) and appended to the URL.
    token_url = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=**************&client_secret=************'
    token = requests.get(token_url).json()['access_token']
    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=' + token

    # The request carries the image as base64 text in a form-encoded body.
    with open(current_path+'/getImages/'+imageName, 'rb') as f:
        image_b64 = base64.b64encode(f.read()).decode('utf-8')
    postdata = urllib.parse.urlencode({"image": image_b64}).encode('utf-8')
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    request = urllib.request.Request(url=url, data=postdata, headers=headers)

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request) as res:
        info = json.loads(res.read().decode('utf-8'))

    # Text found in the image, all fragments joined together.
    text = ''.join(item['words'] for item in info['words_result'])

    # The original passed the undefined name ``msg`` here (NameError);
    # the recognised text is what has to be sent to the chatbot.
    return get_response(text)

def get_response(msg):
    """Send *msg* to the Tuling chatbot API and return the ``text`` field of its JSON reply."""
    endpoint = 'http://www.tuling123.com/openapi/api'  # Tuling robot API
    body = json.dumps({
        'key': 'ce697b3fc8b54d5f88c2fa59772cb2cf',  # API key
        'info': msg,  # the message we received
        'userid': 'wechat-robot',  # arbitrary identifier, may be changed freely
    })
    # POST the JSON-serialised payload and decode the JSON answer.
    response = requests.post(endpoint, data=body)
    answer = json.loads(response.text)
    return answer['text']

# Download stickers.
def Downloader(step):
    """Crawl 50 listing pages of doutula.com and save every sticker found.

    Args:
        step: zero-based batch index; pages ``step * 50`` up to and including
            ``step * 50 + 49`` are fetched.

    Each image is stored as ``./battleImages/<alt text>.<extension>``, where
    the alt text has punctuation that is awkward in file names removed.
    """
    # Target site listing URL.
    baseurl = 'http://www.doutula.com/photo/list/?page='
    # Headers that imitate a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'gzip',
        'Connection': 'close',
        'Referer': None  # if scraping still fails, set this to the target site's host
    }

    # Compiled once, outside the loops. Captures (image url, alt text) pairs.
    link_re = re.compile(r'data-original="(.*?)".*?alt="(.*?)"', re.S)
    # Characters stripped from the alt text before it is used as a file name.
    # The previous alternation contained a bare ``?`` (re.error: nothing to
    # repeat) and empty ``(|)`` groups; a character class listing both the
    # half-width and full-width forms expresses the intent safely.
    unsafe_re = re.compile(r'[/\\《》。?!.?!*&#()()]')

    # urlretrieve does not create missing directories.
    os.makedirs('./battleImages', exist_ok=True)

    # Walk through this batch's pages of the target site.
    first_page = step * 50
    for page in range(first_page, first_page + 50):
        urls = baseurl + str(page)
        print(urls)

        # Fetch the page source with a GET request.
        html = requests.get(urls, headers=headers).text

        # Every match yields the image link and its name.
        for img_url, alt_text in link_re.findall(html):
            # Build the destination path.
            imgname = unsafe_re.sub('', alt_text)
            imgtype = img_url.split('.')[-1]
            path = 'battleImages/%s.%s' % (imgname, imgtype)
            print(path, img_url)
            # Save the image with urllib.
            urllib.request.urlretrieve(img_url, os.path.join('./', path))

# Multi-threaded sticker crawl: one Downloader thread per batch of pages.
# The start/append lines are disabled, so the threads are only constructed
# and the join loop below has nothing to wait for; re-enable both lines to
# actually run the crawl.
t_obj = []
for i in range(10):
    t = threading.Thread(target=Downloader, args=(i,))
    # t.start()
    # t_obj.append(t)

# Wait for every registered crawl thread to finish.
for t in t_obj:
    t.join()

# Working directory the image folders are resolved against.
current_path = os.getcwd()
# Accumulates the sticker paths found by searchImg.
imgs = []

# Look up stickers.
def searchImg(keywords):
    """Collect downloaded stickers whose path matches ``battleImages/*<keywords>.*``.

    Every match is appended to the module-level ``imgs`` list; the code
    visible here returns nothing.
    """
    print('keywords: %s' % keywords)
    pattern = current_path+'/battleImages/*'+keywords+'.*'
    imgs.extend(glob.glob(pattern))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: