您的位置:首页 > 编程语言 > Python开发

Python 360 社区监控爬虫:in / not in 判断问题

2017-04-08 14:16 671 查看
遇到一个特别奇怪的情况:最近老是收到重复邮件,检查后发现爬虫里有个地方竟然两个分支都走了。

如果用 in,在元素存在的情况下,竟然会走到 else 里面;用 not in 就不会。实在是太奇怪了:写简单的 demo 时不会出现这个情况,不知道是 Python 的问题还是我的问题。

#*-coding:utf-8-*-
import urllib2
import re
import smtplib
import time
from email.mime.text import MIMEText

# Retry budget handed to the page fetcher.
retries1 = 30
# Recipient address used for every notification mail.
_to = "XXXXXXXXXXXXX@360.cn"

class SendQqMail:
    """Primary mailer: sends the notification through the QQ SMTP service."""

    def getqqmail(self, retries):
        """Mail the current forum entry via smtp.qq.com, retrying on SMTP errors.

        The message is built from the module-level ``listitem1`` (set by the
        polling loop): ``listitem1[0:42]`` becomes the subject and
        ``listitem1[74:-4]`` the body.  The recipient is the module-level
        ``_to``.

        Args:
            retries: Number of additional attempts allowed after a failed one.

        Raises:
            smtplib.SMTPException: The last error once every attempt has
                failed.  Previously the failure was only printed, which meant
                a caller could never fall back to another mailer.
        """
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from the environment or a config file kept out of version control.
        _user = "XXXXXXXXXXXXXX@qq.com"
        _pwd = "XXXXXXXXXXXXXXXXXXX"

        msg = MIMEText(listitem1[74:-4])
        msg["Subject"] = listitem1[0:42]
        msg["From"] = _user
        msg["To"] = _to

        attempts_left = retries
        while True:
            # Every attempt, including the first, is preceded by a 30 s pause.
            time.sleep(30)
            try:
                s = smtplib.SMTP_SSL("smtp.qq.com", 465)
                try:
                    s.login(_user, _pwd)
                    s.sendmail(_user, _to, msg.as_string())
                finally:
                    # Always release the connection.  An error while saying
                    # goodbye must not count as a failed send, otherwise an
                    # already delivered message would be sent again.
                    try:
                        s.quit()
                    except (smtplib.SMTPException, IOError):
                        s.close()
            except smtplib.SMTPException as e:
                print("retry.QQ mail.........,%s" % e)
                if attempts_left > 0:
                    attempts_left -= 1
                    continue
                print("Send QQ Email Falied,%s" % e)
                raise
            print("Send QQ Email Success!")
            return

class Send163Mail:
    """Fallback mailer: sends the notification through the 163 SMTP service."""

    def get163mail(self, retries):
        """Mail the current forum entry via smtp.163.com, retrying on SMTP errors.

        The message is built from the module-level ``listitem1`` (set by the
        polling loop): ``listitem1[0:42]`` becomes the subject and
        ``listitem1[74:-4]`` the body.  The recipient is the module-level
        ``_to``.

        Delivery is best effort: when every attempt fails with an SMTP error
        the failure is printed and the method returns normally.

        Args:
            retries: Number of additional attempts allowed after a failed one.
        """
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from the environment or a config file kept out of version control.
        _user = "python_smtp_test@163.com"
        _pwd = "zk199245qqq"

        msg = MIMEText(listitem1[74:-4])
        msg["Subject"] = listitem1[0:42]
        msg["From"] = _user
        msg["To"] = _to

        attempts_left = retries
        while True:
            # Every attempt, including the first, is preceded by a 30 s pause.
            time.sleep(30)
            try:
                s = smtplib.SMTP_SSL("smtp.163.com", 465)
                try:
                    s.login(_user, _pwd)
                    s.sendmail(_user, _to, msg.as_string())
                finally:
                    # Always release the connection.  An error while saying
                    # goodbye must not count as a failed send, otherwise an
                    # already delivered message would be sent again.
                    try:
                        s.quit()
                    except (smtplib.SMTPException, IOError):
                        s.close()
            except smtplib.SMTPException as e:
                print("retry.163mail..........,%s" % e)
                if attempts_left > 0:
                    attempts_left -= 1
                    continue
                print("Send 163 Email Falied,%s" % e)
                return
            print("Send 163 Email Success!")
            return

# Request for the forum board being monitored (the "Weishi"/Safeguard board).
req1 = urllib2.Request("http://bbs.360.cn/forum-140-1.html")

class openurlrequest:
    """Fetch a URL with a pause before each attempt and bounded retries."""

    def tryopenurlrequest(self, req, retries, delay=30):
        """Return the body of ``req``, or ``""`` when every attempt fails.

        Args:
            req: URL string or request object handed to ``urllib2.urlopen``.
            retries: Number of additional attempts allowed after a failed one.
            delay: Seconds to sleep before each attempt.  Defaults to 30, the
                value that used to be hard-coded.

        Returns:
            The response body as read from the server.  An empty string is
            returned after the last failed attempt; previously that path hit
            an unbound local variable and raised ``UnboundLocalError``.
        """
        attempts_left = retries
        while True:
            time.sleep(delay)
            try:
                response = urllib2.urlopen(req)
                try:
                    return response.read()
                finally:
                    # Release the connection even when read() fails.
                    response.close()
            except Exception:
                if attempts_left > 0:
                    attempts_left -= 1
                    continue
                print('open url request Failed %s' % (req,))
                return ""

# Initial fetch: record the entries currently matched on the board page.
buff = openurlrequest()
buffer = buff.tryopenurlrequest(req1, retries1)
getarticlelist = re.compile(r'http://bbs.360.cn/thread-.+-1-1.html" \s?target="_blank" class="s xst".*\s*</a>')
pagemsg = getarticlelist.findall(buffer)

print(("卫士板块监控系统已启动,如果监控到新的信息将会自动发送到您的邮箱").decode("utf-8"))
print('ready variables of num: %s' % len(pagemsg))

# The first 42 characters of each match serve as the entry's key
# (presumably the thread URL -- confirm against the page markup).
allurllist = [eveurllist[0:42] for eveurllist in pagemsg]
print(len(allurllist))

# Poll the board forever and mail every entry that has not been seen before.
# The pattern and the fetcher never change, so both are created once.
getarticlelist1 = re.compile(r'http://bbs.360.cn/thread-.+-1-1.html" \s?target="_blank" class="s xst".*\s*</a>')
buff1 = openurlrequest()

while True:
    time.sleep(30)
    buffer1 = buff1.tryopenurlrequest(req1, retries1)
    pagemsg1 = getarticlelist1.findall(buffer1)
    # The loop variable must keep the name ``listitem1``: the mailer classes
    # read that module-level name to build the message.
    for listitem1 in pagemsg1:
        # Original author's note (translated): "The odd part is here. With
        # `in`, the last listitem1[0:42] is already in allurllist, so the else
        # branch should not run, yet both cases get printed while testing;
        # with `not in` that does not happen. Why?"
        # NOTE: this test runs once per matched entry, so within one pass
        # already-seen entries are skipped while unseen ones are mailed;
        # observing both outcomes in a single pass is expected.
        if listitem1[0:42] in allurllist:
            continue
        allurllist.append(listitem1[0:42])
        try:
            qqsendmailer = SendQqMail()
            time.sleep(30)
            qqsendmailer.getqqmail(30)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit stop the monitor instead of triggering the fallback.
            print("QQ mail try five times fail,change 163mail")
            neteasysendmailer = Send163Mail()
            time.sleep(30)
            neteasysendmailer.get163mail(30)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐