您的位置:首页 > 编程语言 > Python开发

用Python实现china-pub登录验证码的识别--代码版

2013-08-12 17:33 761 查看
今天研究了一下 Python 验证码识别,在网上搜索到一个比较简单的入门级例子。但原文的代码连缩进都没有,于是自己动手实践了一遍。

用Python实现china-pub登录验证码的识别

例子是china-pub的一个验证码,比较简单,很适合初学者拿来练习。

URL:http://www.china-pub.com/edition06/imgchk/validatecode.asp

Python:Python 2.7.3

PIL:Python Imaging Library 1.1.7 for Python 2.7 http://www.pythonware.com/products/pil/

图像处理

验证码图片的大小为 40×10 像素,包含 4 个数字,每个数字占 10×10 像素。多处理几张图片就能得到 0~9 的全部数字样本,然后分析各个数字的特征以及它们之间的差别。
import Image

def img2four(image):
    """Split a captcha image into four 10x10 grayscale digit tiles.

    The tiles are cut left to right and written to ``0.bmp``, ``1.bmp``,
    ``2.bmp`` and ``3.bmp`` in the current working directory.

    :param image: the captcha image, passed straight to ``Image.open``.
    """
    width = 10
    height = 10

    # Open the source once; every tile is cropped from the same image.
    img = Image.open(image)
    for index in range(4):
        left = index * width
        box = (left, 0, left + width, height)
        # 'L' converts the tile to 8-bit grayscale before saving.
        img.crop(box).convert('L').save(str(index) + '.bmp')

验证码识别

查看某个数字图片的全部像素(pixel)值:
def printPixel(image):
    """Print the pixel values of a 10x10 image, one row per line.

    Values within a row are separated by single spaces.

    :param image: the image to dump, passed straight to ``Image.open``.
    """
    img = Image.open(image)

    for y in range(10):
        row = [str(img.getpixel((x, y))) for x in range(10)]
        # A single-argument print(...) behaves the same on Python 2 and 3.
        print(" ".join(row))
用 if 语句逐个检查特征像素点,把数字一个一个地识别出来。
def cross(color):
    """Return True when a pixel value differs from the background value.

    :param color: pixel value as returned by ``getpixel``.
    :return: ``True`` if ``color`` is not the background value 238,
        ``False`` otherwise.
    """
    bgcolor = 238
    return color != bgcolor

def recognize(image):
    """Identify the digit shown in a single tile image.

    A fixed sequence of pixels is probed; the first one that is not
    background (see cross()) decides the digit. The order matters: each
    probe relies on the digits before it having already been ruled out.

    :param image: the tile image, passed straight to ``Image.open``.
    :return: the recognised digit as an int in 0..9.
    """
    # (pixel coordinate, digit reported when that pixel is not background)
    probes = (
        ((1, 8), 7),
        ((0, 0), 5),
        ((2, 1), 1),
        ((3, 1), 4),
        ((1, 1), 6),  # not 1, must be 6
        ((1, 7), 2),
        ((2, 5), 9),
        ((5, 4), 0),  # not 9, must be 0
        ((1, 4), 8),
    )

    img = Image.open(image)
    for point, digit in probes:
        if cross(img.getpixel(point)):
            return digit

    # No probe matched: the only digit left is 3.
    return 3

def getCode(image):
    """Decode a four-digit captcha image and return the digits as a string.

    img2four() writes the digit tiles to ``0.bmp`` .. ``3.bmp`` in the
    current working directory; each tile is passed to recognize() and the
    tiles are deleted afterwards.

    :param image: the captcha image, forwarded to img2four().
    :return: the four recognised digits concatenated into one string.
    """
    import os  # local import: this listing only imports Image at the top

    tiles = [str(index) + ".bmp" for index in range(4)]
    try:
        img2four(image)
        return "".join([str(recognize(tile)) for tile in tiles])
    finally:
        # Remove the temporary tiles even when slicing or recognition fails.
        # To inspect them, add ``import time; time.sleep(3)`` before this loop.
        for tile in tiles:
            if os.path.exists(tile):
                os.remove(tile)


compare.py

比较并统计从 (0,0) 到 (9,9) 这 100 个像素点各有哪些数字的笔画经过,然后打印出只有少数(1~2 个)数字经过的坐标,用作识别时的判断依据。

import Image
def printPixel(image):
    """Return the pixel values of a 10x10 bmp image as a flat list.

    The list is row-major: index ``y * 10 + x`` holds the value of pixel
    ``(x, y)``.

    :param image: the image to read, passed straight to ``Image.open``.
    :return: list of 100 pixel values.
    """
    img = Image.open(image)
    return [img.getpixel((x, y)) for y in range(10) for x in range(10)]

def getxy(number):
    """Convert a flat row-major pixel index into an ``(x, y)`` coordinate.

    :param number: index into a list built row by row from a 10-pixel-wide
        image (0..99 for a 10x10 tile).
    :return: tuple ``(x, y)`` of ints.
    """
    # divmod floors, so y stays an int even where ``/`` is true division.
    y, x = divmod(number, 10)
    return (x, y)

def findDiff():
    """Print the pixels that tell the digits 0-9 apart.

    Reads the sample tiles ``n_0.bmp`` .. ``n_9.bmp`` through printPixel().
    For each of the 100 pixel positions it collects the digits whose pixel
    there is not the background value 238, and prints the positions where
    only one or two digits qualify.
    """
    background = 238

    # samples[k] is the flat pixel list for digit k.
    samples = [printPixel('n_' + str(digit) + '.bmp') for digit in range(10)]

    for j in range(100):
        num = [k for k in range(10) if samples[k][j] != background]
        count = len(num)

        if 0 < count < 3:
            # A single-argument print(...) behaves the same on Python 2 and 3.
            print("pixel is %s, count = %d, numbers are %s" % (getxy(j), count, num))

# Run the pixel comparison only when executed as a script, not on import.
if __name__ == "__main__":
    findDiff()

get_code.py完整代码

import Image
import urllib
import os

def img2four(image):
    """Split a captcha image into four 10x10 grayscale digit tiles.

    The tiles are cut left to right and written to ``0.bmp``, ``1.bmp``,
    ``2.bmp`` and ``3.bmp`` in the current working directory.

    :param image: the captcha image, passed straight to ``Image.open``.
    """
    width = 10
    height = 10

    # Open the source once; every tile is cropped from the same image.
    img = Image.open(image)
    for index in range(4):
        left = index * width
        box = (left, 0, left + width, height)
        # 'L' converts the tile to 8-bit grayscale before saving.
        img.crop(box).convert('L').save(str(index) + '.bmp')

def printPixel(image):
    """Print the pixel values of a 10x10 image, one row per line.

    Values within a row are separated by single spaces.

    :param image: the image to dump, passed straight to ``Image.open``.
    """
    img = Image.open(image)

    for y in range(10):
        row = [str(img.getpixel((x, y))) for x in range(10)]
        # A single-argument print(...) behaves the same on Python 2 and 3.
        print(" ".join(row))

# img2four("test1.png")
# printPixel("n_0.bmp")

def cross(color):
    """Return True when a pixel value differs from the background value.

    :param color: pixel value as returned by ``getpixel``.
    :return: ``True`` if ``color`` is not the background value 238,
        ``False`` otherwise.
    """
    bgcolor = 238
    return color != bgcolor


def recognize(image):
    """Identify the digit shown in a single tile image.

    A fixed sequence of pixels is probed; the first one that is not
    background (see cross()) decides the digit. The order matters: each
    probe relies on the digits before it having already been ruled out.

    :param image: the tile image, passed straight to ``Image.open``.
    :return: the recognised digit as an int in 0..9.
    """
    # (pixel coordinate, digit reported when that pixel is not background)
    probes = (
        ((1, 8), 7),
        ((0, 0), 5),
        ((2, 1), 1),
        ((3, 1), 4),
        ((1, 1), 6),  # not 1, must be 6
        ((1, 7), 2),
        ((2, 5), 9),
        ((5, 4), 0),  # not 9, must be 0
        ((1, 4), 8),
    )

    img = Image.open(image)
    for point, digit in probes:
        if cross(img.getpixel(point)):
            return digit

    # No probe matched: the only digit left is 3.
    return 3


def getCode(image):
    """Decode a four-digit captcha image and return the digits as a string.

    img2four() writes the digit tiles to ``0.bmp`` .. ``3.bmp`` in the
    current working directory; each tile is passed to recognize() and the
    tiles are deleted afterwards.

    :param image: the captcha image, forwarded to img2four().
    :return: the four recognised digits concatenated into one string.
    """
    tiles = [str(index) + ".bmp" for index in range(4)]
    try:
        img2four(image)
        return "".join([str(recognize(tile)) for tile in tiles])
    finally:
        # Remove the temporary tiles even when slicing or recognition fails.
        # To inspect them, add ``import time; time.sleep(3)`` before this loop.
        for tile in tiles:
            if os.path.exists(tile):
                os.remove(tile)

def validateCode():
    """Download a captcha from china-pub, save it, and print the decoded digits.

    The image is written to ``code67.png`` in the current working directory
    and then decoded with getCode(). The download uses ``urllib.urlopen``,
    which is the Python 2 API this script targets.
    """
    url = "http://www.china-pub.com/edition06/imgchk/validatecode.asp"
    path = "code67.png"

    # Close the HTTP response even if reading fails.
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()

    # ``with`` guarantees the file is flushed and closed before it is decoded.
    with open(path, "wb") as f:
        f.write(data)

    print(getCode(path))

# Fetch and decode one captcha only when executed as a script, not on import.
if __name__ == "__main__":
    validateCode()


参考

http://effbot.org/imagingbook/image.htm
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: