您的位置：首页 > 编程语言 > Python开发

用Python实现china-pub登录验证码的识别--代码版

2013-08-12 17:33 761 查看

今天看了下Python验证码识别，在网上搜索到一个比较简单的入门级例子。但是原文的代码连缩进都没有，于是自己动手实践了一下。

用Python实现china-pub登录验证码的识别

例子是china-pub的一个验证码，比较简单，很适合初学者拿来练习。

URL：http://www.china-pub.com/edition06/imgchk/validatecode.asp

Python：Python 2.7.3

PIL：Python Imaging Library 1.1.7 for Python 2.7 http://www.pythonware.com/products/pil/

图像处理

验证码图片的大小为40×10，共4个数字，每个数字占10×10。切分几张图片就能得到0～9的全部数字，然后分析各个数字的特征以及它们之间的差别。

import Image

def img2four(image):
    """Split a captcha image into four 10x10 digit tiles.

    Each tile is converted to 8-bit grayscale ('L') and saved in the
    current working directory as 0.bmp, 1.bmp, 2.bmp and 3.bmp, ordered
    from left to right.

    image: passed straight to Image.open().
    """
    tile = 10  # edge length of one digit tile, in pixels
    # Open the source once; re-opening it for every tile is wasted work.
    img = Image.open(image)
    for index in range(4):
        left = index * tile
        box = (left, 0, left + tile, tile)
        img.crop(box).convert('L').save(str(index) + '.bmp')

验证码识别

看某个数字的全部pixel值

def printPixel(image):
    """Print the top-left 10x10 pixel values of an image, one row per line.

    Values within a row are separated by single spaces.
    """
    img = Image.open(image)
    for row in range(10):
        values = [str(img.getpixel((col, row))) for col in range(10)]
        print(" ".join(values))

用if语句逐个检查特征像素点，把数字一个一个地识别出来。

def cross(color):
    """Return True when a pixel value differs from the background (238)."""
    background = 238
    return color != background

def recognize(image):
    """Identify the digit drawn in a single digit tile.

    Probe pixels are checked in a fixed order; the first one that is not
    background (as judged by cross()) decides the digit.

    Returns the recognized digit as an int from 0 to 9.
    """
    # (x, y) probe -> digit. The order matters: each probe only separates
    # the digits that the earlier probes have not already returned.
    probes = (
        ((1, 8), 7),
        ((0, 0), 5),
        ((2, 1), 1),
        ((3, 1), 4),
        ((1, 1), 6),  # not 1, must be 6
        ((1, 7), 2),
        ((2, 5), 9),
        ((5, 4), 0),  # not 9, must be 0
        ((1, 4), 8),
    )
    img = Image.open(image)
    for point, digit in probes:
        if cross(img.getpixel(point)):
            return digit
    # No probe hit: 3 is the only digit left.
    return 3

def getCode(image):
    """Recognize the four-digit code in a captcha image.

    The image is split into 0.bmp .. 3.bmp by img2four(), each tile is
    passed to recognize(), and the temporary tiles are deleted afterwards.

    Returns the four recognized digits concatenated into one string.
    """
    tiles = ["0.bmp", "1.bmp", "2.bmp", "3.bmp"]
    try:
        img2four(image)
        return "".join([str(recognize(tile)) for tile in tiles])
    finally:
        # Remove the crop files even when splitting or recognition fails,
        # so a bad image does not leave stray tiles behind.
        for tile in tiles:
            if os.path.exists(tile):
                os.remove(tile)

compare.py

统计从（0,0）到（9,9）这100个点上分别有哪些数字的笔画经过（即像素值不等于背景色238），然后打印出只有少数（1～2个）数字经过的坐标，用作识别时的判断依据。

import Image
def printPixel(image):
    """Return the top-left 10x10 pixel values of an image as a flat list.

    The list has 100 entries in row-major order, so the pixel at (x, y)
    is stored at index y * 10 + x.
    """
    img = Image.open(image)
    return [img.getpixel((col, row)) for row in range(10) for col in range(10)]

def getxy(number):
    """Convert a flat row-major pixel index (0-99) into an (x, y) tuple."""
    # divmod uses floor division, so y stays an integer even where "/"
    # means true division and would give a float such as 2.3.
    y, x = divmod(number, 10)
    return (x, y)

def findDiff():
    """Print the pixels that are set in only one or two of the digits 0-9.

    Reads n_0.bmp .. n_9.bmp via printPixel() and, for each of the 100
    pixel positions, collects the digits whose value at that position is
    not the background value 238.
    """
    samples = [printPixel('n_' + str(digit) + '.bmp') for digit in range(10)]

    for index in range(100):
        hits = [digit for digit in range(10) if samples[digit][index] != 238]
        # Only positions crossed by one or two digits are useful probes.
        if 0 < len(hits) < 3:
            print("pixel is %s, count = %d, numbers are %s" % (getxy(index), len(hits), hits))

# Script entry point: report the distinguishing pixels when run directly.
if __name__ == "__main__":
    findDiff()

get_code.py完整代码

import Image
import urllib
import os

def img2four(image):
    """Split a captcha image into four 10x10 digit tiles.

    Each tile is converted to 8-bit grayscale ('L') and saved in the
    current working directory as 0.bmp, 1.bmp, 2.bmp and 3.bmp, ordered
    from left to right.

    image: passed straight to Image.open().
    """
    tile = 10  # edge length of one digit tile, in pixels
    # Open the source once; re-opening it for every tile is wasted work.
    img = Image.open(image)
    for index in range(4):
        left = index * tile
        box = (left, 0, left + tile, tile)
        img.crop(box).convert('L').save(str(index) + '.bmp')

def printPixel(image):
    """Print the top-left 10x10 pixel values of an image, one row per line.

    Values within a row are separated by single spaces.
    """
    img = Image.open(image)
    for row in range(10):
        values = [str(img.getpixel((col, row))) for col in range(10)]
        print(" ".join(values))

# img2four("test1.png")
# printPixel("n_0.bmp")

def cross(color):
    """Return True when a pixel value differs from the background (238)."""
    background = 238
    return color != background

def recognize(image):
    """Identify the digit drawn in a single digit tile.

    Probe pixels are checked in a fixed order; the first one that is not
    background (as judged by cross()) decides the digit.

    Returns the recognized digit as an int from 0 to 9.
    """
    # (x, y) probe -> digit. The order matters: each probe only separates
    # the digits that the earlier probes have not already returned.
    probes = (
        ((1, 8), 7),
        ((0, 0), 5),
        ((2, 1), 1),
        ((3, 1), 4),
        ((1, 1), 6),  # not 1, must be 6
        ((1, 7), 2),
        ((2, 5), 9),
        ((5, 4), 0),  # not 9, must be 0
        ((1, 4), 8),
    )
    img = Image.open(image)
    for point, digit in probes:
        if cross(img.getpixel(point)):
            return digit
    # No probe hit: 3 is the only digit left.
    return 3

def getCode(image):
    """Recognize the four-digit code in a captcha image.

    The image is split into 0.bmp .. 3.bmp by img2four(), each tile is
    passed to recognize(), and the temporary tiles are deleted afterwards.

    Returns the four recognized digits concatenated into one string.
    """
    tiles = ["0.bmp", "1.bmp", "2.bmp", "3.bmp"]
    try:
        img2four(image)
        return "".join([str(recognize(tile)) for tile in tiles])
    finally:
        # Remove the crop files even when splitting or recognition fails,
        # so a bad image does not leave stray tiles behind.
        for tile in tiles:
            if os.path.exists(tile):
                os.remove(tile)

def validateCode():
    """Download one captcha from china-pub and print the recognized code.

    The image is saved as code67.png in the current directory and then
    passed to getCode().
    """
    url = "http://www.china-pub.com/edition06/imgchk/validatecode.asp"
    path = "code67.png"
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    # open() replaces the legacy file() builtin, and the with-block closes
    # the file even if the write raises.
    with open(path, "wb") as f:
        f.write(data)
    print(getCode(path))

# Script entry point: fetch a captcha and print its code when run directly.
if __name__ == "__main__":
    validateCode()

参考

http://effbot.org/imagingbook/image.htm

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航