您的位置：首页 > 数据库 > Memcache

python连接mysql，并在中间用memcached保存sql结果

2014-01-05 17:17 441 查看

我在python和mysql中间加了一层memcached中间层，缓存sql查询的结果，以期望获得更好的系统性能。

参考：

/article/4657979.html

http://www.the5fire.com/python-opt-mysql.html

python连接mysql需要先安装一些lib，我是ubuntu，比较easy，直接apt-get

# Install the MySQL development packages and the Python MySQLdb binding via apt (Ubuntu).
sudo apt-get install libmysqld-dev
sudo apt-get install libmysqlclient-dev
sudo apt-get install python-mysqldb

然后就可以写python去connect mysql啦，当然，一开始，mysql的数据库里面是神马也没有的，要自己先去create一个数据库，然后再create table，insert data。下面是建表和插入数据的python代码

#!/usr/bin/env python

# 20140105,  create_table.py

import MySQLdb

# Create the id_info table in testdb and fill it with 3 * 100000 rows:
# ids 0..99999 get info 'aaa<i>', 100000..199999 get 'bbb<i>',
# 200000..299999 get 'ccc<i>'.
conn = None
cur = None
try:
    conn = MySQLdb.connect(host='192.168.1.6', user='dba', passwd='111111', port=3306)
    cur = conn.cursor()

    # The database must already exist. To create it from here, run:
    #   cur.execute('create database if not exists testdb')
    conn.select_db('testdb')
    cur.execute('create table id_info(id int,info varchar(20))')

    rg = 100000
    # One batch per prefix; batch k covers ids k*rg .. (k+1)*rg - 1.
    for batch, prefix in enumerate(('aaa', 'bbb', 'ccc')):
        values = [(i + batch * rg, prefix + str(i)) for i in range(rg)]
        cur.executemany('insert into id_info values(%s,%s)', values)

    conn.commit()

except MySQLdb.Error as e:
    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))

finally:
    # Release the cursor and connection on both the success and error paths.
    if cur is not None:
        cur.close()
    if conn is not None:
        conn.close()

经过以上代码的运行，testdb这个数据库里面就有了一个id_info表，并且这个表里面还有了300000行数据。然后是连接mysql并执行select，我比较喜欢用面向对象的方式来写代码，所以就把连接mysql的程序做了一点封装

#!/usr/bin/env python

# 20140105,  conn_mysql.py

import MySQLdb

class conn_mysql(object):
    """Small wrapper around a MySQLdb connection to the ``testdb`` database.

    Call ``connect_db()`` before any query method and ``disconnect_db()``
    when done. The connection and cursor are stored on ``self.conn`` and
    ``self.cur``.
    """

    def __init__(self):
        print("init mysql")

    def __del__(self):
        print("quit mysql")

    def connect_db(self):
        """Open the connection and create the cursor used by the query methods."""
        self.conn = MySQLdb.connect(host='192.168.1.6', user='dba', passwd='111111', db='testdb', port=3306)
        self.cur = self.conn.cursor()
        self.conn.select_db('testdb')

    def test_select(self):
        """Smoke test: select every row of id_info and print them.

        Prints the row count, the first row, the next five rows, and then
        (after rewinding the cursor) the ``info`` column of every row.
        Intended for small tables only.
        """
        count = self.cur.execute('select * from id_info')
        print('there has %s rows record' % count)

        result = self.cur.fetchone()
        print(result)
        print('ID: %s info %s' % result)

        results = self.cur.fetchmany(5)
        for r in results:
            print(r)

        print('==' * 10)
        # Rewind to the first row so fetchall() returns the full result set.
        self.cur.scroll(0, mode='absolute')

        results = self.cur.fetchall()
        for r in results:
            print(r[1])

        self.conn.commit()

    def test_count(self, str_sql):
        """Execute ``str_sql`` and return its first row formatted as a string.

        Meant for single-column queries such as ``select count(*) from ...``;
        the fetched row is formatted with ``'%s' % row``.
        """
        self.cur.execute(str_sql)
        result = self.cur.fetchone()
        return '%s' % result

    def disconnect_db(self):
        """Close the cursor and then the connection."""
        self.cur.close()
        self.conn.close()

用test_select方法来测试是否连接上。由于table里面数据很多，我是在只有30条数据的时候运行这个测试的，之后的实验中就一直用test_count。test_count这个函数的意思是：当输入的sql形如“select count(*) from ......”时，就把结果以字符串形式返回。下面是测试程序

#!/usr/bin/env python

# 20140105,  conn_mysql_raw.py

import conn_mysql

# Benchmark driver: run the same count query 10000 times directly against
# MySQL and print the last result.
str_sql = 'select count(*) from id_info where info like \'bbb%\''

db_connect = conn_mysql.conn_mysql()
db_connect.connect_db()

try:
    for i in range(10000):
        str_rows = db_connect.test_count(str_sql)

    print(str_rows + ' rows selected.')
finally:
    # Always release the cursor/connection, even if a query fails.
    db_connect.disconnect_db()

针对之前的数据，运行10000次sql，select count(*) from id_info where info like 'bbb%'，当然，每次的返回结果都是100000，主要是测试这个程序的运行时间。在我的机器上，时间是1.9s。

然后是在mysql前面加入一层memcached，需要先下载python-memcached-latest.tar.gz，这个自行google吧，apt-get源里面似乎是没有。我拿到的版本是python-memcached-1.53。安装python-memcached之前要先安装python-setuptools，不然会报错“ImportError: No module named 'setuptools'”

# Unpack the python-memcached source tarball and enter the extracted directory.
tar zxvf python-memcached-latest.tar.gz
cd python-memcached-1.53/
# setup.py needs setuptools; install it first to avoid
# "ImportError: No module named 'setuptools'".
sudo apt-get install python-setuptools
sudo python setup.py install

然后，把memcached启动，run一段python代码测试一下

#!/usr/bin/env python
# 20140105, test_memcached.py

import memcache

# Round-trip check against a local memcached: store "bar" under "foo",
# read it back and print it. Seeing "bar" means the connection works.
mc = memcache.Client(['localhost:11211'], debug=0)
mc.set("foo", "bar")
value = mc.get("foo")
print(value)

看到输出是“bar”就说明已经连上memcached了。下面就要用memcached做mysql的缓存，看性能能提升到什么程度。先改写mysql连接的封装类

#!/usr/bin/env python

# 20140105,  conn_mysql.py

import MySQLdb
import memcache
import hashlib

class conn_mysql(object):
    """MySQLdb connection wrapper with an optional memcached result cache.

    Call ``connect_db()`` before any query method, ``connect_cache()``
    before ``test_count_cached()``, and ``disconnect_db()`` when done.
    The connection, cursor and cache client are stored on ``self.conn``,
    ``self.cur`` and ``self.mc``.
    """

    def __init__(self):
        print("init mysql")

    def __del__(self):
        print("quit mysql")

    def connect_db(self):
        """Open the connection and create the cursor used by the query methods."""
        self.conn = MySQLdb.connect(host='192.168.1.6', user='dba', passwd='111111', db='testdb', port=3306)
        self.cur = self.conn.cursor()
        self.conn.select_db('testdb')

    def test_select(self):
        """Smoke test: select every row of id_info and print them.

        Prints the row count, the first row, the next five rows, and then
        (after rewinding the cursor) the ``info`` column of every row.
        Intended for small tables only.
        """
        count = self.cur.execute('select * from id_info')
        print('there has %s rows record' % count)

        result = self.cur.fetchone()
        print(result)
        print('ID: %s info %s' % result)

        results = self.cur.fetchmany(5)
        for r in results:
            print(r)

        print('==' * 10)
        # Rewind to the first row so fetchall() returns the full result set.
        self.cur.scroll(0, mode='absolute')

        results = self.cur.fetchall()
        for r in results:
            print(r[1])

        self.conn.commit()

    def test_count(self, str_sql):
        """Execute ``str_sql`` and return its first row formatted as a string.

        Meant for single-column queries such as ``select count(*) from ...``;
        the fetched row is formatted with ``'%s' % row``.
        """
        self.cur.execute(str_sql)
        result = self.cur.fetchone()
        return '%s' % result

    def connect_cache(self):
        """Create the memcached client used by ``test_count_cached``."""
        self.mc = memcache.Client(['localhost:11211'], debug=0)

    def test_count_cached(self, str_sql):
        """Like ``test_count``, but serve the result from memcached when possible.

        The cache key is the MD5 hex digest of the SQL text. On a miss the
        query runs against MySQL, and both the fetched row (under the digest)
        and the SQL text (under ``'SQL' + digest``) are stored in the cache.
        Entries are stored without an expiry, so results can go stale if the
        table changes.
        """
        # hashlib requires bytes: encode text input (Python 3 str / Python 2
        # unicode); byte strings are hashed unchanged.
        if isinstance(str_sql, bytes):
            sql_bytes = str_sql
        else:
            sql_bytes = str_sql.encode('utf-8')
        str_hash = hashlib.md5(sql_bytes).hexdigest()

        result = self.mc.get(str_hash)
        if result is not None:
            # Cache hit. The stored SQL text under 'SQL' + digest is not
            # compared here: doing so needs a second cache round trip.
            return '%s' % result

        # Cache miss: query MySQL and populate the cache.
        self.cur.execute(str_sql)
        result = self.cur.fetchone()
        self.mc.set(str_hash, result)
        self.mc.set('SQL' + str_hash, str_sql)
        return '%s' % result

    def disconnect_db(self):
        """Close the cursor and then the connection."""
        self.cur.close()
        self.conn.close()

增加memcached相关的配置信息，增加测试函数test_count_cached，先对输入的sql做字符串hash（我用的md5），以这个hash值为key去memcached中查找有没有结果，如果有就直接返回；否则再去mysql中查询，并把查询的结果做value，sql的hash值做key，存在memcached中。run这个测试函数

#!/usr/bin/env python

# 20140105,  conn_mysql_memcached.py

import conn_mysql

# Benchmark driver: run the same count query 10000 times through the
# memcached-backed test_count_cached and print the last result.
str_sql = 'select count(*) from id_info where info like \'bbb%\''

db_connect = conn_mysql.conn_mysql()
db_connect.connect_db()
db_connect.connect_cache()

try:
    for i in range(10000):
        str_rows = db_connect.test_count_cached(str_sql)

    print(str_rows + ' rows selected.')
finally:
    # Always release the cursor/connection, even if a query fails.
    db_connect.disconnect_db()

经过改进之后的test_count_cached的运行时间是1.0s，改进并不如我期望的大。可能的原因，我的mysql是装在本机上的，没有网络通讯的开销，一般情况下，mysql是在单独的数据库服务器上，而memcached是在业务服务器上，做一次sql查询是有网络开销的，所以在这种场景下，效果应该会更明显。

实验中还有一个小问题，我在对比sql文本的时候只对比了hash值，并没有对比sql文本本身，如果进行这样的对比，势必会造成性能下降。事实也是如此，我加入这段对比之后，test_count_cached的运行时间变为1.8s，不考虑误差的话，运行时间基本上是刚才的case的2倍。这也很显然，因为主要的开销都是在获取memcached中的结果，加入sql文本对比的同时也多了1次获取memcached结果的消耗。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航