Python中的高级数据结构
2017-05-10 11:11
267 查看
数据结构
数据结构的概念很好理解,就是用来将数据组织在一起的结构。换句话说,数据结构是用来存储一系列关联数据的东西。在Python中有四种内建的数据结构,分别是List、Tuple、Dictionary以及Set。大部分的应用程序不需要其他类型的数据结构,但若是真需要,也有很多高级数据结构可供选择,例如Collections、Array、Heapq、Bisect、Weakref、Copy以及Pprint。本文将介绍这些数据结构的用法,看看它们是如何帮助我们的应用程序的。四种内建数据结构的使用方法很简单,并且网上有很多参考资料,因此本文将不会讨论它们。
1. Collections
collections模块包含了内建类型之外的一些有用的工具,例如Counter、defaultdict、OrderedDict、deque以及namedtuple。其中Counter、deque以及defaultdict是最常用的类。
1.1 Counter()
如果你想统计一个单词在给定的序列中一共出现了多少次,诸如此类的操作就可以用到Counter。来看看如何统计一个list中出现的item次数:Python
from collections import Counter
# Tally how many times each item occurs in the list.
li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"]
a = Counter(li)
# print() with a single argument produces the same output on Python 2 and 3.
print(a)  # Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1})
1 2 3 4 5 | from collections import Counter li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"] a = Counter(li) a # Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1}) |
Python
from collections import Counter
# Count occurrences, then count how many distinct items the list holds.
li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"]
a = Counter(li)
# print() with a single argument produces the same output on Python 2 and 3.
print(a)  # Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1})
print(len(set(li)))  # 4
1 2 3 4 5 6 7 | from collections import Counter li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"] a = Counter(li) a # Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1}) len(set(li)) # 4 |
Python
from collections import Counter
# Show the counts, the raw values/keys, and the most frequent items.
li = ["Dog", "Cat", "Mouse", "Dog", "Cat", "Dog"]
a = Counter(li)
print(a)  # Counter({'Dog': 3, 'Cat': 2, 'Mouse': 1})
# list() keeps the output a plain list on Python 3, where values()/keys()
# return view objects; the order shown follows the dict's iteration order.
print("{0} : {1}".format(list(a.values()), list(a.keys())))  # e.g. [1, 3, 2] : ['Mouse', 'Dog', 'Cat']
print(a.most_common(3))  # [('Dog', 3), ('Cat', 2), ('Mouse', 1)]
1 2 3 4 5 6 7 8 9 10 | from collections import Counter li = ["Dog", "Cat", "Mouse","Dog","Cat", "Dog"] a = Counter(li) print a # Counter({'Dog': 3, 'Cat': 2, 'Mouse': 1}) print "{0} : {1}".format(a.values(),a.keys()) # [1, 3, 2] : ['Mouse', 'Dog', 'Cat'] print(a.most_common(3)) # [('Dog', 3), ('Cat', 2), ('Mouse', 1)] |
Python
import re
from collections import Counter
string = """ Lorem ipsum dolor sit amet, consectetur
adipiscing elit. Nunc ut elit id mi ultricies
adipiscing. Nulla facilisi. Praesent pulvinar,
sapien vel feugiat vestibulum, nulla dui pretium orci,
non ultricies elit lacus quis ante. Lorem ipsum dolor
sit amet, consectetur adipiscing elit. Aliquam
pretium ullamcorper urna quis iaculis. Etiam ac massa
sed turpis tempor luctus. Curabitur sed nibh eu elit
mollis congue. Praesent ipsum diam, consectetur vitae
ornare a, aliquam a nunc. In id magna pellentesque
tellus posuere adipiscing. Sed non mi metus, at lacinia
augue. Sed magna nisi, ornare in mollis in, mollis
sed nunc. Etiam at justo in leo congue mollis.
Nullam in neque eget metus hendrerit scelerisque
eu non enim. Ut malesuada lacus eu nulla bibendum
id euismod urna sodales. """
words = re.findall(r'\w+', string)  # every run of word characters is one word
lower_words = [word.lower() for word in words]  # fold case so "Sed" and "sed" count together
word_counts = Counter(lower_words)  # maps each word to its number of occurrences
# print() with a single argument produces the same output on Python 2 and 3;
# the relative order of words with equal counts may differ between versions.
print(word_counts)
# Counter({'elit': 5, 'sed': 5, 'in': 5, 'adipiscing': 4, 'mollis': 4, 'eu': 3,
# 'id': 3, 'nunc': 3, 'consectetur': 3, 'non': 3, 'ipsum': 3, 'nulla': 3, 'pretium':
# 2, 'lacus': 2, 'ornare': 2, 'at': 2, 'praesent': 2, 'quis': 2, 'sit': 2, 'congue': 2, 'amet': 2,
# 'etiam': 2, 'urna': 2, 'a': 2, 'magna': 2, 'lorem': 2, 'aliquam': 2, 'ut': 2, 'ultricies': 2, 'mi': 2,
# 'dolor': 2, 'metus': 2, 'ac': 1, 'bibendum': 1, 'posuere': 1, 'enim': 1, 'ante': 1, 'sodales': 1, 'tellus': 1,
# 'vitae': 1, 'dui': 1, 'diam': 1, 'pellentesque': 1, 'massa': 1, 'vel': 1, 'nullam': 1, 'feugiat': 1, 'luctus': 1,
# 'pulvinar': 1, 'iaculis': 1, 'hendrerit': 1, 'orci': 1, 'turpis': 1, 'nibh': 1, 'scelerisque': 1, 'ullamcorper': 1,
# 'eget': 1, 'neque': 1, 'euismod': 1, 'curabitur': 1, 'leo': 1, 'sapien': 1, 'facilisi': 1, 'vestibulum': 1, 'nisi': 1,
# 'justo': 1, 'augue': 1, 'tempor': 1, 'lacinia': 1, 'malesuada': 1})
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | import re from collections import Counter string = """ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc ut elit id mi ultricies adipiscing. Nulla facilisi. Praesent pulvinar, sapien vel feugiat vestibulum, nulla dui pretium orci, non ultricies elit lacus quis ante. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam pretium ullamcorper urna quis iaculis. Etiam ac massa sed turpis tempor luctus. Curabitur sed nibh eu elit mollis congue. Praesent ipsum diam, consectetur vitae ornare a, aliquam a nunc. In id magna pellentesque tellus posuere adipiscing. Sed non mi metus, at lacinia augue. Sed magna nisi, ornare in mollis in, mollis sed nunc. Etiam at justo in leo congue mollis. Nullam in neque eget metus hendrerit scelerisque eu non enim. Ut malesuada lacus eu nulla bibendum id euismod urna sodales. """ words = re.findall(r'\w+', string) #This finds words in the document lower_words = [word.lower() for word in words] #lower all the words word_counts = Counter(lower_words) #counts the number each time a word appears print word_counts # Counter({'elit': 5, 'sed': 5, 'in': 5, 'adipiscing': 4, 'mollis': 4, 'eu': 3, # 'id': 3, 'nunc': 3, 'consectetur': 3, 'non': 3, 'ipsum': 3, 'nulla': 3, 'pretium': # 2, 'lacus': 2, 'ornare': 2, 'at': 2, 'praesent': 2, 'quis': 2, 'sit': 2, 'congue': 2, 'amet': 2, # 'etiam': 2, 'urna': 2, 'a': 2, 'magna': 2, 'lorem': 2, 'aliquam': 2, 'ut': 2, 'ultricies': 2, 'mi': 2, # 'dolor': 2, 'metus': 2, 'ac': 1, 'bibendum': 1, 'posuere': 1, 'enim': 1, 'ante': 1, 'sodales': 1, 'tellus': 1, # 'vitae': 1, 'dui': 1, 'diam': 1, 'pellentesque': 1, 'massa': 1, 'vel': 1, 'nullam': 1, 'feugiat': 1, 'luctus': 1, # 'pulvinar': 1, 'iaculis': 1, 'hendrerit': 1, 'orci': 1, 'turpis': 1, 'nibh': 1, 'scelerisque': 1, 'ullamcorper': 1, # 'eget': 1, 'neque': 1, 'euismod': 1, 'curabitur': 1, 'leo': 1, 'sapien': 1, 'facilisi': 1, 'vestibulum': 1, 
'nisi': 1, # 'justo': 1, 'augue': 1, 'tempor': 1, 'lacinia': 1, 'malesuada': 1}) |
1.2 Deque
Deque是一种由队列结构扩展而来的双端队列(double-ended queue),队列元素能够在队列两端添加或删除。因此它还被称为头尾连接列表(head-tail linked list),尽管叫这个名字的还有另一个特殊的数据结构实现。Deque支持线程安全的、经过优化的append和pop操作,在队列两端的相关操作都能够达到近乎O(1)的时间复杂度。虽然list也支持类似的操作,但它只在对定长列表进行操作时表现不错,而当遇到pop(0)和insert(0, v)这样既改变了列表的长度又改变其元素位置的操作时,其复杂度就变为O(n)了。
来看看相关的比较结果:
Python
import time
from collections import deque
num = 100000  # number of operations each benchmark helper performs


def append(c):
    """Append ``num`` consecutive integers to the right end of ``c``."""
    for i in range(num):
        c.append(i)


def appendleft(c):
    """Add ``num`` consecutive integers at the left end of ``c``.

    A deque uses its native ``appendleft``; any other container falls back
    to ``insert(0, i)``.
    """
    if isinstance(c, deque):
        for i in range(num):
            c.appendleft(i)
    else:
        for i in range(num):
            c.insert(0, i)


def pop(c):
    """Remove ``num`` items from the right end of ``c``."""
    for i in range(num):
        c.pop()


def popleft(c):
    """Remove ``num`` items from the left end of ``c``.

    A deque uses its native ``popleft``; any other container falls back
    to ``pop(0)``.
    """
    if isinstance(c, deque):
        for i in range(num):
            c.popleft()
    else:
        for i in range(num):
            c.pop(0)
# Time every operation against a freshly filled container of each type.
for container in [deque, list]:
    for operation in [append, appendleft, pop, popleft]:
        c = container(range(num))  # start each run with num elements
        start = time.time()
        operation(c)
        elapsed = time.time() - start
        # print() with a single argument produces the same output on Python 2 and 3.
        print("Completed {0}/{1} in {2} seconds: {3} ops/sec".format(
            container.__name__, operation.__name__, elapsed, num / elapsed))
# Completed deque/append in 0.0250000953674 seconds: 3999984.74127 ops/sec
# Completed deque/appendleft in 0.0199999809265 seconds: 5000004.76838 ops/sec
# Completed deque/pop in 0.0209999084473 seconds: 4761925.52225 ops/sec
# Completed deque/popleft in 0.0199999809265 seconds: 5000004.76838 ops/sec
# Completed list/append in 0.0220000743866 seconds: 4545439.17637 ops/sec
# Completed list/appendleft in 21.3209998608 seconds: 4690.21155917 ops/sec
# Completed list/pop in 0.0240001678467 seconds: 4166637.52682 ops/sec
# Completed list/popleft in 4.01799988747 seconds: 24888.0046791 ops/sec
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | import time from collections import deque num = 100000 def append(c): for i in range(num): c.append(i) def appendleft(c): if isinstance(c, deque): for i in range(num): c.appendleft(i) else: for i in range(num): c.insert(0, i) def pop(c): for i in range(num): c.pop() def popleft(c): if isinstance(c, deque): for i in range(num): c.popleft() else: for i in range(num): c.pop(0) for container in [deque, list]: for operation in [append, appendleft, pop, popleft]: c = container(range(num)) start = time.time() operation(c) elapsed = time.time() - start "Completed {0}/{1} in {2} seconds: {3} ops/sec".format( container.__name__, operation.__name__, elapsed, num / elapsed) # Completed deque/append in 0.0250000953674 seconds: 3999984.74127 ops/sec # Completed deque/appendleft in 0.0199999809265 seconds: 5000004.76838 ops/sec # Completed deque/pop in 0.0209999084473 seconds: 4761925.52225 ops/sec # Completed deque/popleft in 0.0199999809265 seconds: 5000004.76838 ops/sec # Completed list/append in 0.0220000743866 seconds: 4545439.17637 ops/sec # Completed list/appendleft in 21.3209998608 seconds: 4690.21155917 ops/sec # Completed list/pop in 0.0240001678467 seconds: 4166637.52682 ops/sec # Completed list/popleft in 4.01799988747 seconds: 24888.0046791 ops/sec |
Python
from collections import deque
# Walk through the basic deque operations; expected output is listed below.
q = deque(range(5))
q.append(5)
q.appendleft(6)
print(q)
print(q.pop())
print(q.popleft())
print(q.rotate(3))  # rotate() works in place, so this prints None
print(q)
print(q.rotate(-1))  # a negative count rotates to the left
print(q)
# deque([6, 0, 1, 2, 3, 4, 5])
# 5
# 6
# None
# deque([2, 3, 4, 0, 1])
# None
# deque([3, 4, 0, 1, 2])
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | from collections import deque q = deque(range(5)) q.append(5) q.appendleft(6) q print q.pop() q.popleft() print q.rotate(3) q print q.rotate(-1) q # deque([6, 0, 1, 2, 3, 4, 5]) # 5 # 6 # None # deque([2, 3, 4, 0, 1]) # None # deque([3, 4, 0, 1, 2]) |
1.3 Defaultdict
这个类型除了在处理不存在的键的操作之外与普通的字典完全相同。当查找一个不存在的键操作发生时,它的default_factory会被调用,提供一个默认的值,并且将这对键值存储下来。其他的参数同普通的字典方法dict()一致,一个defaultdict的实例同内建dict一样拥有同样地操作。defaultdict对象在当你希望使用它存放追踪数据的时候很有用。举个例子,假定你希望追踪一个单词在字符串中的位置,那么你可以这么做:
Python
from collections import defaultdict
# Record every position at which each word occurs.
s = "the quick brown fox jumps over the lazy dog"
words = s.split()
location = defaultdict(list)
for m, n in enumerate(words):
    # m is the position, n is the word; a missing key starts as an empty list
    location[n].append(m)
print(location)
# Output under Python 2 (Python 3 shows <class 'list'> instead of <type 'list'>):
# defaultdict(<type 'list'>, {'brown': [2], 'lazy': [7], 'over': [5], 'fox': [3],
# 'dog': [8], 'quick': [1], 'the': [0, 6], 'jumps': [4]})
1 2 3 4 5 6 7 8 9 10 11 12 13 | from collections import defaultdict s = "the quick brown fox jumps over the lazy dog" words = s.split() location = defaultdict(list) for m, n in enumerate(words): location[n].append(m) print location # defaultdict(<type 'list'>, {'brown': [2], 'lazy': [7], 'over': [5], 'fox': [3], # 'dog': [8], 'quick': [1], 'the': [0, 6], 'jumps': [4]}) |
Python
from collections import defaultdict
# Same position tracking, but each word maps to a set of positions.
s = "the quick brown fox jumps over the lazy dog"
words = s.split()
location = defaultdict(set)
for m, n in enumerate(words):
    # m is the position, n is the word; a missing key starts as an empty set
    location[n].add(m)
print(location)
# Output under Python 2 (Python 3 uses <class 'set'> and {..} set literals):
# defaultdict(<type 'set'>, {'brown': set([2]), 'lazy': set([7]),
# 'over': set([5]), 'fox': set([3]), 'dog': set([8]), 'quick': set([1]),
# 'the': set([0, 6]), 'jumps': set([4])})
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | from collections import defaultdict s = "the quick brown fox jumps over the lazy dog" words = s.split() location = defaultdict(set) for m, n in enumerate(words): location[n].add(m) print location # defaultdict(<type 'set'>, {'brown': set([2]), 'lazy': set([7]), # 'over': set([5]), 'fox': set([3]), 'dog': set([8]), 'quick': set([1]), # 'the': set([0, 6]), 'jumps': set([4])}) |
Python
# Plain-dict alternative: setdefault() supplies the empty list on first use.
s = "the quick brown fox jumps over the lazy dog"
d = {}
words = s.split()
for key, value in enumerate(words):
    # key is the word's position, value is the word itself
    d.setdefault(key, []).append(value)
print(d)
# {0: ['the'], 1: ['quick'], 2: ['brown'], 3: ['fox'], 4: ['jumps'], 5: ['over'], 6: ['the'], 7: ['lazy'], 8: ['dog']}
1 2 3 4 5 6 7 8 9 | s = "the quick brown fox jumps over the lazy dog" d = {} words = s.split() for key, value in enumerate(words): d.setdefault(key, []).append(value) d # {0: ['the'], 1: ['quick'], 2: ['brown'], 3: ['fox'], 4: ['jumps'], 5: ['over'], 6: ['the'], 7: ['lazy'], 8: ['dog']} |
Python
class Example(dict):
    """Dict whose missing keys spring into existence as nested ``Example`` dicts."""

    def __missing__(self, item):
        # dict.__getitem__ calls this hook for an absent key; store a fresh
        # instance of the same class under that key and hand it back.
        value = self[item] = type(self)()
        return value


a = Example()
a[1][2][3] = 4
a[1][3][3] = 5
a[1][2]['test'] = 6
print(a)  # {1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | class Example(dict): def __getitem__(self, item): try: return dict.__getitem__(self, item) except KeyError: value = self[item] = type(self)() return value a = Example() a[1][2][3] = 4 a[1][3][3] = 5 a[1][2]['test'] = 6 a # {1: {2: {'test': 6, 3: 4}, 3: {3: 5}}} |
2. Array
array模块定义了一个很像list的新对象类型,不同之处在于它限定了这个类型只能装一种类型的元素。array元素的类型是在创建并使用的时候确定的。如果你的程序需要优化内存的使用,并且你确定你希望在list中存储的数据都是同样类型的,那么使用array模块很合适。举个例子,如果需要存储一千万个整数,如果用list,那么你至少需要160MB的存储空间,然而如果使用array,你只需要40MB。但虽然说能够节省空间,array上几乎没有什么基本操作能够比在list上更快。
在使用array进行计算的时候,需要特别注意那些创建list的操作。例如,使用列表推导式(list comprehension)的时候,会将array整个转换为list,使得存储空间膨胀。一个可行的替代方案是使用生成器表达式创建新的array。看代码:
Python
import array
# Build a small int array, then derive a second array with every value doubled.
a = array.array("i", [1, 2, 3, 4, 5])
# The constructor consumes a generator expression, so the doubled values are
# produced one at a time rather than through a list comprehension.
b = array.array(
    a.typecode,
    (2 * x for x in a),
)
1 2 3 4 | import array a = array.array("i", [1,2,3,4,5]) b = array.array(a.typecode, (2*x for x in a)) |
Python
import array
a = array.array("i", [1, 2, 3, 4, 5])
# Double every element in place by index, so no second array is created.
for i, x in enumerate(a):
    a[i] = 2 * x
1 2 3 4 5 | import array a = array.array("i", [1,2,3,4,5]) for i, x in enumerate(a): a[i] = 2*x |
那么什么时候使用array呢?是当你在考虑计算的因素之外,还需要得到一个像C语言里一样统一元素类型的数组时。
Python
import array
from timeit import Timer
def arraytest():
    """Benchmark body: build a doubled copy of a small array from a generator."""
    a = array.array("i", [1, 2, 3, 4, 5])
    # The result is deliberately discarded; only the construction cost matters.
    b = array.array(a.typecode, (2 * x for x in a))
def enumeratetest():
    """Benchmark body: double the elements of a small array in place."""
    a = array.array("i", [1, 2, 3, 4, 5])
    for i, x in enumerate(a):
        a[i] = 2 * x
if __name__ == '__main__':
    # Each Timer imports its target from __main__, so this only works when
    # the file is run as a script.
    m = Timer("arraytest()", "from __main__ import arraytest")
    n = Timer("enumeratetest()", "from __main__ import enumeratetest")
    # print() with a single argument produces the same output on Python 2 and 3.
    print(m.timeit())  # 5.22479210582
    print(n.timeit())  # 4.34367196717
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | import array from timeit import Timer def arraytest(): a = array.array("i", [1, 2, 3, 4, 5]) b = array.array(a.typecode, (2 * x for x in a)) def enumeratetest(): a = array.array("i", [1, 2, 3, 4, 5]) for i, x in enumerate(a): a[i] = 2 * x if __name__=='__main__': m = Timer("arraytest()", "from __main__ import arraytest") n = Timer("enumeratetest()", "from __main__ import enumeratetest") m.timeit() # 5.22479210582 n.timeit() # 4.34367196717 |
3. Heapq
heapq模块使用一个用堆实现的优先级队列。堆是一种简单的有序列表,并且置入了堆的相关规则。堆是一种树形的数据结构,树上的子节点与父节点之间存在顺序关系。二叉堆(binary heap)能够用一个经过组织的列表或数组结构来表示,在这种结构中,元素N的子节点的序号为2*N+1和2*N+2(下标始于0)。简单来说,这个模块中的所有函数都假设序列是有序的,所以序列中的第一个元素(seq[0])是最小的,序列的其他部分构成一个二叉树,并且seq[i]节点的子节点分别为seq[2*i+1]以及seq[2*i+2]。当对序列进行修改时,相关函数总是确保子节点大于等于父节点。
Python
import heapq
heap = []
# Push the values in arbitrary order; heappush keeps the smallest at heap[0].
for value in [20, 10, 30, 50, 40]:
    heapq.heappush(heap, value)
# Popping until the heap is empty yields the values in ascending order.
while heap:
    print(heapq.heappop(heap))
1 2 3 4 5 6 7 8 9 | import heapq heap = [] for value in [20, 10, 30, 50, 40]: heapq.heappush(heap, value) while heap: heapq.heappop(heap) |
Python
import heapq
# Pick the three biggest and the three smallest numbers without sorting the list.
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
top_three = heapq.nlargest(3, nums)
bottom_three = heapq.nsmallest(3, nums)
print(top_three)  # Prints [42, 37, 23]
print(bottom_three)  # Prints [-4, 1, 2]
1 2 3 4 5 | import heapq nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2] print(heapq.nlargest(3, nums)) # Prints [42, 37, 23] print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2] |
Python
import heapq
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]
# The key function makes the records compare by their 'price' field.
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
# print() with a single argument produces the same output on Python 2 and 3;
# the key order inside each printed dict may differ between versions.
print(cheap)
# [{'price': 16.35, 'name': 'YHOO', 'shares': 45},
# {'price': 21.09, 'name': 'FB', 'shares': 200}, {'price': 31.75, 'name': 'HPQ', 'shares': 35}]
print(expensive)
# [{'price': 543.22, 'name': 'AAPL', 'shares': 50}, {'price': 115.65, 'name': 'ACME',
# 'shares': 75}, {'price': 91.1, 'name': 'IBM', 'shares': 100}]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | import heapq portfolio = [ {'name': 'IBM', 'shares': 100, 'price': 91.1}, {'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}, {'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'ACME', 'shares': 75, 'price': 115.65} ] cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price']) expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price']) print cheap # [{'price': 16.35, 'name': 'YHOO', 'shares': 45}, # {'price': 21.09, 'name': 'FB', 'shares': 200}, {'price': 31.75, 'name': 'HPQ', 'shares': 35}] expensive # [{'price': 543.22, 'name': 'AAPL', 'shares': 50}, {'price': 115.65, 'name': 'ACME', # 'shares': 75}, {'price': 91.1, 'name': 'IBM', 'shares': 100}] |
Python
import heapq
class Item:
    """Named payload used to demonstrate the priority queue."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'Item({!r})'.format(self.name)
class PriorityQueue:
    """Heap-backed queue that pops the highest-priority item first."""

    def __init__(self):
        self._queue = []  # heap of (-priority, insertion index, item) tuples
        self._index = 0   # running counter of pushes

    def push(self, item, priority):
        """Add ``item`` with the given numeric ``priority``."""
        # heapq is a min-heap, so the priority is negated to pop the largest
        # first; the index orders equal priorities by insertion and is
        # compared before the item itself.
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        return heapq.heappop(self._queue)[-1]
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
# Highest priority comes out first; 'foo' and 'grok' share priority 1 and
# come out in the order they were pushed.
print(q.pop())  # Item('bar')
print(q.pop())  # Item('spam')
print(q.pop())  # Item('foo')
print(q.pop())  # Item('grok')
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | import heapq class Item: def __init__(self, name): self.name = name def __repr__(self): return 'Item({!r})'.format(self.name) class PriorityQueue: def __init__(self): self._queue = [] self._index = 0 def push(self, item, priority): heapq.heappush(self._queue, (-priority, self._index, item)) self._index += 1 def pop(self): return heapq.heappop(self._queue)[-1] q = PriorityQueue() q.push(Item('foo'), 1) q.push(Item('bar'), 5) q.push(Item('spam'), 4) q.push(Item('grok'), 1) print q.pop() # Item('bar') q.pop() # Item('spam') print q.pop() # Item('foo') q.pop() # Item('grok') |
4. Bisect
bisect模块能够提供保持list元素序列的支持。它使用了二分法完成大部分的工作。它在向一个list插入元素的同时维持list是有序的。在某些情况下,这比重复的对一个list进行排序更为高效,并且对于一个较大的list来说,对每步操作维持其有序也比对其排序要高效。假设你有一个range集合:
Python
a = [(0, 100), (150, 220), (500, 1000)]
1 | a = [(0, 100), (150, 220), (500, 1000)] |
Python
import bisect
a = [(0, 100), (150, 220), (500, 1000)]
# insort_right places the new range at its sorted position.
bisect.insort_right(a, (250, 400))
print(a)  # [(0, 100), (150, 220), (250, 400), (500, 1000)]
1 2 3 4 5 6 7 | import bisect a = [(0, 100), (150, 220), (500, 1000)] bisect.insort_right(a, (250,400)) a # [(0, 100), (150, 220), (250, 400), (500, 1000)] |
Python
import bisect
a = [(0, 100), (150, 220), (500, 1000)]
bisect.insort_right(a, (250, 400))
bisect.insort_right(a, (399, 450))
print(a)  # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000)]
# bisect() only reports where the new range would go; the list is unchanged.
print(bisect.bisect(a, (550, 1200)))  # 5
1 2 3 4 5 6 7 8 9 | import bisect a = [(0, 100), (150, 220), (500, 1000)] bisect.insort_right(a, (250,400)) bisect.insort_right(a, (399, 450)) a # [(0, 100), (150, 220), (250, 400), (500, 1000)] bisect.bisect(a, (550, 1200)) # 5 |
Python
import bisect
a = [(0, 100), (150, 220), (500, 1000)]
bisect.insort_right(a, (250, 400))
bisect.insort_right(a, (399, 450))
print(a)  # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000)]
# bisect() only reports where the new range would go; insort_right inserts it.
print(bisect.bisect(a, (550, 1200)))  # 5
bisect.insort_right(a, (550, 1200))
print(a)  # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000), (550, 1200)]
1 2 3 4 5 6 7 8 9 10 11 | import bisect a = [(0, 100), (150, 220), (500, 1000)] bisect.insort_right(a, (250,400)) bisect.insort_right(a, (399, 450)) a # [(0, 100), (150, 220), (250, 400), (500, 1000)] bisect.bisect(a, (550, 1200)) # 5 bisect.insort_right(a, (550, 1200)) a # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000), (550, 1200)] |
5. Weakref
weakref模块能够帮助我们创建Python引用,却不会阻止对象的销毁操作。这一节包含了weak reference的基本用法,并且引入一个代理类。在开始之前,我们需要明白什么是strong reference。strong reference是一个对对象的引用次数、生命周期以及销毁时机产生影响的指针。strong reference如你所见,就是当你将一个对象赋值给一个变量的时候产生的:
Python
>>> a = [1,2,3]
>>> b = a
1 2 | >>> a = [1,2,3] >>> b = a |
Python
class Foo(object):
    """Object that announces its own creation and destruction."""

    def __init__(self):
        self.obj = None
        print('created')

    def __del__(self):
        print('destroyed')

    def show(self):
        """Print the stored object."""
        print(self.obj)

    def store(self, obj):
        """Remember ``obj`` on this instance."""
        self.obj = obj


a = Foo()  # created
b = a  # a second strong reference to the same object
del a  # nothing is printed: b still refers to the object
del b  # destroyed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | class Foo(object): def __init__(self): self.obj = None 'created' def __del__(self): 'destroyed' def show(self): self.obj def store(self, obj): self.obj = obj a = Foo() # created b = a del a del b # destroyed |
你可以使用weakref.ref函数来创建对象的weak reference。这个函数调用需要将一个strong reference作为第一个参数传给函数,并且返回一个weak reference。
Python
>>> import weakref
>>> a = Foo()
created
>>> b = weakref.ref(a)
>>> b
1 2 3 4 5 | >>> import weakref >>> a = Foo() created >>> b = weakref.ref(a) >>> b |
Python
>>> a == b()
True
>>> b().show()
None
1 2 3 4 | >>> a == b() True >>> b().show() None |
Python
>>> del a
destroyed
1 2 | >>> del a destroyed |
Python
>>> b() is None
True
1 2 | >>> b() is None True |
Python
>>> a = Foo()
created
>>> b = weakref.proxy(a)
>>> b.store('fish')
>>> b.show()
fish
>>> del a
destroyed
>>> b.show()
Traceback (most recent call last):
File "", line 1, in ?
ReferenceError: weakly-referenced object no longer exists
1 2 3 4 5 6 7 8 9 10 11 12 | >>> a = Foo() created >>> b = weakref.proxy(a) >>> b.store('fish') >>> b.show() fish >>> del a destroyed >>> b.show() Traceback (most recent call last): File "", line 1, in ? ReferenceError: weakly-referenced object no longer exists |
完整的例子:
引用计数器是由Python的垃圾回收器使用的,当一个对象的引用计数变为0时,它就会被垃圾回收器回收。最好将weak reference用于开销较大的对象,或用来避免循环引用(虽然垃圾回收器经常干这种事情)。
Python
import weakref
import gc
class MyObject(object):
    """Minimal class used as the target of a weak reference."""

    def my_method(self):
        print('my_method was called!')


obj = MyObject()
r = weakref.ref(obj)
gc.collect()
assert r() is obj  # r() allows you to access the object referenced: it's there.
obj = 1  # Let's change what obj references to
gc.collect()
assert r() is None  # There is no object left: it was gc'ed.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | import weakref import gc class MyObject(object): def my_method(self): 'my_method was called!' obj = MyObject() r = weakref.ref(obj) gc.collect() assert r() is obj #r() allows you to access the object referenced: it's there. obj = 1 #Let's change what obj references to gc.collect() assert r() is None #There is no object left: it was gc'ed. |
6. Copy()
通过shallow或deep copy语法提供复制对象的函数操作。shallow和deep copying的不同之处在于对于混合型对象的操作(混合对象是包含了其他类型对象的对象,例如list或其他类实例)。
对于shallow copy而言,它创建一个新的混合对象,并且将原对象中其他对象的引用插入新对象。
对于deep copy而言,它创建一个新的对象,并且递归地复制源对象中的其他对象并插入新的对象中。
普通的赋值操作只是简单地将新变量指向源对象。
Python
import copy
a = [1, 2, 3]
b = [4, 5]
c = [a, b]

# Normal Assignment
d = c
print(id(c) == id(d))  # True - d is the same object as c
print(id(c[0]) == id(d[0]))  # True - d[0] is the same object as c[0]

# Shallow Copy
d = copy.copy(c)
print(id(c) == id(d))  # False - d is now a new object
print(id(c[0]) == id(d[0]))  # True - d[0] is the same object as c[0]

# Deep Copy
d = copy.deepcopy(c)
print(id(c) == id(d))  # False - d is now a new object
print(id(c[0]) == id(d[0]))  # False - d[0] is now a new object
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | import copy a = [1,2,3] b = [4,5] c = [a,b] # Normal Assignment d = c id(c) == id(d) # True - d is the same object as c print id(c[0]) == id(d[0]) # True - d[0] is the same object as c[0] # Shallow Copy d = copy.copy(c) id(c) == id(d) # False - d is now a new object print id(c[0]) == id(d[0]) # True - d[0] is the same object as c[0] # Deep Copy d = copy.deepcopy(c) id(c) == id(d) # False - d is now a new object print id(c[0]) == id(d[0]) # False - d[0] is now a new object |
deep copy (deepcopy())创建的对象包含的引用指向复制出来的新对象。
复杂的例子:
假定我有两个类,名为Manager和Graph,每个Graph包含了一个指向其manager的引用,而每个Manager有一个指向其管理的Graph的集合,现在我们有两个任务需要完成:1) 复制一个graph实例,使用deepcopy,但其manager指向为原graph的manager。
2) 复制一个manager,完全创建新manager,但拷贝原有的所有graph。
Python
import weakref, copy
class Graph(object):
    """Graph that holds only a weak reference to its manager."""

    def __init__(self, manager=None):
        # Stored as a weakref (or None) so the graph does not keep its manager alive.
        self.manager = None if manager is None else weakref.ref(manager)

    def __deepcopy__(self, memodict):
        """Return a new Graph attached to the copied manager, if there is one.

        ``memodict`` is the memo passed in by ``copy.deepcopy``. If it holds an
        entry for the manager's id, that entry is used; otherwise the new graph
        keeps pointing at the original manager.
        """
        # A graph created without a manager has no weakref to call.
        manager = None if self.manager is None else self.manager()
        return Graph(memodict.get(id(manager), manager))
class Manager(object):
    """Owner of a collection of graphs; each graph gets a weak back-reference."""

    def __init__(self, graphs=None):
        # A fresh list per instance: a mutable default would be shared by
        # every Manager created without arguments.
        self.graphs = [] if graphs is None else graphs
        for g in self.graphs:
            g.manager = weakref.ref(self)
a = Manager([Graph(), Graph()])
b = copy.deepcopy(a)
# all() checks every copied graph; a bare non-empty list is truthy no matter
# what it contains, so it would not test anything.
if all(g.manager() is b for g in b.graphs):
    print(True)  # True
# Deep-copying a single graph keeps it attached to the original manager.
if copy.deepcopy(a.graphs[0]).manager() is a:
    print(True)  # True
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | import weakref, copy class Graph(object): def __init__(self, manager=None): self.manager = None if manager is None else weakref.ref(manager) def __deepcopy__(self, memodict): manager = self.manager() return Graph(memodict.get(id(manager), manager)) class Manager(object): def __init__(self, graphs=[]): self.graphs = graphs for g in self.graphs: g.manager = weakref.ref(self) a = Manager([Graph(), Graph()]) b = copy.deepcopy(a) if [g.manager() is b for g in b.graphs]: True # True if copy.deepcopy(a.graphs[0]).manager() is a: True # True |
7. Pprint()
Pprint模块能够提供比较优雅的数据结构打印方式,如果你需要打印一个结构较为复杂,层次较深的字典或是JSON对象时,使用Pprint能够提供较好的打印结果。假定你需要打印一个矩阵,当使用普通的print时,你只能打印出普通的列表,不过如果使用pprint,你就能打出漂亮的矩阵结构
如果
Python
import pprint
# A 3x3 matrix as nested lists.
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
# A narrow width forces each row onto its own line.
a = pprint.PrettyPrinter(width=20)
a.pprint(matrix)
# [[1, 2, 3],
# [4, 5, 6],
# [7, 8, 9]]
1 2 3 4 5 6 7 8 9 | import pprint matrix = [ [1,2,3], [4,5,6], [7,8,9] ] a = pprint.PrettyPrinter(width=20) a.pprint(matrix) # [[1, 2, 3], # [4, 5, 6], # [7, 8, 9]] |
额外的知识
一些基本的数据结构
1. 单链表
class Node:
    """One cell of a singly linked list: a payload plus a link to the next cell."""

    def __init__(self):
        self.data = None
        self.nextNode = None

    def set_and_return_Next(self):
        """Create a new empty node, link it after this one and return it."""
        self.nextNode = Node()
        return self.nextNode

    def getNext(self):
        """Return the following node, or None at the end of the chain."""
        return self.nextNode

    def getData(self):
        """Return the payload stored in this node."""
        return self.data

    def setData(self, d):
        """Store ``d`` as this node's payload."""
        self.data = d
class LinkedList:
    """Singly linked list built from a sequence of values."""

    def __init__(self):
        # Defined up front so printList() is safe before buildList() is called.
        self.head = None
        self.tail = None
        self.temp = None

    def buildList(self, array):
        """Replace the list contents with the items of ``array``.

        Returns the head node, or None when ``array`` is empty.
        """
        self.head = self.tail = self.temp = None
        if len(array) == 0:
            # Nothing to link; leave the list empty instead of indexing array[0].
            return None
        self.head = Node()
        self.head.setData(array[0])
        self.temp = self.head  # cursor that always sits on the last node built
        for i in array[1:]:
            self.temp = self.temp.set_and_return_Next()
            self.temp.setData(i)
        self.tail = self.temp
        return self.head

    def printList(self):
        """Print every stored value from head to tail, one per line."""
        if self.head is None:
            return  # empty list: nothing to print
        tempNode = self.head
        while tempNode != self.tail:
            print(tempNode.getData())
            tempNode = tempNode.getNext()
        print(self.tail.getData())
# Demo: build a linked list from sample values and print them in order.
myArray = [3, 5, 4, 6, 2, 6, 7, 8, 9, 10, 21]
myList = LinkedList()
myList.buildList(myArray)
myList.printList()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | class Node: def __init__(self): self.data = None self.nextNode = None def set_and_return_Next(self): self.nextNode = Node() return self.nextNode def getNext(self): return self.nextNode def getData(self): return self.data def setData(self, d): self.data = d class LinkedList: def buildList(self, array): self.head = Node() self.head.setData(array[0]) self.temp = self.head for i in array[1:]: self.temp = self.temp.set_and_return_Next() self.temp.setData(i) self.tail = self.temp return self.head def printList(self): tempNode = self.head while(tempNode!=self.tail): print(tempNode.getData()) tempNode = tempNode.getNext() print(self.tail.getData()) myArray = [3, 5, 4, 6, 2, 6, 7, 8, 9, 10, 21] myList = LinkedList() myList.buildList(myArray) myList.printList() |
2. 用Python实现的普林姆算法
译者注:普林姆算法(Prim's Algorithm)是图论中在加权连通图里搜索最小生成树的算法。
Python
from collections import defaultdict
from heapq import heapify, heappop, heappush
def prim(nodes, edges):
    """Return a minimum spanning tree found with Prim's algorithm.

    nodes -- sequence of hashable node names; the search starts at nodes[0].
    edges -- iterable of (node1, node2, cost) tuples, treated as undirected.

    Returns a list of (from_node, to_node, cost) tuples in the order the
    edges were added to the tree. An empty ``nodes`` yields an empty list.
    """
    if not nodes:
        return []

    # Adjacency map: node -> list of (cost, node, neighbour), cost first so
    # the heap orders candidate edges by cost.
    conn = defaultdict(list)
    for n1, n2, c in edges:
        conn[n1].append((c, n1, n2))
        conn[n2].append((c, n2, n1))

    mst = []
    # A one-element set literal: set(nodes[0]) would split a multi-character
    # node name into its individual characters.
    used = {nodes[0]}
    usable_edges = conn[nodes[0]][:]  # copy, so heapify leaves conn untouched
    heapify(usable_edges)

    while usable_edges:
        cost, n1, n2 = heappop(usable_edges)
        if n2 not in used:
            used.add(n2)
            mst.append((n1, n2, cost))
            # Offer every edge leaving the new node that reaches an unused node.
            for e in conn[n2]:
                if e[2] not in used:
                    heappush(usable_edges, e)
    return mst
# test
nodes = list("ABCDEFG")
edges = [("A", "B", 7), ("A", "D", 5),
         ("B", "C", 8), ("B", "D", 9), ("B", "E", 7),
         ("C", "E", 5),
         ("D", "E", 15), ("D", "F", 6),
         ("E", "F", 8), ("E", "G", 9),
         ("F", "G", 11)]
# Formatting into one string keeps the output "prim: [...]" on both Python 2
# and Python 3 (print("prim:", x) would print a tuple on Python 2).
print("prim: {0}".format(prim(nodes, edges)))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | from collections import defaultdict from heapq import heapify, heappop, heappush def prim( nodes, edges ): conn = defaultdict( list ) for n1,n2,c in edges: conn[ n1 ].append( (c, n1, n2) ) conn[ n2 ].append( (c, n2, n1) ) mst = [] used = set( nodes[ 0 ] ) usable_edges = conn[ nodes[0] ][:] heapify( usable_edges ) while usable_edges: cost, n1, n2 = heappop( usable_edges ) if n2 not in used: used.add( n2 ) mst.append( ( n1, n2, cost ) ) for e in conn[ n2 ]: if e[ 2 ] not in used: heappush( usable_edges, e ) return mst #test nodes = list("ABCDEFG") edges = [ ("A", "B", 7), ("A", "D", 5), ("B", "C", 8), ("B", "D", 9), ("B", "E", 7), ("C", "E", 5), ("D", "E", 15), ("D", "F", 6), ("E", "F", 8), ("E", "G", 9), ("F", "G", 11)] "prim:", prim( nodes, edges ) |
总结
如果想了解更多的数据结构信息,请参阅相关文档。谢谢阅读。
相关文章推荐
- Python中的高级数据结构
- python高级编程---数据结构
- python中的高级数据结构
- Python中的高级数据结构
- python 的3大内置数据结构
- Python学习笔记(1)--数据结构
- 打印python的ctype定义的结构中的数据
- python_数据结构
- Python中数据结构 列表 元组 字典 序列
- 高级数据类型-python
- Python学习(七) -- 数据结构
- python 数据结构
- .net 数据结构与算法基础:高级排序
- python 数据结构
- Python中数据结构之字符串:每种语言必重点讲授的难点内容
- Python学习--数据结构
- Python 2.7 Tutorial —— 数据结构
- python 数据结构
- 8.2日讲座记录-高级数据结构
- python学习总结之数据结构