您的位置：首页 > 编程语言 > Python开发

python collections 系列

2016-01-25 20:37 776 查看

Collections：collections 模块包含了内建类型之外的一些有用的工具，例如 Counter、defaultdict、OrderedDict、deque 以及 namedtuple，其中 Counter、deque 以及 defaultdict 是最常用的类。

1：计数器（counter）

如果你想统计一个单词在给定的序列中一共出现了多少次，诸如此类的操作就可以用到 Counter。来看看如何统计一个 list 中各个 item 出现的次数：

from collections import Counter

# Sample word list; Counter tallies how many times each item occurs.
l = ['cat','is','this','is','cat','is']

c = Counter(l)
print(c)

### 输出结果
Counter({'is': 3, 'cat': 2, 'this': 1})
若要统计这其中不同单词的个数，我们可以这么做

from collections import Counter

# Count every word, then report how many different words there are.
l = ['cat', 'is', 'this', 'is', 'cat', 'is']
c = Counter(l)
print(c)
print(len(set(l)))

### 输出结果
Counter({'is': 3, 'cat': 2, 'this': 1})
3

counter是对字典类型的补充，具备字典的所有功能然后加自己的功能

1：Counter 类方法，里面给出了详细的例子

class Counter(dict):
    '''Dict subclass for counting hashable items.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.

    >>> c = Counter('abcdeabcdabcaba')  # count elements from a string

    >>> c.most_common(3)                # three most common elements
    [('a', 5), ('b', 4), ('c', 3)]
    >>> sorted(c)                       # list all unique elements
    ['a', 'b', 'c', 'd', 'e']
    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
    'aaaaabbbbcccdde'
    >>> sum(c.values())                 # total of all counts
    15

    >>> c['a']                          # count of letter 'a'
    5
    >>> for elem in 'shazam':           # update counts from an iterable
    ...     c[elem] += 1                # by adding 1 to each element's count
    >>> c['a']                          # now there are seven 'a'
    7
    >>> del c['b']                      # remove all 'b'
    >>> c['b']                          # now there are zero 'b'
    0

    >>> d = Counter('simsalabim')       # make another counter
    >>> c.update(d)                     # add in the second counter
    >>> c['a']                          # now there are nine 'a'
    9

    >>> c.clear()                       # empty the counter
    >>> c
    Counter()

    Note:  If a count is set to zero or reduced to zero, it will remain
    in the counter until the entry is deleted or the counter is cleared:

    >>> c = Counter('aaabbc')
    >>> c['b'] -= 2                     # reduce the count of 'b' by two
    >>> c.most_common()                 # 'b' is still in, but its count is zero
    [('a', 3), ('c', 1), ('b', 0)]

    '''
    # References:
    #   http://en.wikipedia.org/wiki/Multiset
    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
    #   http://code.activestate.com/recipes/259174/
    #   Knuth, TAOCP Vol. II section 4.6.3

2：most_common ---：返回出现次数最多的前 n 个元素及其计数（n 为 None 时返回全部）

def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]

    '''
    # NOTE(review): only the signature and docstring are reproduced in this
    # excerpt; the method body is not shown here.

使用方法：

# Print the top-n entries for n = 4, 3, 2, 1 (most frequent first).
l = ['cat', 'is', 'this', 'is', 'cat', 'is', 'name']
for top_n in (4, 3, 2, 1):
    print(Counter(l).most_common(top_n))
### 输出结果
[('is', 3), ('cat', 2), ('this', 1), ('name', 1)]
[('is', 3), ('cat', 2), ('this', 1)]
[('is', 3), ('cat', 2)]
[('is', 3)]

3：elements ---: 返回一个迭代器，每个元素按其计数重复出现；计数为零或负数的元素会被忽略

def elements(self):
    '''Return an iterator that yields each element as often as its count.

    >>> c = Counter('ABCABC')
    >>> sorted(c.elements())
    ['A', 'A', 'B', 'B', 'C', 'C']

    # Knuth's example: the prime factors of 1836 are 2**2 * 3**3 * 17**1
    >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
    >>> product = 1
    >>> for factor in prime_factors.elements():     # walk over the factors
    ...     product *= factor                       # multiplying as we go
    >>> product
    1836

    Elements whose count is zero or negative are skipped.

    '''
    # Same idea as Bag.do in Smalltalk and Multiset.begin in C++: build one
    # repeat(element, count) run per item and chain the runs end to end.
    per_item_runs = _starmap(_repeat, self.items())
    return _chain.from_iterable(per_item_runs)

# Override dict methods where necessary

4：未实现的方法

@classmethod
def fromkeys(cls, iterable, v=None):
    """Always raise: counters have no meaningful ``fromkeys`` constructor.

    Giving every key the same value (for example ``v=1``) would mean that
    no element could have a count greater than one, so callers are pointed
    to ``Counter(iterable)`` instead.

    Raises:
        NotImplementedError: unconditionally.
    """
    message = 'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.'
    raise NotImplementedError(message)

5: update ---: 更新计数，把新内容的计数累加到已有计数上（而不是替换）

def update(*args, **kwds):
    '''Add counts to the counter instead of replacing them (unlike dict.update()).

    The source may be an iterable, a dictionary, or another Counter instance;
    keyword arguments are added as counts as well.

    >>> c = Counter('which')
    >>> c.update('witch')           # add elements from another iterable
    >>> d = Counter('watch')
    >>> c.update(d)                 # add elements from another counter
    >>> c['h']                      # four 'h' in which, witch, and watch
    4

    Raises TypeError when called without an instance or with more than one
    positional source.

    '''
    # dict.update() would overwrite some counts and leave others untouched,
    # which has no sensible meaning when counting, so counts are added.
    # Zero and negative counts are allowed on both sides.

    if not args:
        raise TypeError("descriptor 'update' of 'Counter' object "
                        "needs an argument")
    # The instance is the first positional argument; whatever follows is
    # the optional source.
    self, *rest = args
    if len(rest) > 1:
        raise TypeError('expected at most 1 arguments, got %d' % len(rest))
    source = rest[0] if rest else None
    if source is not None:
        if not isinstance(source, Mapping):
            _count_elements(self, source)
        elif not self:
            # fast path when counter is empty
            super(Counter, self).update(source)
        else:
            lookup = self.get
            for elem, count in source.items():
                self[elem] = count + lookup(elem, 0)
    if kwds:
        self.update(kwds)

6：subtract -----：减去计数：从计数器中减去匹配元素的计数，结果可以为零或负数

def subtract(*args, **kwds):
    '''Like dict.update() but subtracts counts instead of replacing them.
    Counts can be reduced below zero.  Both the inputs and outputs are
    allowed to contain zero and negative counts.

    Source can be an iterable, a dictionary, or another Counter instance.

    Usage:

    >>> c = Counter('which')
    >>> c.subtract('witch')             # subtract elements from another iterable
    >>> c.subtract(Counter('watch'))    # subtract elements from another counter
    >>> c['h']                          # 2 in which, minus 1 in witch, minus 1 in watch
    0
    >>> c['w']                          # 1 in which, minus 1 in witch, minus 1 in watch
    -1

    Raises TypeError when called without an instance or with more than one
    positional source.

    '''
    # The instance arrives as the first positional argument and is unpacked
    # by hand, which leaves every keyword (even one named 'self') available
    # as a count in kwds.
    if not args:
        raise TypeError("descriptor 'subtract' of 'Counter' object "
                        "needs an argument")
    self, *args = args
    if len(args) > 1:
        raise TypeError('expected at most 1 arguments, got %d' % len(args))
    iterable = args[0] if args else None
    if iterable is not None:
        self_get = self.get
        if isinstance(iterable, Mapping):
            # Mapping source: subtract the given count for each key.
            for elem, count in iterable.items():
                self[elem] = self_get(elem, 0) - count
        else:
            # Plain iterable: every occurrence subtracts one.
            for elem in iterable:
                self[elem] = self_get(elem, 0) - 1
    if kwds:
        self.subtract(kwds)

7: copy ---: 复制

def copy(self):
    """Return a shallow copy, built by calling the instance's own class on it."""
    own_class = self.__class__
    return own_class(self)

使用方法：

# Copy a Counter and show that the copy holds the same counts.
l = Counter(['cat', 'is'])

l1 = l.copy()
print(l1)
print(l)

### 输出结果
Counter({'cat': 1, 'is': 1})
Counter({'cat': 1, 'is': 1})

二：有序字典(OrderedDict)

对字典类型的补充，它会记住字典中元素的插入顺序。

1：OrderedDict 类方法

class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as regular dictionaries.

    # The internal self.__map dict maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
    # The prev links are weakref proxies (to prevent circular references).
    # Individual links are kept alive by the hard reference in self.__map.
    # Those hard references disappear when a key is deleted from an OrderedDict.

2：clear ----：清空所有元素，返回 None

def clear(self):
    '''od.clear() -> None.  Remove all items from od.

    Resets the linked list to just the sentinel, empties the key-to-link
    map and then empties the underlying dict.
    '''
    root = self.__root
    # An empty circular list is the sentinel pointing at itself both ways.
    root.prev = root.next = root
    self.__map.clear()
    dict.clear(self)
使用方法：

d = {'name': 'zhang', 'age': 24}
# clear() empties the dictionary in place and returns None, so None is printed.
print(OrderedDict(d).clear())

### 输出结果
None

3: popitem ----: 删除并返回一组键值对；last 为 True（默认）时删除最后一组，为 False 时删除第一组；字典为空时抛出 KeyError

def popitem(self, last=True):
    '''od.popitem() -> (k, v), return and remove a (key, value) pair.
    Pairs are returned in LIFO order if last is true or FIFO order if false.

    Raises KeyError if the dictionary is empty.

    '''
    if not self:
        raise KeyError('dictionary is empty')
    root = self.__root
    if last:
        # Unlink the node just before the sentinel (the newest entry).
        link = root.prev
        link_prev = link.prev
        link_prev.next = root
        root.prev = link_prev
    else:
        # Unlink the node just after the sentinel (the oldest entry).
        link = root.next
        link_next = link.next
        root.next = link_next
        link_next.prev = root
    key = link.key
    del self.__map[key]
    value = dict.pop(self, key)
    return key, value
使用方法：

d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
# Default (last=True) removes the newest pair.
print("default:",d.popitem())
print("------------------------")
print(d)
# last=False removes the oldest pair instead.
print("False:",d.popitem(last=False))
print("------------------------")
print(d)
print("True:",d.popitem(last=True))
print("------------------------")
print(d)

### 输出结果
default: ('天天学习', '好好向上')
------------------------
OrderedDict([('name', 'zhang'), ('age', 24), ('job', 'IT')])
False: ('name', 'zhang')
------------------------
OrderedDict([('age', 24), ('job', 'IT')])
True: ('job', 'IT')
------------------------
OrderedDict([('age', 24)])

4：move_to_end ---：把指定的 key 移动到有序字典的末尾；last 默认为 True，表示移动到末尾，为 False 时移动到开头；key 不存在时抛出 KeyError

def move_to_end(self, key, last=True):
    '''Move an existing element to the end (or beginning if last==False).

    Raises KeyError if the element does not exist.
    When last=True, acts like a fast version of self[key]=self.pop(key).

    '''
    link = self.__map[key]
    # Detach the link from its current position.
    link_prev = link.prev
    link_next = link.next
    link_prev.next = link_next
    link_next.prev = link_prev
    root = self.__root
    if last:
        # Re-insert just before the sentinel.  Note that the flag name
        # "last" is reused from here on for the current tail link.
        last = root.prev
        link.prev = last
        link.next = root
        last.next = root.prev = link
    else:
        # Re-insert just after the sentinel.
        first = root.next
        link.prev = root
        link.next = first
        root.next = first.prev = link
使用方法：

d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
# last=False moves the key to the front.
d.move_to_end('job',last=False)
print("False_job:",d)
d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))
# The default (last=True) moves the key to the back.
d.move_to_end('job')
print("======================================================================")
print("True_job:",d)

### 输出结果
False_job: OrderedDict([('job', 'IT'), ('name', 'zhang'), ('age', 24), ('天天学习', '好好向上')])
======================================================================
True_job: OrderedDict([('name', 'zhang'), ('age', 24), ('天天学习', '好好向上'), ('job', 'IT')])

5: pop ----: 删除指定的 key 并返回对应的值；key 不存在时，若提供了 default 则返回 default，否则抛出 KeyError

def pop(self, key, default=__marker):
    '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
    value.  If key is not found, d is returned if given, otherwise KeyError
    is raised.

    '''
    if key in self:
        result = self[key]
        del self[key]
        return result
    # The marker default tells "no default supplied" apart from an explicit
    # default of None.
    if default is self.__marker:
        raise KeyError(key)
    return default
使用方法：

d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))

# pop() removes the key and hands back its value.
d1 = d.pop('name')
print(d1)

### 输出结果
zhang

6：setdefault --: 如果 key 已存在则返回其值；否则把 key 设置为 default（默认 None）并返回 default

def setdefault(self, key, default=None):
    'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
    if key in self:
        return self[key]
    # Missing key: store the default and return that same object.
    self[key] = default
    return default
使用方法：

d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))

d.setdefault('phone','133')

# With no default given, the new key is stored with the value None.
d.setdefault('addr')
print(d)

### 输出结果
OrderedDict([('name', 'zhang'), ('age', 24), ('job', 'IT'), ('天天学习', '好好向上'), ('phone', '133'), ('addr', None)])

7: copy ---: 复制

def copy(self):
    'od.copy() -> a shallow copy of od'
    # Build the copy through the instance's own class.
    return self.__class__(self)
使用方法：

d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))

d1 = d.copy()
print(d1)

### 输出结果
OrderedDict([('name', 'zhang'), ('age', 24), ('job', 'IT'), ('天天学习', '好好向上')])

8：fromkeys ---: 用 iterable 中的元素作为 key 生成一个新的有序字典，所有 key 对应同一个 value；未指定 value 时默认为 None（不太明白为什么要把这个功能加在 OrderedDict 里）

@classmethod
def fromkeys(cls, iterable, value=None):
    '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
    If not specified, the value defaults to None.

    '''
    self = cls()
    # Every key receives the same value object.
    for key in iterable:
        self[key] = value
    return self
使用方法（注意：fromkeys 是类方法，生成的新字典和 d 原有的内容没有关系）：

d = OrderedDict((('name','zhang'),('age',24),('job','IT'),('天天学习','好好向上')))

d1 = d.fromkeys(('name','SH'))
### no value given
print(d1)
print("==========================")

d1 = d.fromkeys(('name','li'),value='123')
### explicit value
print(d1)

### 输出结果
OrderedDict([('name', None), ('SH', None)])
==========================
OrderedDict([('name', '123'), ('li', '123')])

三：默认字典(defaultdict)

对字典的类型的补充，默认给字典的值设置了一个类型，dict的setdefault（）方法在key不存在时会建立一个默认值。与此相反，defaultdict会在初始化时指定默认值。

1：类定义：第一个参数是一个可调用对象（default_factory，例如某个类型），而不是一个具体的默认值

class defaultdict(dict):
    """
    defaultdict(default_factory[, ...]) --> dict with default factory

    The default factory is called without arguments to produce
    a new value when a key is not present, in __getitem__ only.
    A defaultdict compares equal to a dict with the same items.
    All remaining arguments are treated the same as if they were
    passed to the dict constructor, including keyword arguments.
    """
    # Only the class header and docstring are reproduced in this excerpt.
使用方法：

from collections import defaultdict


def test1():
    """Default factory: supplies the value used for any missing key."""
    return "namezhang"


# The first argument is the factory; the keyword arguments seed the dict.
l = defaultdict(test1, job="it")

print(l)

### 输出结果
defaultdict(<function test1 at 0x029C31E0>, {'job': 'it'})

2: copy ---: 复制

def copy(self):  # real signature unknown; restored from __doc__
    """ D.copy() -> a shallow copy of D. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass

示例

有如下值集合[11,22,33,44,55,66,77,88,99,90...]，将所有大于 66 的值保存至字典的第一个key中，将小于等于 66 的值保存至第二个key的值中。
即： {'k1': 大于66 , 'k2': 小于等于66}
原生字典的实现方式

values = [11, 22, 33, 44, 55, 66, 77, 88, 99, 90]
my_dict = {}
for value in values:
    if value > 66:
        # With a plain dict the list must be created on first use;
        # "in" replaces dict.has_key(), which Python 3 removed.
        if 'k1' in my_dict:
            my_dict['k1'].append(value)
        else:
            my_dict['k1'] = [value]
    else:
        if 'k2' in my_dict:
            my_dict['k2'].append(value)
        else:
            my_dict['k2'] = [value]

默认字典的实现方式

from collections import defaultdict

values = [11, 22, 33, 44, 55, 66, 77, 88, 99, 90]
# A missing key starts out as an empty list, so no existence check is needed.
my_dict = defaultdict(list)

for value in values:
    if value > 66:
        my_dict['k1'].append(value)
    else:
        my_dict['k2'].append(value)

四：可命名元组(namedtuple)

生成可以使用名字来访问元素内容的tuple子类,根据namedtuple可以创建一个包含tuple所有功能以及其他功能的类型

使用方法：

import collections

# Field names are given as one space-separated string.
persion = collections.namedtuple('persion', 'name age gender')
print(type(persion))

bob = persion(name='bob', age=30, gender='male')
print(bob)

zhang = persion(name='zhang', age=25, gender='male')
# A namedtuple is still a tuple, so it can feed %-formatting directly.
# The loop variable has its own name so the persion class is not rebound.
for someone in [bob, zhang]:
    print("%s is %d years old %s" % someone)

### 输出结果
<class 'type'>
persion(name='bob', age=30, gender='male')
bob is 30 years old male
zhang is 25 years old male

可调用的方法

class persion(builtins.tuple)
| persion(name, age, gender)
|
| Method resolution order:
|     persion
|      builtins.tuple
|     builtins.object
|
| Methods defined here:
|
| __getnewargs__(self)
|     Return self as a plain tuple. Used by copy and pickle.
|
| __getstate__(self)
|     Exclude the OrderedDict from pickling
|
| __repr__(self)
|     Return a nicely formatted representation string
|
| _asdict(self)
|     Return a new OrderedDict which maps field names to their values.
|
| _replace(_self, **kwds)
|     Return a new persion object replacing specified fields with new values
|
| ----------------------------------------------------------------------
|  Classmethods defined here:
|
| _make(iterable, new=<built-in method __new__ of type object at 0x1D3357D0>, len=<built-in function len>) from builtins.type
|     Make a new persion object from a sequence or iterable
|
| ----------------------------------------------------------------------
| Static methods defined here:
|
|  __new__(_cls,name, age, gender)
|     Create new instance of persion(name, age, gender)
|
| ----------------------------------------------------------------------
| Data descriptors defined here:
|
| __dict__
|     A new OrderedDict mapping field names to their values
|
|  age
|     Alias for field number 1
|
| gender
|     Alias for field number 2
|
|  name
|     Alias for field number 0
|
| ----------------------------------------------------------------------
| Data and other attributes defined here:
|
| _fields = ('name', 'age', 'gender')
|
| _source = "from builtins import property as _property,tupl..._itemget...
|
| ----------------------------------------------------------------------
| Methods inherited from builtins.tuple:
|
| __add__(self, value, /)
|     Return self+value.
|
| __contains__(self, key, /)
|     Return key in self.
|
| __eq__(self, value, /)
|     Return self==value.
|
| __ge__(self, value, /)
|      Return self>=value.
|
| __getattribute__(self, name, /)
|     Return getattr(self, name).
|
| __getitem__(self, key, /)
|     Return self[key].
|
| __gt__(self, value, /)
|     Return self>value.
|
| __hash__(self, /)
|     Return hash(self).
|
| __iter__(self, /)
|     Implement iter(self).
|
| __le__(self, value, /)
|     Return self<=value.
|
| __len__(self, /)
|     Return len(self).
|
| __lt__(self, value, /)
|     Return self<value.
|
| __mul__(self, value, /)
|     Return self*value.
|
| __ne__(self, value, /)
|     Return self!=value.
|
| __rmul__(self, value, /)
|     Return self*value.
|
| __sizeof__(...)
|     T.__sizeof__() -- size of T in memory, in bytes
|
| count(...)
|     T.count(value) -> integer -- return number of occurrences of value
|
| index(...)
|     T.index(value, [start, [stop]]) -> integer -- return first index of value.
|     Raises ValueError if the value is not present.

五：双向队列(deque)

一个线程安全的双向队列,可以从两端添加删除元素，支持序列的常用操作。

1: 类方法

class deque(object):
    """
    deque([iterable[, maxlen]]) --> deque object

    Build an ordered collection with optimized access from its endpoints.
    """
    # Only the class header and docstring are reproduced in this excerpt.

2: append ---: 添加：从右侧添加

def append(self, *args, **kwargs):  # real signature unknown
    """ Add an element to the right side of the deque. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

from collections import deque

l = deque(['name','age'])

# append() adds on the right-hand end.
l.append('job')
print(l)

### 输出结果
deque(['name', 'age', 'job'])

3: appendleft: ----: 左侧添加

def appendleft(self, *args, **kwargs):  # real signature unknown
    """ Add an element to the left side of the deque. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

from collections import deque

l = deque(['name','age'])

# appendleft() adds on the left-hand end.
l.appendleft('job')
print(l)

### 输出结果
deque(['job', 'name', 'age'])

4: clear ---: 清空

def clear(self, *args, **kwargs):  # real signature unknown
    """ Remove all elements from the deque. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

from collections import deque

l = deque(['name','age'])

# clear() empties the deque in place.
l.clear()
print(l)

### 输出结果
deque([])

5: count ---：计算出现次数

def count(self, value):  # real signature unknown; restored from __doc__
    """ D.count(value) -> integer -- return number of occurrences of value """
    # Placeholder body: this excerpt always returns 0.
    return 0
使用方法：

from collections import deque

l = deque(['1','2','1','2','3'])
# count() reports how many items equal the given value.
print(l.count('1'))

### 输出结果
2

6: extend ----:扩展，从右侧添加

def extend(self, *args, **kwargs):  # real signature unknown
    """ Extend the right side of the deque with elements from the iterable """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

from collections import deque

l = deque(['1','2','1','2','3'])

# extend() appends every item of the iterable on the right.
l.extend('5')
print(l)

### 输出结果
deque(['1', '2', '1', '2', '3', '5'])

7: extendleft ---：左侧扩展添加,与extend相反方向

def extendleft(self, *args, **kwargs):  # real signature unknown
    """ Extend the left side of the deque with elements from the iterable """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass

8: pop --- ：删除并返回最右侧的数据

def pop(self, *args, **kwargs):  # real signature unknown
    """ Remove and return the rightmost element. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

l = deque(['1','2','1','2','3'])
# pop() removes and returns the rightmost item.
print(l.pop())

### 输出结果
3

9: popleft ---: 删除并返回最左侧的数据，与pop相反

def popleft(self, *args, **kwargs):  # real signature unknown
    """ Remove and return the leftmost element. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass

10: remove ---: 删除第一次出现的值

def remove(self, value):  # real signature unknown; restored from __doc__
    """ D.remove(value) -- remove first occurrence of value. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

from collections import deque

# Only the first matching item is removed; the later '2' stays.
l = deque(['1', '2', '1', '2', '3'])
l.remove('2')
print(l)

### 输出结果
deque(['1', '1', '2', '3'])

11: reverse ---：原地反转队列中元素的顺序

def reverse(self):  # real signature unknown; restored from __doc__
    """ D.reverse() -- reverse *IN PLACE* """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

# reverse() flips the deque in place; it does not build a new one.
l = deque(['1', '2', '1', '2', '3'])
l.reverse()
print(l)

### 输出结果
deque(['3', '2', '1', '2', '1'])

12: rotate ---: 旋转队列：默认向右旋转 1 步，也就是把最右侧的元素移到最左边；n 为负数时向左旋转

def rotate(self, *args, **kwargs):  # real signature unknown
    """ Rotate the deque n steps to the right (default n=1).  If n is negative, rotates left. """
    # Placeholder body: this excerpt shows only the signature and docstring.
    pass
使用方法：

# Rotate right by three steps.
l = deque(['1', '2', '3', '4', '5'])
l.rotate(3)
print(l)
# On a fresh deque, rotate by the default single step.
l = deque(['1', '2', '3', '4', '5'])
l.rotate()
print(l)

### 输出结果
deque(['3', '4', '5', '1', '2'])
deque(['5', '1', '2', '3', '4'])

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航