"""
Collection utility module providing functions for sequence manipulation and grouping operations.
This module offers utilities for working with collections, including:
- Removing duplicates while preserving order
- Grouping elements by custom criteria with optional post-processing
The functions are designed to be type-safe and work with various sequence types.
"""
from typing import Union, TypeVar, Sequence, Callable, Optional, Dict, List, Iterable
# Names exported by a star-import of this module.
__all__ = ['unique', 'group_by']
# Type variable for the element type of the collections handled in this module.
_ElementType = TypeVar('_ElementType')


def unique(s: Union[Sequence[_ElementType]]) -> Sequence[_ElementType]:
    """
    Remove the duplicated values from ``s``, keeping the first occurrence of
    each value and preserving the original order.

    :param s: Original sequence. Its elements must be hashable.
    :type s: Union[Sequence[_ElementType]]
    :return: Sequence of the distinct elements, rebuilt with the same type as ``s``.
    :rtype: Sequence[_ElementType]
    :raises TypeError: If an element of ``s`` is unhashable.

    Examples::
        >>> from hbutils.collection import unique
        >>>
        >>> unique([1, 2, 3, 1])
        [1, 2, 3]
        >>> unique(('a', 'b', 'a', 'c', 'd', 'e', 'b'))
        ('a', 'b', 'c', 'd', 'e')
        >>> unique([3, 1, 2, 1, 4, 3])
        [3, 1, 2, 4]
        >>> unique('abcab')
        'abc'
    """
    # Dict keys are unique and keep insertion order, so only the first
    # occurrence of every element survives, in its original position.
    distinct = list(dict.fromkeys(s))

    if isinstance(s, str):
        # ``str(list)`` would produce the list's repr rather than a string made
        # of the characters, so strings have to be rebuilt by joining.
        return type(s)(''.join(distinct))

    # Rebuild with the caller's own sequence type (list, tuple, bytes, ...).
    return type(s)(distinct)
# Type variables for the group key and for the post-processed group value.
_GroupType = TypeVar('_GroupType')
_ResultType = TypeVar('_ResultType')


def group_by(s: Iterable[_ElementType],
             key: Callable[[_ElementType], _GroupType],
             gfunc: Optional[Callable[[List[_ElementType]], _ResultType]] = None) -> Dict[_GroupType, _ResultType]:
    """
    Divide the elements into groups.

    Groups appear in the result in the order in which their key is first
    produced, and the elements inside a group keep their original order.

    :param s: Elements to be grouped.
    :type s: Iterable[_ElementType]
    :param key: Group key, should be a callable object that extracts the grouping key from each element.
        The returned keys are used as dictionary keys, so they must be hashable.
    :type key: Callable[[_ElementType], _GroupType]
    :param gfunc: Post-process function for groups, should be a callable object. Default is ``None``,
        which means no post-processing will be performed and raw lists will be returned.
    :type gfunc: Optional[Callable[[List[_ElementType]], _ResultType]]
    :return: Grouping result as a dictionary mapping group keys to processed group values.
    :rtype: Dict[_GroupType, _ResultType]

    Examples::
        >>> from hbutils.collection import group_by
        >>>
        >>> foods = [
        ...     'apple', 'orange', 'pear',
        ...     'banana', 'fish', 'pork', 'milk',
        ... ]
        >>> group_by(foods, len)  # group by length
        {5: ['apple'], 6: ['orange', 'banana'], 4: ['pear', 'fish', 'pork', 'milk']}
        >>> group_by(foods, len, len)  # group and get length
        {5: 1, 6: 2, 4: 4}
        >>> group_by(foods, lambda x: x[0])  # group by first letter
        {'a': ['apple'], 'o': ['orange'], 'p': ['pear', 'pork'], 'b': ['banana'], 'f': ['fish'], 'm': ['milk']}
        >>> group_by(foods, lambda x: x[0], len)  # group and get length
        {'a': 1, 'o': 1, 'p': 2, 'b': 1, 'f': 1, 'm': 1}
    """
    groups: Dict[_GroupType, List[_ElementType]] = {}
    for item in s:
        # ``setdefault`` creates the bucket the first time a key shows up.
        groups.setdefault(key(item), []).append(item)

    # Compare against ``None`` explicitly: a truthiness test would discard a
    # valid callable object that happens to be falsy (e.g. defines ``__len__``).
    if gfunc is None:
        return groups

    return {group_key: gfunc(members) for group_key, members in groups.items()}