Source code for ckipnlp.util.ws

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

__author__ = 'Mu Yang <http://muyang.pro>'
__copyright__ = '2018-2020 CKIP Lab'
__license__ = 'CC BY-NC-SA 4.0'

import collections as _collections
import json as _json

from typing import (
    NamedTuple,
)

################################################################################################################################

[docs]class WsWord(NamedTuple): """A word-segmented word.""" word: str = None #: *str* – the word. pos: str = None #: *str* – the post-tag. def __str__(self): return self.to_text()
[docs] @classmethod def from_text(cls, data): """Construct an instance from :class:`ckipnlp.ws.CkipWs` output. Parameters ---------- data : str text such as ``'中文字(Na)'``. Notes ----- - ``'中文字(Na)'`` -> word = ``'中文字'``, pos = ``'Na'`` - ``'中文字'`` -> word = ``'中文字'``, pos = ``None`` """ return cls(*data.strip(')').rsplit('(', 1))
[docs] def to_text(self): """Transform to plain text. Return ------ str """ return '{}({})'.format(self.word, self.pos)
[docs] @classmethod def from_dict(cls, data): """Construct an instance from python built-in containers. Parameters ---------- data : dict dictionary such as ``{ 'word': '中文字', 'pos': 'Na' }`` """ return cls(**data)
[docs] def to_dict(self): """Transform to python built-in containers. Return ------ dict """ return self._asdict() # pylint: disable=no-member
[docs] @classmethod def from_json(cls, data, **kwargs): """Construct an instance from JSON format. Parameters ---------- data : str please refer :meth:`from_dict` for format details. """ return cls.from_dict(_json.loads(data, **kwargs))
[docs] def to_json(self, **kwargs): """Transform to JSON format. Return ------ str """ return _json.dumps(self.to_dict(), **kwargs)
################################################################################################################################
[docs]class WsSentence(_collections.UserList): # pylint: disable=too-many-ancestors """A word-segmented sentence.""" item_class = WsWord def __str__(self): return self.to_text()
[docs] @classmethod def from_text(cls, data): """Construct an instance from :class:`ckipnlp.ws.CkipWs` output. Parameters ---------- data : str text such as ``'中文字(Na)\\u3000喔(T)'``. """ return cls(map(cls.item_class.from_text, data.split('\u3000')))
[docs] def to_text(self): """Transform to plain text. Return ------ str """ return '\u3000'.join(map(self.item_class.to_text, self))
[docs] @classmethod def from_dict(cls, data): """Construct an instance a from python built-in containers. Parameters ---------- data : Sequence[dict] list of objects as :meth:`WsWord.from_dict` input. """ return cls(map(cls.item_class.from_dict, data))
[docs] def to_dict(self): """Transform to python built-in containers. Return ------ List[dict] """ return [word.to_dict() for word in self]
[docs] @classmethod def from_json(cls, data, **kwargs): """Construct an instance from JSON format. Parameters ---------- data : str please refer :meth:`from_dict` for format details. """ return cls.from_dict(_json.loads(data, **kwargs))
[docs] def to_json(self, **kwargs): """Transform to JSON format. Return ------ str """ return _json.dumps(self.to_dict(), **kwargs)
################################################################################################################################
[docs]class WsSentenceList(_collections.UserList): # pylint: disable=too-many-ancestors """A list of word-segmented sentence.""" item_class = WsSentence def __str__(self): return self.to_text()
[docs] @classmethod def from_text(cls, data): """Construct an instance from :class:`ckipnlp.ws.CkipWs` output. Parameters ---------- data : Sequence[str] list of texts as :meth:`WsSentence.from_text` input. """ return cls(map(cls.item_class.from_text, data))
[docs] def to_text(self): """Transform to plain text. Return ------ List[str] """ return list(map(self.item_class.to_text, self))
[docs] @classmethod def from_dict(cls, data): """Construct an instance a from python built-in containers. Parameters ---------- data : Sequence[Sequence[dict]] list of objects as :meth:`WsSentence.from_dict` input. """ return cls(map(cls.item_class.from_dict, data))
[docs] def to_dict(self): """Transform to python built-in containers. Return ------ List[List[dict]] """ return [sentence.to_dict() for sentence in self]
[docs] @classmethod def from_json(cls, data, **kwargs): """Construct an instance from JSON format. Parameters ---------- data : str please refer :meth:`from_dict` for format details. """ return cls.from_dict(_json.loads(data, **kwargs))
[docs] def to_json(self, **kwargs): """Transform to JSON format. Return ------ str """ return _json.dumps(self.to_dict(), **kwargs)