Source code for ckipnlp.util.ws

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

__author__ = 'Mu Yang <http://muyang.pro>'
__copyright__ = '2018-2019 CKIP Lab'
__license__ = 'CC BY-NC-SA 4.0'

import collections as _collections
import json as _json

from typing import (
    NamedTuple,
)

[docs]class WsWord(NamedTuple): """A word-segmented word.""" word: str #: *str* – the word. pos: str #: *str* – the post-tag.
[docs] @classmethod def from_text(cls, text): """Create a :class:`WsWord` object from :class:`ckipnlp.ws.CkipWs` output. Parameters ---------- text : str A word from :class:`ckipnlp.ws.CkipWs` output. """ return cls(*text.strip(')').rsplit('(', 1))
def __str__(self): return self.to_text()
[docs] def to_text(self): """Transform to plain text.""" return '{}({})'.format(self.word, self.pos)
[docs] def to_dict(self): """Transform to python dict/list.""" return self._asdict() # pylint: disable=no-member
[docs] def to_json(self, **kwargs): """Transform to JSON format.""" return _json.dumps(self.to_dict(), **kwargs)
[docs]class WsSentence(_collections.UserList): # pylint: disable=too-many-ancestors """A word-segmented sentence.""" item_class = WsWord
[docs] @classmethod def from_text(cls, text): """Create :class:`WsSentence` object from :class:`ckipnlp.ws.CkipWs` output. Parameters ---------- text : str A sentence from :class:`ckipnlp.ws.CkipWs` output. """ return cls(map(cls.item_class.from_text, text.split('\u3000')))
def __str__(self): return self.to_text()
[docs] def to_text(self): """Transform to plain text.""" return '\u3000'.join(map(self.item_class.to_text, self))
[docs] def to_dict(self): """Transform to python dict/list.""" return [word.to_dict() for word in self]
[docs] def to_json(self, **kwargs): """Transform to JSON format.""" return _json.dumps(self.to_dict(), **kwargs)
[docs]class WsSentenceList(_collections.UserList): # pylint: disable=too-many-ancestors """A list of word-segmented sentence.""" item_class = WsSentence
[docs] @classmethod def from_text(cls, text_list): """Create :class:`WsSentenceList` object from :class:`ckipnlp.ws.CkipWs` output. Parameters ---------- text_list : List[str] A list of sentence from :class:`ckipnlp.ws.CkipWs` output. """ return cls(map(cls.item_class.from_text, text_list))
def __str__(self): return self.to_text()
[docs] def to_text(self): """Transform to plain text.""" return '\n'.join(map(self.item_class.to_text, self))
[docs] def to_dict(self): """Transform to python dict/list.""" return [sentence.to_dict() for sentence in self]
[docs] def to_json(self, **kwargs): """Transform to JSON format.""" return _json.dumps(self.to_dict(), **kwargs)