#!/usr/bin/env python3
# -*- coding:utf-8 -*-
__author__ = 'Mu Yang <http://muyang.pro>'
__copyright__ = '2018-2019 CKIP Lab'
__license__ = 'CC-BY-NC-SA 4.0'
import collections as _collections
import json as _json
class WsWord(_collections.namedtuple('_WsWord', ('word', 'pos',))):
    """A word-segmented word.

    Fields:
        * **word** (*str*): the word.
        * **pos** (*str*): the POS-tag.
    """

    @classmethod
    def from_text(cls, text):
        """Create :class:`WsWord` object from :class:`ckipnlp.ws.CkipWs` output.

        Args:
            text (str): a single token formatted as ``word(pos)``.

        Returns:
            WsWord: the parsed word.

        Raises:
            TypeError: if *text* contains no ``(`` separating the word from
                its tag (only one field can be recovered).
        """
        # Remove only the one closing parenthesis that terminates the tag.
        # ``str.strip(')')`` would also eat parentheses at the *start* of the
        # token, turning a word that is itself ')' into an empty string.
        if text.endswith(')'):
            text = text[:-1]
        # Split on the last '(' so that a '(' inside the word is preserved.
        return cls(*text.rsplit('(', 1))

    def __str__(self):
        """Format the word as ``word(pos)``, the inverse of :meth:`from_text`."""
        return '{}({})'.format(self.word, self.pos)

    def to_dict(self):
        """Return the fields as a mapping with keys ``word`` and ``pos``."""
        return self._asdict()

    def to_json(self, **kwargs):
        """Serialize :meth:`to_dict` to a JSON string.

        Args:
            **kwargs: forwarded unchanged to :func:`json.dumps`.
        """
        return _json.dumps(self.to_dict(), **kwargs)
class WsSentence(_collections.UserList):  # pylint: disable=too-many-ancestors
    """A word-segmented sentence.

    Items:
        :class:`WsWord`: the words.
    """

    @classmethod
    def from_text(cls, text):
        """Create :class:`WsSentence` object from :class:`ckipnlp.ws.CkipWs` output.

        Args:
            text (str): tokens of the form ``word(pos)`` separated by the
                ideographic space ``U+3000``.

        Returns:
            WsSentence: the parsed words, in order. An empty *text* yields an
            empty sentence.
        """
        # Skip empty tokens: splitting '' (or text with a leading, trailing or
        # doubled separator) produces '' entries that cannot be parsed as a
        # word and would otherwise raise TypeError. This also makes the empty
        # sentence round-trip through ``str()``.
        return cls([
            WsWord.from_text(token)
            for token in text.split('\u3000')
            if token
        ])

    def __str__(self):
        """Join the words with ``U+3000``, the inverse of :meth:`from_text`."""
        return '\u3000'.join(map(str, self))

    def to_dict(self):
        """Return a list holding the ``to_dict()`` result of every word."""
        return [word.to_dict() for word in self]

    def to_json(self, **kwargs):
        """Serialize :meth:`to_dict` to a JSON string.

        Args:
            **kwargs: forwarded unchanged to :func:`json.dumps`.
        """
        return _json.dumps(self.to_dict(), **kwargs)