자연어 처리/Today I learned :
[자연어 처리] 한국어 토큰화, 품사태깅 구현 KoNLPy (Hannanum,Kkma),Khaiii
μ£Όμ π±
2023. 1. 3. 18:18
728x90
반응형
설치
!pip install konlpy
한나눔(Hannanum)
from konlpy.tag import Hannanum

# Demo: run the Hannanum analyzer on one Korean sentence and print its
# morphemes, its nouns, and its (morpheme, tag) pairs.
hannanum = Hannanum()

# Sample input ("Hello! It is very cold today"). The literal must be real
# UTF-8 Korean on a single line; a mis-decoded copy is both a syntax error
# (the string gets split across lines) and meaningless to the analyzer.
text = '안녕하세요! 오늘 많이 추워요'

print(hannanum.morphs(text))  # Parse phrase to morphemes
print(hannanum.nouns(text))  # Noun extractors
print(hannanum.pos(text))  # POS tagger
['안녕', '하', '세', '요', '!', '오늘', '많', '이', '춥', '어요']
['안녕', '오늘']
[('안녕', 'N'), ('하', 'X'), ('세', 'E'), ('요', 'J'), ('!', 'S'), ('오늘', 'N'), ('많', 'P'), ('이', 'X'), ('춥', 'P'), ('어요', 'E')]
꼬꼬마
from konlpy.tag import Kkma

# Demo: run the Kkma analyzer on one Korean sentence and print its
# morphemes, its nouns, and its (morpheme, tag) pairs.
kkma = Kkma()

# Sample input ("Hello! Today is very cold"). The literal must be real
# UTF-8 Korean on a single line; a mis-decoded copy is both a syntax error
# (the string gets split across lines) and meaningless to the analyzer.
text = '안녕하세요! 오늘은 많이 추워요'

print(kkma.morphs(text))  # Parse phrase to morphemes
print(kkma.nouns(text))  # Noun extractors
print(kkma.pos(text))  # POS tagger
['안녕', '하', '세요', '!', '오늘', '은', '많이', '춥', '어요']
['안녕', '오늘']
[('안녕', 'NNG'), ('하', 'XSV'), ('세요', 'EFN'), ('!', 'SF'), ('오늘', 'NNG'), ('은', 'JX'), ('많이', 'MAG'), ('춥', 'VA'), ('어요', 'EFN')]
Khaiii
!git clone https://github.com/kakao/khaiii.git
!pip install cmake
!mkdir build
!cd build && cmake /content/khaiii
!cd /content/build/ && make all
!cd /content/build/ && make resource
!cd /content/build && make install
!cd /content/build && make package_python
!pip install /content/build/package_python
from khaiii import KhaiiiApi

# Demo: tokenize one Korean sentence with Khaiii and print the flat list of
# morpheme surface forms.
khaiiApi = KhaiiiApi()

# Sample input ("Few people can do both coding and AI development.").
# The literal must be real UTF-8 Korean, not a mis-decoded copy.
tokenized = khaiiApi.analyze('코딩과 AI 개발이 둘다 가능한 사람은 많지 않다.')

tokens = []
for word in tokenized:
    # str(m) is relied on to look like "<surface>/<tag>". Split on the LAST
    # slash so a morpheme whose surface form is itself "/" is kept instead
    # of collapsing to an empty string.
    tokens.extend(str(m).rsplit('/', 1)[0] for m in word.morphs)
print(tokens)
반응형