kenlm

import kenlm
 
# Load the binary language model file into a kenlm LanguageModel.
model = kenlm.LanguageModel("/data/NLP/Language_Models/lm.bin")

# Score each sample sentence with the language model.  The number noted next
# to each sentence is the score recorded alongside the original call.
samples = (
    'you are a good man',              # -20.92301368713379
    "I'm fine,thinks",                 # -21.117055892944336
    "wos as dadawnqsao asd aa aa aa",  # -46.037437438964844
)
for sentence in samples:
    model.score(sentence)

py-kenlm-model

https://github.com/mattzheng/py-kenlm-model

旺旺教：

#!/usr/bin/env python
import os
import kenlm

# Location of the binary language model used by this script.
path = "/data_local/slm/chinese_csc_1268.bin"
model = kenlm.LanguageModel(path)

# Report the n-gram order of the loaded model.
order_banner = '{0}-gram model'.format(model.order)
print(order_banner)

# Sample sentence written as whitespace-separated tokens, and its score.
sentence = '今天 天气 很 好'
sentence_score = model.score(sentence)
print(sentence_score)


# for item in model.full_scores(sentence):
#     print(item)


# Helper for the consistency check: rebuild the sentence score from the
# per-token entries yielded by model.full_scores().
def score(s):
    """Return the sum of the first field of every ``model.full_scores(s)`` entry."""
    token_scores = [prob for prob, _, _ in model.full_scores(s)]
    return sum(token_scores)


# Sanity check: the summed per-token scores must agree with the direct
# sentence score to within 1e-3.
assert abs(score(sentence) - model.score(sentence)) < 1e-3

# Print each entry's score and matched n-gram length, followed by the words
# that make up that n-gram.
words = ['<s>'] + sentence.split() + ['</s>']
for idx, (prob, length, oov) in enumerate(model.full_scores(sentence)):
    # Entry idx scores words[idx + 1]; the n-gram is the `length` words
    # ending at (and including) that word.
    ngram_end = idx + 2
    matched_ngram = ' '.join(words[ngram_end - length:ngram_end])
    print('{0} {1}: {2}'.format(prob, length, matched_ngram))
    if oov:
        print('\t"{0}" is an OOV'.format(words[idx + 1]))

# Report every word (sentence markers included) that is not in the model.
for w in words:
    if w not in model:
        print('"{0}" is an OOV'.format(w))