import kenlm

# Load the binary language-model file into a kenlm model.
model = kenlm.LanguageModel("/data/NLP/Language_Models/lm.bin")

# Score a few sentences with the language model. The trailing comments are
# the values recorded by the original author for their model file; a
# different model will produce different numbers.
sentence = 'you are a good man'
print(model.score(sentence))  # -20.92301368713379

sentence = "I'm fine,thinks"
print(model.score(sentence))  # -21.117055892944336

sentence = "wos as dadawnqsao asd aa aa aa"
print(model.score(sentence))  # -46.037437438964844

# Show scores and n-gram matches for the last sentence scored above.
# `words` is padded with the sentence-boundary markers so that the entry
# yielded at position i lines up with words[i + 1].
words = ['<s>'] + sentence.split() + ['</s>']
for i, (prob, length, oov) in enumerate(model.full_scores(sentence)):
    # The slice selects the `length` tokens ending at words[i + 1].
    ngram = ' '.join(words[i + 2 - length:i + 2])
    print('{0} {1}: {2}'.format(prob, length, ngram))
    if oov:
        print('\t"{0}" is an OOV'.format(words[i + 1]))

# Find out-of-vocabulary words by testing each token for membership in
# the model.
for w in words:
    if w not in model:
        print('"{0}" is an OOV'.format(w))