python lexical chain

github地址

import nltk
nltk.download('wordnet_ic')
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

# Similarity cut-offs used when deciding whether a word joins an existing chain.
threshold = 0.6  # minimum wup similarity
jcnTreshold = 0.09  # minimum jcn similarity
# NOTE(review): pathTeshold is not referenced by the visible code; add_word
# compares path similarity against a literal 0.2 instead -- confirm intent.
pathTeshold = 0.1  # path similarity
brown_ic = wordnet_ic.ic('ic-brown.dat')  # information content loaded from 'ic-brown.dat' (Brown corpus)
lexical_chains = []  # empty list to hold all the chains
dictionary = {}  # maps each word to the count recorded for it by Chain


class Chain():
    """A lexical chain: a set of words plus the set of senses collected for them."""

    def __init__(self, words, senses, count=0):
        """Create a chain from initial words and senses.

        Args:
            words: non-empty sequence of words; ``words[0]`` gets its entry in
                the module-level ``dictionary`` set to 1.
            senses: iterable of senses (synsets) belonging to the chain.
            count: starting value of the counter advanced by ``incCount``.
        """
        self.words = set(words)
        self.senses = set(senses)
        # Store the counter so incCount() has something to increment.
        self.count = count
        dictionary[words[0]] = 1  # initialize counter

    def addWord(self, word):
        """Add ``word`` to the chain and update its count in ``dictionary``.

        A word already in the chain has its count incremented; a new word is
        added to the chain with its count set to 1.
        """
        if word in self.words:
            # .get() covers chain members that never received a counter
            # (only words[0] is initialised by __init__).
            dictionary[word] = dictionary.get(word, 0) + 1
        else:
            dictionary[word] = 1
            self.words.add(word)

    def addSense(self, sense):
        """Add ``sense`` to the chain's set of senses."""
        self.senses.add(sense)

    def getWords(self):
        """Return the set of words in the chain."""
        return self.words

    def getSenses(self):
        """Return the set of senses in the chain."""
        return self.senses

    def incCount(self):
        """Increment the chain's counter by one."""
        self.count += 1


def add_word(word):
    """Attach ``word`` to the best matching chain, or start a new chain.

    Every synset of ``word`` is compared with every sense of every chain in
    ``lexical_chains``. A pair is a candidate when, in this order:
    its wup similarity is at least the best wup value accepted so far and at
    least ``threshold``; its jcn similarity (using ``brown_ic``) is at least
    ``jcnTreshold``; its path similarity is at least 0.2; and its jcn
    similarity is at least the best jcn value accepted so far. The last
    candidate found wins. If there is no candidate, a new chain holding
    ``word`` and all of its synsets is appended to ``lexical_chains``.
    """
    best_wup = 0
    best_jcn = 0
    best_chain = None
    best_synset = None
    synsets = wn.synsets(word)  # same for every chain, so look it up once

    for chain in lexical_chains:  # for all chains that are present
        for synset in synsets:  # for all synsets of current word
            for sense in chain.senses:  # for all senses stored in the current chain
                similarity = sense.wup_similarity(synset)  # using wup_similarity
                # A missing score (None) cannot be ordered against a number
                # on Python 3, so treat it as "no match".
                if similarity is None:
                    continue
                if similarity < best_wup or similarity < threshold:
                    continue
                jcn = sense.jcn_similarity(synset, brown_ic)  # using jcn_similarity
                if jcn < jcnTreshold:
                    continue
                path = sense.path_similarity(synset)  # using path similarity
                if path is None or path < 0.2:
                    continue
                if jcn >= best_jcn:
                    best_wup = similarity
                    best_jcn = jcn
                    best_chain = chain
                    # Remember the synset that produced this match; relying on
                    # the loop variable after the loops would pick an
                    # unrelated synset.
                    best_synset = synset

    if best_chain is not None:
        best_chain.addWord(word)
        best_chain.addSense(best_synset)
        return
    lexical_chains.append(Chain([word], synsets))


fileName = input("Enter file path + name, if file name is 'nlp.txt', type 'nlp' \n \n")
fileName += ".txt"
print("\n\n")
# fileName = "nlp.txt"  # alternative: hard-code the input file name
# Read the whole input file; the context manager closes it afterwards.
with open(fileName) as File:  # open file
    lines = File.read()  # entire file content as one string

# Tokenize and POS-tag the text, keeping only tokens tagged as nouns.
nouns = [
    word
    for (word, pos) in nltk.pos_tag(nltk.word_tokenize(lines))
    if pos in ('NN', 'NNP', 'NNS', 'NNPS')
]  # extract all nouns
print(nouns)
# Output all the nouns.
# POS tags accepted as nouns:
# NN     Noun, singular or mass
# NNS    Noun, plural
# NNP    Proper noun, singular
# NNPS   Proper noun, plural

# Feed every noun into the chain builder.
for word in nouns:
    add_word(word)

# print all chains, each word followed by its count in parentheses
for chain in lexical_chains:
    # print(chain)
    print(", ".join(word + "(" + str(dictionary[word]) + ")" for word in chain.getWords()))

# Kept at this position: Counter is used by the frequency summary that follows.
from collections import Counter
# Tally how many times each noun occurs.
result = dict(Counter(nouns))
print(result)

# Keep only the nouns that occur more than once, in first-seen order.
repeated = {}
for noun, freq in result.items():
    if freq > 1:
        repeated[noun] = freq
print(list(repeated))
print(repeated)

#输入文件名
#查找所有名词，并输出名词列表（有重复）

#源代码的词汇链

#聚合名词列表，并输出频率大于1的单词。

python lexical chain相关推荐

Python Itertools.chain()用法【将一组迭代对象串联起来，形成一个更大的迭代器】
它是一个需要一系列可迭代对象并返回一个可迭代对象的函数.它将所有可迭代对象组合在一起,并生成一个可迭代对象作为输出. 场景一: chain()可以把一组迭代对象串联起来,形成一个更大的迭代器: &g ...
Python——itertools.chain.from_iterable将多个迭代器连接成一个统一的迭代器的最高效的方法
1 致谢感谢网友"-柚子皮-"的帮助, 原文链接如下: https://blog.csdn.net/pipisorry/article/details/45171451 感谢 P ...
Python itertools chain
v1 = [11,22,33] v2 = [44,55,66]new = chain(v1,v2) for item in new:print(item) from itertools import ...
合并多个python list以及合并多个 django QuerySet 的方法
尊重原文作者,该文转载于: http://www.yihaomen.com/article/python/533.htm 在用python或者django写一些小工具应用的时候,有可能会遇到合并多个l ...
湖北科技学院计算机科学院,钱涛 - 湖北科技学院 - 计算机科学与技术学院
个人简介教育经历 2012/06-至今,武汉大学,计算机学院,工学博士 2003/09-2006/06,湖北工业大学,计算机学院,工学硕士 1993/09-1995/06, 荆州教育学院,数学系,专 ...
马尔可夫链的定义、举例和应用
马尔可夫链通常用来建模排队理论和统计学中的建模,还可作为信号模型用于熵编码技术,如算法编码. 定义马尔可夫链是满足马尔可夫性质的随机过程.马尔可夫链描述了一种状态序列,其每个状态值取决于前面有限个状 ...
WordNet发展概况
· 关于WordNet的不成熟的想法可以追溯到20多年前, 而这一想法开始逐渐具体化和清晰化则是1985年后才开始的. 从85年开始,WordNet作为一个知识工程全面展开.不过, 当时的WordNe ...
WordNet 介绍（ICL-PKU）
WordNet 介绍(ICL-PKU) 一 WordNet发展概况 · 关于WordNet的不成熟的想法可以追溯到20多年前,而这一想法开始逐渐具体化和清晰化则是1985年后才开始的.从85年开始,W ...
篇章分析的理论和应用
篇章分析的理论和应用一.篇章分析的理论衔接性分析衔接性:篇章范围内词汇之间的关联. 一个成分的含义依赖于另一成分的解释连贯性:句子层面的意义关联篇章的衔接性可以分成两类,前者是借助一系列的 ...

python lexical chain

python lexical chain相关推荐

最新文章

热门文章