string = "Automatic keyword extraction is to extract topical and important words or phrases form document or document set. It is a basic and necessary work in text mining tasks such as text retrieval and text summarization. This paper discusses the connotation of keyword extraction and automatic keyword extraction. In the light of linguistics, cognitive science, complexity science, psychology and social science, this paper studies the theoretical basis of automatic keyword extraction. From macro, meso and micro perspectives, the development, techniques and methods of automatic keyword extraction are reviewed and analyzed. This paper summarizes the current key technologies and research progress of automatic keyword extraction methods, including statistical methods, topic based methods, and network based methods. The evaluation approach of automatic keyword extraction is analyzed, and the challenges and trends of automatic keyword extraction are also predicted."
from jieba.analyse import *
# print(jieba.cut(str))
# print()
# Ask jieba for (keyword, weight) pairs for the sample text, then print one
# "keyword weight" line per pair in the order jieba returns them.
weighted_tags = extract_tags(string, withWeight=True)
for keyword, weight in weighted_tags:
    print('%s %s' % (keyword, weight))
# kw = tfidf(str)
# print(kw)
# coding = utf-8
import re
import jieba
import networkx as nx
import matplotlib.pyplot as plt
from operator import itemgetter, attrgetter
string = "Automatic keyword extraction is to extract topical and important words or phrases form document or document set. It is a basic and necessary work in text mining tasks such as text retrieval and text summarization. This paper discusses the connotation of keyword extraction and automatic keyword extraction. In the light of linguistics, cognitive science, complexity science, psychology and social science, this paper studies the theoretical basis of automatic keyword extraction. From macro, meso and micro perspectives, the development, techniques and methods of automatic keyword extraction are reviewed and analyzed. This paper summarizes the current key technologies and research progress of automatic keyword extraction methods, including statistical methods, topic based methods, and network based methods. The evaluation approach of automatic keyword extraction is analyzed, and the challenges and trends of automatic keyword extraction are also predicted."
# Build a word co-occurrence graph: every pair of distinct tokens that appear
# in the same sentence is joined by an undirected edge.
G = nx.Graph()
# Named `sentences` rather than `str` so the builtin `str` is not shadowed.
sentences = string.split('.')
for sentence in sentences:
    # NOTE(review): splitting on a single space keeps '' tokens (every sentence
    # after the first starts with a space) and leaves trailing commas attached
    # ('methods,'). Kept as-is so results match the previously recorded run;
    # consider sentence.split() plus punctuation stripping.
    tokens = re.split(' ', sentence)
    for left in tokens:
        for right in tokens:
            if left != right:  # never link a token to itself
                # nx.Graph stores each undirected edge once, so visiting a
                # pair in both orders (or repeatedly) is harmless.
                G.add_edge(left, right)

# Render the graph with a freshly computed spring layout.
nx.draw(G, pos=nx.spring_layout(G), with_labels=True, node_size=50)

# (node, degree) pairs, highest degree first.
degree = list(G.degree())
degree.sort(key=itemgetter(1), reverse=True)

# (node, closeness centrality) pairs, most central first.
closenessCentrality = nx.closeness_centrality(G)
c = sorted(closenessCentrality.items(), key=itemgetter(1), reverse=True)
keyword 1.0395450002521738
extraction 1.0395450002521738
automatic 0.7796587501891303
methods 0.6497156251576086
text 0.38982937509456517
paper 0.38982937509456517
science 0.38982937509456517
document 0.25988625006304344
analyzed 0.25988625006304344
based 0.25988625006304344
Automatic 0.12994312503152172
extract 0.12994312503152172
topical 0.12994312503152172
important 0.12994312503152172
words 0.12994312503152172
phrases 0.12994312503152172
form 0.12994312503152172
set 0.12994312503152172
basic 0.12994312503152172
necessary 0.12994312503152172
[('and', 78), ('', 67), ('keyword', 65), ('extraction', 65), ('the', 54), ('of', 54), ('automatic', 54), ('is', 40), ('paper', 35), ('methods', 31), ('are', 26), ('This', 23), ('summarizes', 21), ('current', 21), ('key', 21), ('technologies', 21), ('research', 21), ('progress', 21), ('methods,', 21), ('including', 21), ('statistical', 21), ('topic', 21), ('based', 21), ('network', 21), ('In', 19), ('light', 19), ('linguistics,', 19), ('cognitive', 19), ('science,', 19), ('complexity', 19), ('psychology', 19), ('social', 19), ('this', 19), ('studies', 19), ('theoretical', 19), ('basis', 19), ('From', 17), ('macro,', 17), ('meso', 17), ('micro', 17), ('perspectives,', 17), ('development,', 17), ('techniques', 17), ('reviewed', 17), ('analyzed', 17), ('The', 16), ('evaluation', 16), ('approach', 16), ('analyzed,', 16), ('challenges', 16), ('trends', 16), ('also', 16), ('predicted', 16), ('It', 15), ('a', 15), ('basic', 15), ('necessary', 15), ('work', 15), ('in', 15), ('text', 15), ('mining', 15), ('tasks', 15), ('such', 15), ('as', 15), ('retrieval', 15), ('summarization', 15), ('Automatic', 14), ('to', 14), ('extract', 14), ('topical', 14), ('important', 14), ('words', 14), ('or', 14), ('phrases', 14), ('form', 14), ('document', 14), ('set', 14), ('discusses', 10), ('connotation', 10)]
[('and', 1.0), ('', 0.8764044943820225), ('keyword', 0.8571428571428571), ('extraction', 0.8571428571428571), ('the', 0.7647058823529411), ('of', 0.7647058823529411), ('automatic', 0.7647058823529411), ('is', 0.6724137931034483), ('paper', 0.6446280991735537), ('methods', 0.624), ('are', 0.6), ('This', 0.5864661654135338), ('summarizes', 0.5777777777777777), ('current', 0.5777777777777777), ('key', 0.5777777777777777), ('technologies', 0.5777777777777777), ('research', 0.5777777777777777), ('progress', 0.5777777777777777), ('methods,', 0.5777777777777777), ('including', 0.5777777777777777), ('statistical', 0.5777777777777777), ('topic', 0.5777777777777777), ('based', 0.5777777777777777), ('network', 0.5777777777777777), ('In', 0.5693430656934306), ('light', 0.5693430656934306), ('linguistics,', 0.5693430656934306), ('cognitive', 0.5693430656934306), ('science,', 0.5693430656934306), ('complexity', 0.5693430656934306), ('psychology', 0.5693430656934306), ('social', 0.5693430656934306), ('this', 0.5693430656934306), ('studies', 0.5693430656934306), ('theoretical', 0.5693430656934306), ('basis', 0.5693430656934306), ('From', 0.5611510791366906), ('macro,', 0.5611510791366906), ('meso', 0.5611510791366906), ('micro', 0.5611510791366906), ('perspectives,', 0.5611510791366906), ('development,', 0.5611510791366906), ('techniques', 0.5611510791366906), ('reviewed', 0.5611510791366906), ('analyzed', 0.5611510791366906), ('The', 0.5571428571428572), ('evaluation', 0.5571428571428572), ('approach', 0.5571428571428572), ('analyzed,', 0.5571428571428572), ('challenges', 0.5571428571428572), ('trends', 0.5571428571428572), ('also', 0.5571428571428572), ('predicted', 0.5571428571428572), ('It', 0.5531914893617021), ('a', 0.5531914893617021), ('basic', 0.5531914893617021), ('necessary', 0.5531914893617021), ('work', 0.5531914893617021), ('in', 0.5531914893617021), ('text', 0.5531914893617021), ('mining', 0.5531914893617021), ('tasks', 0.5531914893617021), ('such', 
0.5531914893617021), ('as', 0.5531914893617021), ('retrieval', 0.5531914893617021), ('summarization', 0.5531914893617021), ('Automatic', 0.5492957746478874), ('to', 0.5492957746478874), ('extract', 0.5492957746478874), ('topical', 0.5492957746478874), ('important', 0.5492957746478874), ('words', 0.5492957746478874), ('or', 0.5492957746478874), ('phrases', 0.5492957746478874), ('form', 0.5492957746478874), ('document', 0.5492957746478874), ('set', 0.5492957746478874), ('discusses', 0.5342465753424658), ('connotation', 0.5342465753424658)]
Automatic keyword extraction is to extract topical and important words or phrases form document or document set. It is a basic and necessary work in text mining tasks such as text retrieval and text summarization. This paper discusses the connotation of keyword extraction and automatic keyword extraction. In the light of linguistics, cognitive science, complexity science, psychology and social science, this paper studies the theoretical basis of automatic keyword extraction. From macro, meso and micro perspectives, the development, techniques and methods of automatic keyword extraction are reviewed and analyzed. This paper summarizes the current key technologies and research progress of automatic keyword extraction methods, including statistical methods, topic based methods, and network based methods. The evaluation approach of automatic keyword extraction is analyzed, and the challenges and trends of automatic keyword extraction are also predicted.