我正在使用 Stanford CoreNLP 进行情感分析。我查看了 GitHub 上的代码(链接),希望对其进行修改,以统计每条评论中各情感值出现的频率(例如:3 句正面、2 句中性、1 句负面),并将结果写入 DataFrame。我应该如何修改代码,才能计算每个 sentimentValue 的频率?
import os
import numpy as np
import pandas as pd
inputFile = 'reg_reviews_03.csv'  # original file name; the file must include the review text column
outputFile = 'reg_reviews_NLP_03.csv'  # output file name for the data with the new sentiment features

# Load the reviews and add the new sentiment feature columns, zero-initialised.
df = pd.read_csv(inputFile)
header_list_new = ['numSentence', 'numWords', 'totSentiment', 'avgSentiment', 'Sfreq0','Sfreq1','Sfreq2','Sfreq3','Sfreq4','Sfreq5']
for name in header_list_new:
    # avgSentiment stores a mean, so it starts as a float column; writing a
    # fractional value into an integer column would force an implicit dtype
    # upcast, which recent pandas versions deprecate.
    df[name] = 0.0 if name == 'avgSentiment' else 0

from pycorenlp import StanfordCoreNLP

# Client for the CoreNLP server at this URL; used by stanford_sentiment().
nlp = StanfordCoreNLP('http://localhost:9000')
# Function; Output = # sentences, # words, total and avg. sentimentValue, sentimentValue histogram
def stanford_sentiment(text_str):
    """Annotate ``text_str`` with CoreNLP and summarise its sentence sentiments.

    The text is sent to the module-level ``nlp`` client with the
    ``sentiment`` annotator, and the ``sentimentValue`` of every returned
    sentence is collected.

    Args:
        text_str: The review text to analyse.

    Returns:
        A tuple ``(numSentence, numWords, totSentiment, avgSentiment, freq)``:

        * ``numSentence`` - number of sentences in the server response.
        * ``numWords`` - number of whitespace-separated tokens in ``text_str``.
        * ``totSentiment`` - sum of the sentence sentiment values.
        * ``avgSentiment`` - mean of the sentence sentiment values, or ``0.0``
          when the response contains no sentences.
        * ``freq`` - integer array of length 6; ``freq[k]`` is the number of
          sentences whose sentiment value is ``k``.

    Raises:
        TypeError: If the server response could not be parsed as JSON.
    """
    res = nlp.annotate(
        text_str,
        properties={
            'annotators': 'sentiment',
            'outputFormat': 'json',
            'timeout': 40000,
        },
    )
    # pycorenlp hands back the raw response text when the reply is not valid
    # JSON (e.g. a server-side timeout message); fail with a readable error
    # instead of an opaque "string indices must be integers".
    if not isinstance(res, dict):
        raise TypeError("CoreNLP did not return JSON: %r" % (str(res)[:200],))

    sentences = res["sentences"]
    numSentence = len(sentences)
    numWords = len(text_str.split())

    # One sentiment value per sentence, kept as floats as before.
    arraySentVal = np.array(
        [int(s["sentimentValue"]) for s in sentences], dtype=float
    )

    # Sum of sentiment values for all sentences in the text/review.
    totSentiment = arraySentVal.sum()
    # Mean sentiment value; guarded so an empty response yields 0.0 rather
    # than NaN plus a RuntimeWarning from averaging an empty array.
    avgSentiment = arraySentVal.mean() if numSentence else 0.0

    # Frequency of each sentimentValue in the text/review. Per the CoreNLP
    # sentiment annotator documentation the scale is
    # {0: Very negative, 1: Negative, 2: Neutral, 3: Positive, 4: Very positive},
    # so the last bin is expected to stay empty; it is kept so the result
    # still has six entries.
    bins = [0, 1, 2, 3, 4, 5, 6]
    freq = np.histogram(arraySentVal, bins)[0]  # counts only, without bin edges
    return (numSentence, numWords, totSentiment, avgSentiment, freq)
# Annotate every review and store its sentiment features in df, row by row.
# A failing review is reported and skipped so one bad row does not stop the run.
# NOTE(review): nothing visible here writes df to outputFile - confirm whether
# a df.to_csv(outputFile) step is intended after this loop.
dfLength = len(df)
freqColumns = ['Sfreq0', 'Sfreq1', 'Sfreq2', 'Sfreq3', 'Sfreq4', 'Sfreq5']
for i in range(dfLength):
    try:
        # Newlines are flattened to spaces before the text is sent for annotation.
        numSentence, numWords, totSentiment, avgSentiment, freq = stanford_sentiment(
            df.review[i].replace('\n', " ")
        )
        df.loc[i, 'numSentence'] = numSentence
        df.loc[i, 'numWords'] = numWords
        df.loc[i, 'totSentiment'] = totSentiment
        df.loc[i, 'avgSentiment'] = avgSentiment
        for column, count in zip(freqColumns, freq):
            df.loc[i, column] = count
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still stop the script, and report what actually went wrong.
        print("error where i =", i, "-", repr(exc))
本站系公益性非盈利分享网址,本文来自用户投稿,不代表码文网立场,如若转载,请注明出处
评论列表(31条)