您的位置:首页 > 其它

文本倾向性分析

2016-03-29 08:49 429 查看
package test;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;

/**
 * Lexicon-based sentiment ("opinion") analyser for text.
 *
 * Word lists are read from the database by the method {@code OpinionAnalyser()}.
 * Note that this is an ordinary {@code void} method that merely shares the class
 * name; it is NOT a constructor and has to be called explicitly after {@code new}
 * (see {@link #main(String[])}).
 *
 * Scoring is plain substring matching: a sentence is scanned for every opinion
 * word, optionally preceded directly by a modifier word. The code treats a polar
 * value of 1 as positive and -1 as negative.
 *
 * {@code Word} and {@code ConnDB} are project types declared outside this file.
 */
public class OpinionAnalyser {
    /** Opinion (sentiment-bearing) words, loaded from table {@code twordlist}. */
    public Vector<Word> words = new Vector<Word>();
    /** Modifier words that may directly precede an opinion word, loaded from {@code twordlist_xs}. */
    public Vector<Word> adjectives = new Vector<Word>();
    /** Description words, loaded from {@code twordlist_ms}; not read by any method of this class. */
    public Vector<Word> descriptions = new Vector<Word>();
    /** Accumulated positive evidence of the last analysis. */
    public int posCount;
    /** Accumulated negative evidence of the last analysis (already multiplied by {@link #NEG_WEIGHT}). */
    public int negCount;

    // Database connection settings handed to ConnDB.
    static String SERVER = "59.77.233.*";
    static String USER = "";
    static String PASSWORD = "";
    static String DATABASE = "skycent";

    /** Weight of negative evidence relative to positive evidence; 2 means negative counts twice. */
    static int NEG_WEIGHT = 2;
    /** Amount added to a counter for each keyword hit in a title / short text with a non-zero score. */
    static int TITLE_WEIGHT = 10;

    /** Parses a decimal integer; throws NumberFormatException for null or non-numeric input. */
    private static int atoi(String s) {
        return Integer.parseInt(s);
    }

    /**
     * Loads the three word lists from the database and resets both counters.
     *
     * The lists are replaced, not appended to, so calling this method again
     * reloads the lexicon instead of duplicating every entry. They are only
     * replaced after all three queries succeeded; on failure the previous
     * contents are left untouched and the exception is propagated.
     *
     * @throws SQLException if connecting or any of the queries fails
     */
    public void OpinionAnalyser() throws SQLException {
        ConnDB conndb = new ConnDB(SERVER, USER, PASSWORD, DATABASE);
        Vector<Word> loadedWords;
        Vector<Word> loadedDescriptions;
        Vector<Word> loadedAdjectives;
        try {
            conndb.executeUpdate("SET NAMES 'utf8mb4'");
            loadedWords = loadWordList(conndb,
                    "select word,polar,weight from twordlist", "twordlist", "polar", "weight");
            loadedDescriptions = loadWordList(conndb,
                    "select word,type from twordlist_ms", "twordlist_ms", "type", null);
            loadedAdjectives = loadWordList(conndb,
                    "select word,polar,weight from twordlist_xs", "twordlist_xs", "polar", "weight");
        } finally {
            // Release the connection even when a query failed.
            conndb.close();
        }

        // Keep the same Vector instances: they are public fields others may hold on to.
        words.clear();
        words.addAll(loadedWords);
        descriptions.clear();
        descriptions.addAll(loadedDescriptions);
        adjectives.clear();
        adjectives.addAll(loadedAdjectives);

        posCount = 0;
        negCount = 0;
    }

    /**
     * Runs one lexicon query and converts every row into a {@code Word}.
     *
     * The result set is consumed strictly forward: prepareStatement(String)
     * yields a forward-only result set, on which previous() is not permitted,
     * so emptiness is detected from the number of rows read instead.
     *
     * @param conndb       open database wrapper
     * @param sql          the select statement to run
     * @param table        table name, used only for the "no words" console message
     * @param polarColumn  column holding the polarity / type value
     * @param weightColumn column holding the weight, or null to use weight 0
     * @return the rows in result order (possibly empty)
     * @throws SQLException if the statement cannot be prepared, executed or read
     */
    private static Vector<Word> loadWordList(ConnDB conndb, String sql, String table,
            String polarColumn, String weightColumn) throws SQLException {
        Vector<Word> loaded = new Vector<Word>();
        PreparedStatement stmt = conndb.getConnection().prepareStatement(sql);
        try {
            ResultSet rs = stmt.executeQuery();
            try {
                while (rs.next()) {
                    // Columns are read left to right, as the JDBC documentation advises.
                    String word = rs.getString("word");
                    int polar = atoi(rs.getString(polarColumn));
                    int weight = 0;
                    if (weightColumn != null) {
                        weight = atoi(rs.getString(weightColumn));
                    }
                    loaded.addElement(new Word(word, polar, weight));
                }
            } finally {
                rs.close();
            }
        } finally {
            stmt.close();
        }
        if (loaded.isEmpty()) {
            System.out.println(table + "没有词!");
        }
        return loaded;
    }

    /**
     * Computes the sentiment score of one sentence.
     *
     * For every opinion word contained in the sentence: each modifier that
     * occurs directly in front of it contributes
     * wordWeight * modifierWeight * wordPolar * modifierPolar; if no modifier
     * pair occurs, the word alone contributes weight * polar (only for polar
     * 1 or -1). Every negative contribution is multiplied by NEG_WEIGHT.
     *
     * @param sentence text to score; null is treated as containing no words
     * @return the summed score: above zero positive, below zero negative
     */
    public int sentenceScore(String sentence) {
        if (sentence == null) {
            return 0;
        }
        int opinionScore = 0;
        for (Word opinion : words) {
            String opinionWord = opinion.getWord();
            if (sentence.indexOf(opinionWord) == -1) {
                continue;
            }

            boolean modifierFound = false;
            for (Word modifier : adjectives) {
                String pair = modifier.getWord() + opinionWord;
                if (sentence.indexOf(pair) == -1) {
                    continue;
                }
                modifierFound = true;
                int pairScore = opinion.getWeight() * modifier.getWeight()
                        * opinion.getPolar() * modifier.getPolar();
                opinionScore += (pairScore > 0) ? pairScore : pairScore * NEG_WEIGHT;
            }

            // No "modifier + word" pair present: use the opinion word's own weight.
            if (!modifierFound) {
                opinionScore += plainWordScore(opinion);
            }
        }
        return opinionScore;
    }

    /** Score of an opinion word without modifier; words whose polar is neither 1 nor -1 score 0. */
    private static int plainWordScore(Word opinion) {
        int polar = opinion.getPolar();
        if (polar == 1) {
            return opinion.getWeight() * polar;
        }
        if (polar == -1) {
            return opinion.getWeight() * polar * NEG_WEIGHT;
        }
        return 0;
    }

    /** Counts how many of the (non-null) keywords occur as a substring of {@code text}. */
    private static int countKeywordHits(Set<String> keyword, String text) {
        int hits = 0;
        for (String candidate : keyword) {
            if (candidate != null && text.indexOf(candidate) != -1) {
                hits++;
            }
        }
        return hits;
    }

    /**
     * Computes the sentiment of a regular news item and stores it in
     * posCount / negCount.
     *
     * The title is scored first via {@link #shortTextOpinion}. The body is
     * then split on single spaces into a set of distinct sentences. Each
     * sentence is counted once for every keyword it contains: +1 when its
     * score is positive, +NEG_WEIGHT on the negative side when it is negative.
     * Every keyword is checked against every sentence.
     *
     * @param keyword keywords of interest; null means nothing is counted
     * @param text    body text; null means only the title is considered
     * @param title   title text; null means the title is skipped
     */
    public void opinion(Set<String> keyword, String text, String title) {
        posCount = 0;
        negCount = 0;
        System.out.println("opinion");
        // Title first; this also resets the counters.
        shortTextOpinion(keyword, title);

        if (keyword == null || text == null) {
            return;
        }

        Set<String> sentences = new HashSet<String>();
        for (String part : text.split(" ")) {
            sentences.add(part);
        }

        for (String sentence : sentences) {
            int hits = countKeywordHits(keyword, sentence);
            if (hits == 0) {
                continue;
            }
            // Score the sentence once and weight it by the number of keyword hits.
            int value = sentenceScore(sentence);
            if (value > 0) {
                posCount += hits;
            } else if (value < 0) {
                negCount += hits * NEG_WEIGHT;
            }
        }
    }

    /**
     * Computes the sentiment of a short text (e.g. a micro-blog post or a
     * title) and stores it in posCount / negCount, which are reset first.
     *
     * The text is scored with {@link #sentenceScore}. For every keyword the
     * text contains, TITLE_WEIGHT is added to posCount when the score is
     * positive, or TITLE_WEIGHT * NEG_WEIGHT to negCount when it is negative.
     *
     * @param keyword keywords of interest; null means nothing is counted
     * @param text    text to analyse; null means nothing is counted
     */
    public void shortTextOpinion(Set<String> keyword, String text) {
        System.out.println("shortTextOpinion");
        posCount = 0;
        negCount = 0;

        if (keyword == null || text == null) {
            return;
        }
        int hits = countKeywordHits(keyword, text);
        if (hits == 0) {
            return;
        }

        // The score does not depend on the keyword, so compute it once.
        int opinionScore = sentenceScore(text);
        if (opinionScore > 0) {
            posCount += hits * TITLE_WEIGHT;
        } else if (opinionScore < 0) {
            negCount += hits * TITLE_WEIGHT * NEG_WEIGHT;
        }
    }

    /**
     * Entry point: analyses one item according to its media type.
     *
     * media == 3 denotes micro-blog content and is scored as short text; any
     * other value is scored as a regular news item with title and body.
     * The original note on this method says one of the text parameters is
     * empty for micro-blogs without making clear which one, so for media == 3
     * the title is used when it is non-empty and the body text otherwise.
     *
     * @param media   media type; 3 selects short-text analysis
     * @param keyword keywords of interest
     * @param text    body text
     * @param title   title text
     */
    public void analyse(int media, Set<String> keyword, String text, String title) {
        if (media == 3) {
            System.out.println("media=3");
            boolean titleMissing = (title == null || title.length() == 0);
            shortTextOpinion(keyword, titleMissing ? text : title);
        } else {
            System.out.println("media=1");
            opinion(keyword, text, title);
        }
    }

    /**
     * Final polarity of the last analysis.
     *
     * @return 1 if posCount exceeds negCount, -1 if negCount exceeds posCount, 0 otherwise
     */
    public int getPolar() {
        if (posCount > negCount) {
            return 1;
        }
        if (negCount > posCount) {
            return -1;
        }
        return 0;
    }

    /**
     * Small manual check: loads the lexicon and prints the score of a sample
     * sentence. For the full pipeline, build a keyword set, call
     * analyse(1, keywords, text, title) and read getPolar().
     *
     * @param args unused
     * @throws SQLException if the lexicon cannot be loaded
     */
    public static void main(String[] args) throws SQLException {
        OpinionAnalyser a = new OpinionAnalyser();
        // Explicit call required: this is a method, not the constructor.
        a.OpinionAnalyser();
        String str = "心情很好";
        System.out.println("文本倾向性:" + a.sentenceScore(str));
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: