您的位置:首页 > 编程语言 > Java开发

solrj分词Java使用

2015-11-24 11:27 417 查看
小狼最近在研究 Solr 分词,环境已经搭好了,但是想把 Solr 计算出来的分词结果拿到 Java 端使用,很纠结。



怎么把下面分出来的结果放到 Java 类中?

/**
 * Sends the sample text "杜淳,我爱你" to the Solr field-analysis handler
 * ({@code /analysis/field}) using the {@code text_it} field type and returns the
 * token texts produced by the index-time {@code StandardTokenizer} stage.
 *
 * <p>Only the tokenizer stage of the response is read; the entries for the other
 * stages of the analysis chain are ignored.
 *
 * @return the token texts joined by single spaces, or an empty string when the
 *         tokenizer stage contains no tokens
 * @throws SolrServerException if the request to Solr fails
 * @throws NullPointerException if the response lacks one of the expected nodes
 *         ({@code analysis}, {@code field_types}, {@code text_it}, {@code index}
 *         or the tokenizer entry)
 */
@SuppressWarnings("unchecked") // NamedList values are untyped; the casts mirror the /analysis/field response layout
public static String testSolrLocal2() throws SolrServerException {
    HttpSolrServer solr = new HttpSolrServer("http://localhost:8888/solr/collection1");
    try {
        // NOTE(review): the original wrapped these setters in catch (Exception) +
        // printStackTrace(); a failure here now propagates instead of being ignored.
        solr.setConnectionTimeout(1000);
        solr.setDefaultMaxConnectionsPerHost(100);
        solr.setMaxTotalConnections(100);

        SolrQuery query = new SolrQuery();
        query.add(CommonParams.QT, "/analysis/field"); // route the request to the field-analysis handler
        query.add(AnalysisParams.FIELD_VALUE, "杜淳,我爱你"); // text to analyse
        query.add(AnalysisParams.FIELD_TYPE, "text_it"); // field type whose analyzer is applied
        QueryResponse response = solr.query(query);

        // Walk down the response tree: analysis -> field_types -> text_it -> index.
        NamedList<Object> analysis = (NamedList<Object>) response.getResponse().get("analysis");
        NamedList<Object> fieldTypes = (NamedList<Object>) analysis.get("field_types");
        NamedList<Object> textIt = (NamedList<Object>) fieldTypes.get("text_it");
        NamedList<Object> index = (NamedList<Object>) textIt.get("index");

        // Each token is a map of attributes; its values are not all strings
        // (offsets, positions, byte arrays), so the map is typed with Object.
        List<SimpleOrderedMap<Object>> tokens = (List<SimpleOrderedMap<Object>>)
                index.get("org.apache.lucene.analysis.standard.StandardTokenizer");

        StringBuilder joined = new StringBuilder();
        for (SimpleOrderedMap<Object> token : tokens) {
            joined.append(token.get("text")).append(' ');
        }
        return joined.toString().trim();
    } finally {
        // Release the underlying HTTP client and its connection pool.
        solr.shutdown();
    }
}


其中 QueryResponse 对象的内容是一大段嵌套的键值结构(形式类似 JSON,但并不是标准 JSON),如下所示:

analysis={
field_types={
text_it={
index={
org.apache.lucene.analysis.standard.StandardTokenizer=[
{
text=杜,
raw_bytes=[
e69d9c
],
start=0,
end=1,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=1,
positionHistory=[
1
]
},
{
text=淳,
raw_bytes=[
e6b7b3
],
start=1,
end=2,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=2,
positionHistory=[
2
]
},
{
text=我,
raw_bytes=[
e68891
],
start=3,
end=4,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=3,
positionHistory=[
3
]
},
{
text=爱,
raw_bytes=[
e788b1
],
start=4,
end=5,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=4,
positionHistory=[
4
]
},
{
text=你,
raw_bytes=[
e4bda0
],
start=5,
end=6,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=5,
positionHistory=[
5
]
}
],
org.apache.lucene.analysis.util.ElisionFilter=[
{
text=杜,
raw_bytes=[
e69d9c
],
start=0,
end=1,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=1,
positionHistory=[
1,
1
]
},
{
text=淳,
raw_bytes=[
e6b7b3
],
start=1,
end=2,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=2,
positionHistory=[
2,
2
]
},
{
text=我,
raw_bytes=[
e68891
],
start=3,
end=4,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=3,
positionHistory=[
3,
3
]
},
{
text=爱,
raw_bytes=[
e788b1
],
start=4,
end=5,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=4,
positionHistory=[
4,
4
]
},
{
text=你,
raw_bytes=[
e4bda0
],
start=5,
end=6,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=5,
positionHistory=[
5,
5
]
}
],
org.apache.lucene.analysis.core.LowerCaseFilter=[
{
text=杜,
raw_bytes=[
e69d9c
],
start=0,
end=1,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=1,
positionHistory=[
1,
1,
1
]
},
{
text=淳,
raw_bytes=[
e6b7b3
],
start=1,
end=2,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=2,
positionHistory=[
2,
2,
2
]
},
{
text=我,
raw_bytes=[
e68891
],
start=3,
end=4,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=3,
positionHistory=[
3,
3,
3
]
},
{
text=爱,
raw_bytes=[
e788b1
],
start=4,
end=5,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=4,
positionHistory=[
4,
4,
4
]
},
{
text=你,
raw_bytes=[
e4bda0
],
start=5,
end=6,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=5,
positionHistory=[
5,
5,
5
]
}
],
org.apache.lucene.analysis.core.StopFilter=[
{
text=杜,
raw_bytes=[
e69d9c
],
start=0,
end=1,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=1,
positionHistory=[
1,
1,
1,
1
]
},
{
text=淳,
raw_bytes=[
e6b7b3
],
start=1,
end=2,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=2,
positionHistory=[
2,
2,
2,
2
]
},
{
text=我,
raw_bytes=[
e68891
],
start=3,
end=4,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=3,
positionHistory=[
3,
3,
3,
3
]
},
{
text=爱,
raw_bytes=[
e788b1
],
start=4,
end=5,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=4,
positionHistory=[
4,
4,
4,
4
]
},
{
text=你,
raw_bytes=[
e4bda0
],
start=5,
end=6,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
position=5,
positionHistory=[
5,
5,
5,
5
]
}
],
org.apache.lucene.analysis.it.ItalianLightStemFilter=[
{
text=杜,
raw_bytes=[
e69d9c
],
start=0,
end=1,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword=false,
position=1,
positionHistory=[
1,
1,
1,
1,
1
]
},
{
text=淳,
raw_bytes=[
e6b7b3
],
start=1,
end=2,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword=false,
position=2,
positionHistory=[
2,
2,
2,
2,
2
]
},
{
text=我,
raw_bytes=[
e68891
],
start=3,
end=4,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword=false,
position=3,
positionHistory=[
3,
3,
3,
3,
3
]
},
{
text=爱,
raw_bytes=[
e788b1
],
start=4,
end=5,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword=false,
position=4,
positionHistory=[
4,
4,
4,
4,
4
]
},
{
text=你,
raw_bytes=[
e4bda0
],
start=5,
end=6,
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1,
type=<IDEOGRAPHIC>,
org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword=false,
position=5,
positionHistory=[
5,
5,
5,
5,
5
]
}
]
}
}
},
field_names={

}
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: