// Runs an edismax search over the author fields using the query text, paging and
// highlight markers carried by `param`, then stores the hit count and the matching
// documents in mapDto under "total" and "results". Highlighted snippets, when
// present, replace the stored authorName / authorDes values of each document.
QueryResponse response = null;
SolrDocumentList results = null;
Map<String, Object> mapDto = new HashMap<String, Object>();
// NOTE(review): not referenced anywhere in this snippet; kept in case code outside
// this view relies on it.
SolrCondition condition = DataCache.AUTHOR_COND_TION;
try {
    SolrQuery query = new SolrQuery();
    query.setQuery(param.getMsg());
    query.setParam("defType", "edismax");
    query.set("q.op", "OR");
    // Returned fields. The former addField("*, score") call was dropped: this set()
    // replaces the whole "fl" parameter, so the earlier value never reached Solr.
    query.set("fl", "authorName,authorDes,id, authorFlag, articleNum,pv,lastPublishTime,subsNum, score, avartar");
    // qf: fields searched and their relative weights.
    query.set("qf", "authorName^3 authorDes^2 authorFlag^1");
    // bf: additive boost function. Must be ONE string literal; it was previously
    // split across two source lines, which does not compile.
    query.set("bf", "sum(div(log(lastPublishTime),10),div(sum(subsNum,pv),3000),div(articleNum,4000))");
    // pf: fields used for phrase scoring.
    query.set("pf", "authorName authorDes authorFlag");
    query.setStart(param.getStart());
    query.setRows(param.getRows());
    query.set("wt", "json");

    // Highlighting: sentence-bounded fragments of up to 200 characters, wrapped in
    // the pre/post markers supplied by the caller.
    query.setHighlight(true);
    query.addHighlightField("authorName");
    query.addHighlightField("authorDes");
    query.setHighlightSimplePre(param.getPre());
    query.setHighlightSimplePost(param.getPost());
    query.set("hl.boundaryScanner", "breakIterator");
    query.set("hl.bs.type", "SENTENCE");
    query.setHighlightFragsize(200);

    response = solrServer.query(query);
    results = response.getResults();
    Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
    long total = results.getNumFound();

    // The highlighting section can be missing from the response; skip the overlay then.
    if (highlighting != null) {
        String[] highlightedFields = {"authorName", "authorDes"};
        for (SolrDocument doc : results) {
            Object id = doc.get("id");
            if (id == null) {
                continue;
            }
            // The highlighting map is keyed by String, so look up by the id's string form.
            Map<String, List<String>> snippets = highlighting.get(id.toString());
            if (snippets == null) {
                continue;
            }
            for (String field : highlightedFields) {
                List<String> fragments = snippets.get(field);
                // Only the first fragment is used; guard against an empty list.
                if (fragments != null && !fragments.isEmpty()) {
                    doc.setField(field, CommonUtils.getDateSubString(fragments.get(0)));
                }
            }
        }
    }

    mapDto.put("total", total);
    mapDto.put("results", results);
} catch (SolrServerException e) {
    // NOTE(review): the failure is only printed, so mapDto is left without
    // "total"/"results"; consider logging through the project's logger or propagating.
    e.printStackTrace();
}
查询权重,打分 qf
例如:qf=fieldOne^2.3 fieldTwo fieldThree^0.4。qf 参数是指定 solr 从哪些 field 中搜索,像上面这样写就只会在 fieldOne、fieldTwo、fieldThree 这三个 field 中搜索。如果你还指定了 q 参数,比如 q=“Hadoop”,那么 solr 会认为是到 fieldOne、fieldTwo、fieldThree 这三个 field 中搜索 hadoop,这三个是并集的关系。生成的 query 为:fieldOne:hadoop^2.3 | fieldTwo:hadoop | fieldThree:hadoop^0.4。所以你定义了 dismax 的话,你的查询参数 q 就不要写成类似 q=“title:hadoop” 这样了,因为这样的话最终的查询为:fieldOne:title:hadoop^2.3 | fieldTwo:title:hadoop | fieldThree:title:hadoop^0.4,除非你的这几个 field 里有 “title:hadoop” 这样的词存在,否则是查不到结果的。qf 中设置了权重的字段也要在 pf 中指定;pf 用来指定查询的字段。
bf:搜索评分的加权函数(boost function),评分越高,排序越靠前。
pf:参与短语(phrase)匹配打分的字段。
搜索功能中比较复杂的是文档的打分排序。solr 中的打分规则继承了 lucene 中的相关打分规则,这里通过 solr 的 dismax 查询解析器来支持复杂的打分。在打分的时候,会考虑以下因素:搜索关键字匹配某些字段的打分比其他的字段要高(qf^);对于某些字段,搜索字符串的密集度(phrase)在打分中占的比重(pf^);其他复杂规则计算,比如销售量、价格、卖家等级等等都可以作为考虑的因素,影响打分(bf)。例如:http://10.1.1.58:8080/solr/select?defType=dismax&qf=name^100 subject^1&q=sony mp3&pf=name^100 subject^1&q.op=OR&bf=sum(recip(ms(NOW,last_modified),3.16e-11,1,1),div(1000,price))^100 这个查询的含义是:在 name 和 subject 中搜索关键字 sony mp3,name 和 subject 在字段查询中的比重分别为 100、1(qf=name^100 subject^1);并且这两个字段 phrase 的打分为 pf=name^100 subject^1,也就是 name 占的比重大一些;其他还参考产品的价格和商品更新时间(bf=sum(recip(ms(NOW,last_modified),3.16e-11,1,1),div(1000,price))^100)。