您的位置:首页 > 大数据 > 人工智能

BroadcastShardOperationFailedException TooManyClauses[maxClauseCount is set to

2016-03-10 16:05 781 查看
luncen 查询条件不能大于1024
package org.apache.lucene.search;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0 *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

/** A Query that matches documents matching boolean combinations of other
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
* BooleanQuerys.
*/
public class BooleanQuery extends Query implements Iterable<BooleanClause> {

private static int maxClauseCount = 10240;

/** Thrown when an attempt is made to add more than {@link
* #getMaxClauseCount()} clauses. This typically happens if
* a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery
* is expanded to many terms during search.
*/
public static class TooManyClauses extends RuntimeException {
public TooManyClauses() {
super("maxClauseCount is set to " + maxClauseCount);
}
}

/** Return the maximum number of clauses permitted, 1024 by default.
* Attempts to add more than the permitted number of clauses cause {@link
* TooManyClauses} to be thrown.
* @see #setMaxClauseCount(int)
*/
public static int getMaxClauseCount() { return maxClauseCount; }

/**
* Set the maximum number of clauses permitted per BooleanQuery.
* Default value is 1024.
*/
public static void setMaxClauseCount(int maxClauseCount) {
if (maxClauseCount < 1) {
throw new IllegalArgumentException("maxClauseCount must be >= 1");
}
BooleanQuery.maxClauseCount = maxClauseCount;
}

private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
private final boolean disableCoord;

/** Constructs an empty boolean query. */
public BooleanQuery() {
disableCoord = false;
}

/** Constructs an empty boolean query.
*
* {@link Similarity#coord(int,int)} may be disabled in scoring, as
* appropriate. For example, this score factor does not make sense for most
* automatically generated queries, like {@link WildcardQuery} and {@link
* FuzzyQuery}.
*
* @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
*/
public BooleanQuery(boolean disableCoord) {
this.disableCoord = disableCoord;
}

/** Returns true iff {@link Similarity#coord(int,int)} is disabled in
* scoring for this query instance.
* @see #BooleanQuery(boolean)
*/
public boolean isCoordDisabled() { return disableCoord; }

/**
* Specifies a minimum number of the optional BooleanClauses
* which must be satisfied.
*
* <p>
* By default no optional clauses are necessary for a match
* (unless there are no required clauses).  If this method is used,
* then the specified number of clauses is required.
* </p>
* <p>
* Use of this method is totally independent of specifying that
* any specific clauses are required (or prohibited).  This number will
* only be compared against the number of matching optional clauses.
* </p>
*
* @param min the number of optional clauses that must match
*/
public void setMinimumNumberShouldMatch(int min) {
this.minNrShouldMatch = min;
}
protected int minNrShouldMatch = 0;

/**
* Gets the minimum number of the optional BooleanClauses
* which must be satisfied.
*/
public int getMinimumNumberShouldMatch() {
return minNrShouldMatch;
}

/** Adds a clause to a boolean query.
*
* @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
* @see #getMaxClauseCount()
*/
public void add(Query query, BooleanClause.Occur occur) {
add(new BooleanClause(query, occur));
}

/** Adds a clause to a boolean query.
* @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
* @see #getMaxClauseCount()
*/
public void add(BooleanClause clause) {
if (clauses.size() >= maxClauseCount) {
throw new TooManyClauses();
}

clauses.add(clause);
}

/** Returns the set of clauses in this query. */
public BooleanClause[] getClauses() {
return clauses.toArray(new BooleanClause[clauses.size()]);
}

/** Returns the list of clauses in this query. */
public List<BooleanClause> clauses() { return clauses; }

/** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
* make it possible to do:
* <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
*/
@Override
public final Iterator<BooleanClause> iterator() { return clauses().iterator(); }

/**
* Expert: the Weight for BooleanQuery, used to
* normalize, score and explain these queries.
*
* <p>NOTE: this API and implementation is subject to
* change suddenly in the next release.</p>
*/
protected class BooleanWeight extends Weight {
/** The Similarity implementation. */
protected Similarity similarity;
protected ArrayList<Weight> weights;
protected int maxCoord;  // num optional + num required
private final boolean disableCoord;

public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
throws IOException {
this.similarity = searcher.getSimilarity();
this.disableCoord = disableCoord;
weights = new ArrayList<Weight>(clauses.size());
for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = clauses.get(i);
Weight w = c.getQuery().createWeight(searcher);
weights.add(w);
if (!c.isProhibited()) {
maxCoord++;
}
}
}

@Override
public Query getQuery() { return BooleanQuery.this; }

@Override
public float getValueForNormalization() throws IOException {
float sum = 0.0f;
for (int i = 0 ; i < weights.size(); i++) {
// call sumOfSquaredWeights for all clauses in case of side effects
float s = weights.get(i).getValueForNormalization();         // sum sub weights
if (!clauses.get(i).isProhibited()) {
// only add to sum for non-prohibited clauses
sum += s;
}
}

sum *= getBoost() * getBoost();             // boost each sub-weight

return sum ;
}

public float coord(int overlap, int maxOverlap) {
// LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
// so coord() is not applied. But when BQ cannot optimize itself away
// for a single clause (minNrShouldMatch, prohibited clauses, etc), its
// important not to apply coord(1,1) for consistency, it might not be 1.0F
return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
}

@Override
public void normalize(float norm, float topLevelBoost) {
topLevelBoost *= getBoost();                         // incorporate boost
for (Weight w : weights) {
// normalize all clauses, (even if prohibited in case of side affects)
w.normalize(norm, topLevelBoost);
}
}

@Override
public Explanation explain(AtomicReaderContext context, int doc)
throws IOException {
final int minShouldMatch =
BooleanQuery.this.getMinimumNumberShouldMatch();
ComplexExplanation sumExpl = new ComplexExplanation();
sumExpl.setDescription("sum of:");
int coord = 0;
float sum = 0.0f;
boolean fail = false;
int shouldMatchCount = 0;
Iterator<BooleanClause> cIter = clauses.iterator();
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) {
if (c.isRequired()) {
fail = true;
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
sumExpl.addDetail(r);
}
continue;
}
Explanation e = w.explain(context, doc);
if (e.isMatch()) {
if (!c.isProhibited()) {
sumExpl.addDetail(e);
sum += e.getValue();
coord++;
} else {
Explanation r =
new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
r.addDetail(e);
sumExpl.addDetail(r);
fail = true;
}
if (c.getOccur() == Occur.SHOULD) {
shouldMatchCount++;
}
} else if (c.isRequired()) {
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
r.addDetail(e);
sumExpl.addDetail(r);
fail = true;
}
}
if (fail) {
sumExpl.setMatch(Boolean.FALSE);
sumExpl.setValue(0.0f);
sumExpl.setDescription
("Failure to meet condition(s) of required/prohibited clause(s)");
return sumExpl;
} else if (shouldMatchCount < minShouldMatch) {
sumExpl.setMatch(Boolean.FALSE);
sumExpl.setValue(0.0f);
sumExpl.setDescription("Failure to match minimum number "+
"of optional clauses: " + minShouldMatch);
return sumExpl;
}

sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
sumExpl.setValue(sum);

final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
if (coordFactor == 1.0f) {
return sumExpl;                             // eliminate wrapper
} else {
ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
sum*coordFactor,
"product of:");
result.addDetail(sumExpl);
result.addDetail(new Explanation(coordFactor,
"coord("+coord+"/"+maxCoord+")"));
return result;
}
}

@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs)
throws IOException {
List<Scorer> required = new ArrayList<Scorer>();
List<Scorer> prohibited = new ArrayList<Scorer>();
List<Scorer> optional = new ArrayList<Scorer>();
Iterator<BooleanClause> cIter = clauses.iterator();
for (Weight w  : weights) {
BooleanClause c =  cIter.next();
Scorer subScorer = w.scorer(context, true, false, acceptDocs);
if (subScorer == null) {
if (c.isRequired()) {
return null;
}
} else if (c.isRequired()) {
required.add(subScorer);
} else if (c.isProhibited()) {
prohibited.add(subScorer);
} else {
optional.add(subScorer);
}
}

// NOTE: we could also use BooleanScorer, if we knew
// this BooleanQuery was embedded in another
// BooleanQuery that was also using BooleanScorer (ie,
// BooleanScorer can nest).  But this is hard to
// detect and we never do so today... (ie, we only
// return BooleanScorer for topScorer):

// Check if we can and should return a BooleanScorer
// TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
// but the same is even true of pure conjunctions...
if (!scoreDocsInOrder && topScorer && required.size() == 0 && minNrShouldMatch <= 1) {
return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);
}

if (required.size() == 0 && optional.size() == 0) {
// no required and optional clauses.
return null;
} else if (optional.size() < minNrShouldMatch) {
// either >1 req scorer, or there are 0 req scorers and at least 1
// optional scorer. Therefore if there are not enough optional scorers
// no documents will be matched by the query
return null;
}

// simple conjunction
if (optional.size() == 0 && prohibited.size() == 0) {
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
}

// simple disjunction
if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
float coord[] = new float[optional.size()+1];
for (int i = 0; i < coord.length; i++) {
coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
}
return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
}

// Return a BooleanScorer2
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
}

@Override
public boolean scoresDocsOutOfOrder() {
for (BooleanClause c : clauses) {
if (c.isRequired()) {
return false; // BS2 (in-order) will be used by scorer()
}
}

// scorer() will return an out-of-order scorer if requested.
return true;
}

}

@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BooleanWeight(searcher, disableCoord);
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries
BooleanClause c = clauses.get(0);
if (!c.isProhibited()) {  // just return clause

Query query = c.getQuery().rewrite(reader);    // rewrite first

if (getBoost() != 1.0f) {                 // incorporate boost
if (query == c.getQuery()) {                   // if rewrite was no-op
query = query.clone();         // then clone before boost
}
// Since the BooleanQuery only has 1 clause, the BooleanQuery will be
// written out. Therefore the rewritten Query's boost must incorporate both
// the clause's boost, and the boost of the BooleanQuery itself
query.setBoost(getBoost() * query.getBoost());
}

return query;
}
}

BooleanQuery clone = null;                    // recursively rewrite
for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = clauses.get(i);
Query query = c.getQuery().rewrite(reader);
if (query != c.getQuery()) {                     // clause rewrote: must clone
if (clone == null) {
// The BooleanQuery clone is lazily initialized so only initialize
// it if a rewritten clause differs from the original clause (and hasn't been
// initialized already).  If nothing differs, the clone isn't needlessly created
clone = this.clone();
}
clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
}
}
if (clone != null) {
return clone;                               // some clauses rewrote
} else {
return this;                                // no clauses rewrote
}
}

// inherit javadoc
@Override
public void extractTerms(Set<Term> terms) {
for (BooleanClause clause : clauses) {
if (clause.getOccur() != Occur.MUST_NOT) {
clause.getQuery().extractTerms(terms);
}
}
}

@Override @SuppressWarnings("unchecked")
public BooleanQuery clone() {
BooleanQuery clone = (BooleanQuery)super.clone();
clone.clauses = (ArrayList<BooleanClause>) this.clauses.clone();
return clone;
}

/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0;
if (needParens) {
buffer.append("(");
}

for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = clauses.get(i);
if (c.isProhibited()) {
buffer.append("-");
} else if (c.isRequired()) {
buffer.append("+");
}

Query subQuery = c.getQuery();
if (subQuery != null) {
if (subQuery instanceof BooleanQuery) {  // wrap sub-bools in parens
buffer.append("(");
buffer.append(subQuery.toString(field));
buffer.append(")");
} else {
buffer.append(subQuery.toString(field));
}
} else {
buffer.append("null");
}

if (i != clauses.size()-1) {
buffer.append(" ");
}
}

if (needParens) {
buffer.append(")");
}

if (getMinimumNumberShouldMatch()>0) {
buffer.append('~');
buffer.append(getMinimumNumberShouldMatch());
}

if (getBoost() != 1.0f) {
buffer.append(ToStringUtils.boost(getBoost()));
}

return buffer.toString();
}

/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object o) {
if (!(o instanceof BooleanQuery)) {
return false;
}
BooleanQuery other = (BooleanQuery)o;
return this.getBoost() == other.getBoost()
&& this.clauses.equals(other.clauses)
&& this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()
&& this.disableCoord == other.disableCoord;
}

/** Returns a hash code value for this object.*/
@Override
public int hashCode() {
return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
+ getMinimumNumberShouldMatch() + (disableCoord ? 17:0);
}

}

   
下面是在网上看到得一个解决方法: 

可以通过设置: 
BooleanQuery.setMaxClauseCount(10000); 
来解决问题,但是这样带来的问题是会使得内存开销加大。容易出现OutOfMemory的异常所以需要非常谨慎处理。 
Lucene在做大量term值查询时, 如果这值过多, 超1024个term的话, 会出现
TooManyClauses[maxClauseCount is set to 1024] 的异常,因此建议在term过多的情况下采用filter, 而不是query。
以下是该情形在ES中的测试。
 
 

Java代码  


Settings defaultSettings = ImmutableSettings.settingsBuilder().put("client.transport.sniff", true).build();  

        Settings finalSettings = ImmutableSettings.settingsBuilder().put(defaultSettings)  

                .put("name", NetworkUtils.getLocalAddress().getHostName()).build();  

        TransportClient tmp = new TransportClient(finalSettings);  

        Client client = tmp.addTransportAddress(new InetSocketTransportAddress("127.0.0.1", 9300));  

        //demo 100万数据  

        for (int i = 0; i < 1000000; i++)  

        {  

            client.prepareIndex("test2", "book",String.valueOf(i)).setSource("bookid", String.valueOf(i), "booktype", String.valueOf(i%10000)).execute()  

            .actionGet();  

        }  

        //demo 近1万个term  

        String[] values = new String[10000];  

        for (int i = 1; i < 10000; i++)  

        {  

            values[i] = String.valueOf(i);  

        }  

        //terms query  

        //TermsQueryBuilder termQueryBuilder = new TermsQueryBuilder("booktype", values);  

        TermsFilterBuilder termsFilterBuilder = new TermsFilterBuilder("booktype", values);  

//      SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(termQueryBuilder)  

//              .setFrom(0).setSize(100).execute().actionGet();  

        //terms filter  

        SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(QueryBuilders.matchAllQuery()).setFilter(termsFilterBuilder)  

                .setFrom(0).setSize(100).execute().actionGet();  

        SearchHits hits = searchResponse.getHits();  

        System.out.println(hits.totalHits());  

        for (SearchHit searchHit : hits)  

        {  

            System.out.println(searchHit.getId() + ":" + searchHit.getSource().get("booktype"));  

        }  

 上述结果会发现, 用TermsQueryBuilder查询的话, 会出现TooManyClauses的异常, 因为设置了9999个term值。因此,当term过多时,建议采用filter, 而不是query. 
 http://lucene-group.group.iteye.com/group/topic/10555 http://maxrocray.iteye.com/blog/1860946
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: