lucene之suggest

xiaoxiao2021-02-28  8

Lucene 关键词搜索

所需jar

lucene-suggest-4.7.0.jar

lucene-queryparser-4.7.0.jar

lucene-misc-4.7.0.jar

lucene-memory-4.7.0.jar

lucene-highlighter-4.7.0.jar

lucene-core-4.7.0.jar

lucene-analyzers-common-4.7.0.jar

分词器

IKAnalyzer2012FF_u1.jar

效果图

代码:

package lucene;

import java.io.File;  

import java.io.IOException;  

import java.io.StringReader;  

import java.util.HashSet;  

import java.util.List;  

import java.util.Set;  

  

import org.apache.log4j.Logger;  

import org.apache.lucene.analysis.Analyzer;  

import org.apache.lucene.analysis.AnalyzerWrapper;  

import org.apache.lucene.analysis.TokenStream;  

import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;  

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;  

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;  

import org.apache.lucene.codecs.lucene46.Lucene46Codec;  

import org.apache.lucene.document.BinaryDocValuesField;  

import org.apache.lucene.document.Document;  

import org.apache.lucene.document.Field;  

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.FieldType;  

import org.apache.lucene.document.NumericDocValuesField;  

import org.apache.lucene.document.StringField;

import org.apache.lucene.index.AtomicReader;  

import org.apache.lucene.index.DirectoryReader;  

import org.apache.lucene.index.IndexReader;  

import org.apache.lucene.index.IndexWriter;  

import org.apache.lucene.index.IndexWriterConfig;  

import org.apache.lucene.index.IndexWriterConfig.OpenMode;  

import org.apache.lucene.index.MultiDocValues;  

import org.apache.lucene.index.SlowCompositeReaderWrapper;  

import org.apache.lucene.index.Term;  

import org.apache.lucene.search.BooleanClause;  

import org.apache.lucene.search.BooleanQuery;  

import org.apache.lucene.search.IndexSearcher;  

import org.apache.lucene.search.Query;  

import org.apache.lucene.search.Sort;  

import org.apache.lucene.search.SortField;  

import org.apache.lucene.search.TermQuery;  

import org.apache.lucene.search.TopDocs;  

import org.apache.lucene.search.suggest.InputIterator;  

import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;  

import org.apache.lucene.store.Directory;  

import org.apache.lucene.util.BytesRef;  

import org.apache.lucene.util.IOUtils;  

import org.apache.lucene.util.Version;  

  

  

/**
 * Infix suggester extending Lucene's {@link AnalyzingInfixSuggester} so that:
 * <ul>
 *   <li>the index {@link OpenMode} (create vs. append) is configurable,</li>
 *   <li>{@link #build(InputIterator)} skips the parent's index-time sort, and</li>
 *   <li>{@link #lookup} orders hits by the numeric "weight" doc value at query time.</li>
 * </ul>
 */
public class MyAnalyzingInfixSuggester extends AnalyzingInfixSuggester {  
    /** Logger. **/  
    private final Logger logger = Logger.getLogger(MyAnalyzingInfixSuggester.class);  

    /** Field name used for the indexed text. */  
    public static final String TEXT_FIELD_NAME = "text";  

    /** Default minimum number of leading characters before 
     *  PrefixQuery is used (4). */  
    public static final int DEFAULT_MIN_PREFIX_CHARS = 4;  
    /** On-disk directory holding the suggester index. */
    private final File indexPath;  
    /** Tokens up to this length are also indexed as edge n-grams for prefix matching. */
    final int minPrefixChars;  
    /** Lucene compatibility version. */
    final Version matchVersion;  
    private final Directory dir;  
    /** How the suggester index is opened (CREATE to rebuild, or append). */  
    private final OpenMode mode;  

    /* 
     * Constructor: initializes all state and opens the index directory.
     * @param matchVersion  Lucene version 
     * @param indexPath index file directory 
     * @param analyzer analyzer (used both at index and at query time) 
     * @param mode how the index is opened (create or append) 
     * @throws IOException  
     */  
    public MyAnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer, OpenMode mode) throws IOException {  
        // Delegate to the parent constructor; same analyzer for index and query.
        super(matchVersion, indexPath, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);  
        this.mode = mode;  
        this.indexPath = indexPath;  
        this.minPrefixChars = DEFAULT_MIN_PREFIX_CHARS;  
        this.matchVersion = matchVersion;  
        dir = getDirectory(indexPath);  
    }  

    /* 
     * Overridden to make the open mode configurable (create vs. append).
     * The gram analyzer used for the temporary ".tmp" index during build()
     * is an AnalyzerWrapper, so that index is always opened with CREATE.
     * @see org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester#getIndexWriterConfig(org.apache.lucene.util.Version, org.apache.lucene.analysis.Analyzer) 
     */  
    @Override  
    protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer) {  
        IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);  
        iwc.setCodec(new Lucene46Codec());  
        if (indexAnalyzer instanceof AnalyzerWrapper) {  
            // The wrapped (gram) analyzer only targets the tmp directory:
            // always recreate that index from scratch.
            iwc.setOpenMode(OpenMode.CREATE);  
        } else {  
            iwc.setOpenMode(mode);  
        }  
        return iwc;  
    }  

    /* 
     * Overridden build that omits the parent's sort at index time; ordering
     * is applied at lookup time via a Sort on the "weight" field instead.
     * @see org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester#build(org.apache.lucene.search.suggest.InputIterator) 
     */  
    @Override  
    public void build(InputIterator iter) throws IOException {  
        if (searcher != null) {  
            searcher.getIndexReader().close();  
            searcher = null;  
        }  
        // Temporary index: documents are written here first, then copied into
        // the real index via addIndexes below.
        Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));  
        IndexWriter w = null;  
        IndexWriter w2 = null;  
        AtomicReader r = null;  
        boolean success = false;  
        try {  
            // Analyzer that edge-n-grams only the "textgrams" field (enables
            // prefix matching for short tokens) and wraps indexAnalyzer for
            // every other field unchanged.
            Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {  
                @Override  
                protected Analyzer getWrappedAnalyzer(String fieldName) {  
                    return indexAnalyzer;  
                }  

                @Override  
                protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {  
                    if (fieldName.equals("textgrams") && minPrefixChars > 0) {  
                        return new TokenStreamComponents(components.getTokenizer(), new EdgeNGramTokenFilter(matchVersion, components.getTokenStream(), 1, minPrefixChars));  
                    } else {  
                        return components;  
                    }  
                }  
            };  
            w = new IndexWriter(dirTmp, getIndexWriterConfig(matchVersion, gramAnalyzer));  
            BytesRef text;  
            // One reusable Document / Field set for every suggestion entry.
            Document doc = new Document();  
            FieldType ft = getTextFieldType(); 
            Field textField = new Field(TEXT_FIELD_NAME, "", ft);  
            doc.add(textField);  
            Field textGramField = new Field("textgrams", "", ft);  
            doc.add(textGramField);  
            Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef());  
            doc.add(textDVField);  
            // Stored copy of the suggestion text; external callers delete by
            // Term("word", ...) when updating an entry.
            Field wordDVField = new StringField("word", "", Field.Store.YES);
            doc.add(wordDVField);
            Field weightField = new NumericDocValuesField("weight", 0);  
            doc.add(weightField);  
            // Constant marker field: every doc gets count="0" so that the
            // no-keyword lookup(int,...) below can match all docs with a
            // single TermQuery.
            Field countField = new StringField("count", "0", Field.Store.YES);
            doc.add(countField);  
            Field payloadField;  
            if (iter.hasPayloads()) {  
                payloadField = new BinaryDocValuesField("payloads", new BytesRef());  
                doc.add(payloadField);  
            } else {  
                payloadField = null;  
            }  
            long t0 = System.nanoTime();  
            while ((text = iter.next()) != null) {  
                String textString = text.utf8ToString();  
                textField.setStringValue(textString);  
                wordDVField.setStringValue(textString);
                textGramField.setStringValue(textString);  
                textDVField.setBytesValue(text);  
                weightField.setLongValue(iter.weight());  
                if (iter.hasPayloads()) {  
                    payloadField.setBytesValue(iter.payload());  
                }  
                w.addDocument(doc);  
            }  
            logger.debug("initial indexing time: " + ((System.nanoTime() - t0) / 1000000) + " msec");  

            // Copy the tmp index into the real one (no sort applied, unlike
            // the parent implementation), then discard the tmp writer.
            r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));  
            w.rollback();  

            w2 = new IndexWriter(dir, getIndexWriterConfig(matchVersion, indexAnalyzer));  
            w2.addIndexes(new IndexReader[] { r });  
            r.close();  

            searcher = new IndexSearcher(DirectoryReader.open(w2, false));  
            w2.close();  

            // Cache the doc-values views consumed at lookup time.
            payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");  
            weightsDV = MultiDocValues.getNumericValues(searcher.getIndexReader(), "weight");  
            textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);  
            assert textDV != null;  
            success = true;  
        } finally {  
            if (success) {  
                IOUtils.close(w, w2, r, dirTmp);  
            } else {  
                IOUtils.closeWhileHandlingException(w, w2, r, dirTmp);  
            }  
        }  
    }  

    /* 
     * Overridden lookup that sorts results by the "weight" doc value
     * (descending) instead of the parent's ordering.
     * @see org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester#lookup(java.lang.CharSequence, int, boolean, boolean) 
     */  
    @Override  
    public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) {  

        if (searcher == null) {  
            throw new IllegalStateException("suggester was not built");  
        }  

        final BooleanClause.Occur occur;  
        if (allTermsRequired) {  
            occur = BooleanClause.Occur.MUST;  
        } else {  
            occur = BooleanClause.Occur.SHOULD;  
        }  

        TokenStream ts = null;  
        try {  
            // Tokenize the query; every token except the last becomes an
            // exact TermQuery on the text field.
            ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()));  
            ts.reset();  
            final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);  
            final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);  
            String lastToken = null;  
            BooleanQuery query = new BooleanQuery();  
            int maxEndOffset = -1;  
            final Set<String> matchedTokens = new HashSet<String>();  
            while (ts.incrementToken()) {  
                if (lastToken != null) {  
                    matchedTokens.add(lastToken);  
                    query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);  
                }  
                lastToken = termAtt.toString();  
                if (lastToken != null) {  
                    maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());  
                }  
            }  
            ts.end();  

            String prefixToken = null;  
            if (lastToken != null) {  
                Query lastQuery;  
                if (maxEndOffset == offsetAtt.endOffset()) {  
                    // Use PrefixQuery (or the ngram equivalent) when  
                    // there was no trailing discarded chars in the  
                    // string (e.g. whitespace), so that if query does  
                    // not end with a space we show prefix matches for  
                    // that token:  
                    lastQuery = getLastTokenQuery(lastToken);  
                    prefixToken = lastToken;  
                } else {  
                    // Use TermQuery for an exact match if there were  
                    // trailing discarded chars (e.g. whitespace), so  
                    // that if query ends with a space we only show  
                    // exact matches for that term:  
                    matchedTokens.add(lastToken);  
                    lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));  
                }  
                if (lastQuery != null) {  
                    query.add(lastQuery, occur);  
                }  
            }  
            ts.close();  

            Query finalQuery = finishQuery(query, allTermsRequired);  

            // Sort by the "weight" doc value, highest first.
            Sort sort = new Sort(new SortField("weight", SortField.Type.LONG, true));  
            TopDocs hits = searcher.search(finalQuery, num, sort);  

            List<LookupResult> results = createResults(hits, num, key, doHighlight, matchedTokens, prefixToken);  
            return results;  
        } catch (IOException ioe) {  
            throw new RuntimeException(ioe);  
        } finally {  
            IOUtils.closeWhileHandlingException(ts);  
        }  
    }  

    /**
     * Lookup without a query string: matches every suggestion via the
     * constant count="0" marker field written by build(), and returns the
     * top {@code num} entries by weight, descending.
     */
    public List<LookupResult> lookup(  int num, boolean allTermsRequired, boolean doHighlight ) {  
        if (searcher == null) {  
            throw new IllegalStateException("suggester was not built");  
        }  

        final Set<String> matchedTokens = new HashSet<String>();  
        String prefixToken = null;  
        final BooleanClause.Occur occur;  
        occur = BooleanClause.Occur.SHOULD;  

        TokenStream ts = null;  
        try {  
            // Every document carries count="0", so this TermQuery is
            // effectively a match-all query.
            BooleanQuery query = new BooleanQuery();  
            Query  termQuery = new TermQuery(new Term("count", "0"));
            query.add(   termQuery, occur);  
            Query finalQuery = finishQuery(query, allTermsRequired);  

            // Sort by the "weight" doc value, highest first.
            Sort sort = new Sort(new SortField("weight", SortField.Type.LONG, true));  
            TopDocs hits = searcher.search(finalQuery, num, sort);  

            List<LookupResult> results = createResults(hits, num, null, doHighlight, matchedTokens, prefixToken);  
            return results;  
        } catch (IOException ioe) {  
            throw new RuntimeException(ioe);  
        } finally {  
            IOUtils.closeWhileHandlingException(ts);  
        }  
    }  
}  

package lucene;

import java.io.ByteArrayOutputStream;  

import java.io.IOException;  

import java.io.ObjectOutputStream;  

import java.io.UnsupportedEncodingException;  

import java.util.Comparator;  

import java.util.HashSet;  

import java.util.Iterator;  

import java.util.Set;  

import org.apache.lucene.search.suggest.InputIterator;  

import org.apache.lucene.util.BytesRef;  

public class ProductIterator implements InputIterator {  

 

//集合的迭代器

private Iterator<VO> productIterator;  

//遍历的当前的suggerter

private VO currentProduct;  

//

public ProductIterator(Iterator<VO> productIterator) {  

      this.productIterator = productIterator;  

}  

  public boolean hasContexts() {  

      return true;  

  }  

  /** 

   * 是否有设置payload信息 

   */  

  public boolean hasPayloads() {  

      return true;  

  }  

  public Comparator<BytesRef> getComparator() {  

      return null;  

  }  

  public BytesRef next() {  

      if (productIterator.hasNext()) {  

          currentProduct = productIterator.next();  

          try {  

              //返回当前Projectname值,把product类的name属性值作为key  

              return new BytesRef(currentProduct.getTerm().getBytes("UTF8"));  

          } catch (UnsupportedEncodingException e) {  

              throw new RuntimeException("Couldn't convert to UTF-8",e);  

          }  

      } else {  

          return null;  

      }  

  }  

  /** 

   * Product对象序列化存入payload 

   * [这里仅仅是个示例,其实这种做法不可取,一般不会把整个对象存入payload,这样索引体积会很大,浪费硬盘空间

   * 存其他后期需要取出的各种数据  

   */  

  

  public BytesRef payload() {  

      try {  

          ByteArrayOutputStream bos = new ByteArrayOutputStream();  

          ObjectOutputStream out = new ObjectOutputStream(bos);  

          out.writeObject(currentProduct);  

//          out.writeInt(currentProduct.getTimes());  

          out.close();  

          return new BytesRef(bos.toByteArray());  

      } catch (IOException e) {  

          throw new RuntimeException("Well that's unfortunate.");  

      }  

  }  

  /** 

   * 把产品的销售区域存入contextcontext里可以是任意的自定义数据,一般用于数据过滤 

   * Set集合里的每一个元素都会被创建一个TermQuery,你只是提供一个Set集合,至于new TermQuery 

   * VO底层API去做了,但你必须要了解底层干了些什么 

   */  

  public Set<BytesRef> contexts() {  

      try {  

         Set<BytesRef> regions = new HashSet<BytesRef>();  

          //for (String region : currentProduct.getStlist()) {  

              regions.add(new BytesRef(currentProduct.getTerm().getBytes("UTF8")));  

           //}  

          return regions;  

      } catch (UnsupportedEncodingException e) {  

          throw new RuntimeException("Couldn't convert to UTF-8");  

      }    

  }  

  /** 

   * 返回权重值,这个值会影响排序 

   * 这里以产品的销售量作为权重值,weight值即最终返回的热词列表里每个热词的权重值 

   * 怎么设计返回这个权重值,发挥你们的想象力吧 

   */  

  public long weight() {  

     return  currentProduct.getTimes();

  }  

}  

package lucene;

import java.io.ByteArrayInputStream;

import java.io.File;

import java.io.IOException;

import java.io.ObjectInputStream;

import java.util.ArrayList;

import java.util.Collections;

import java.util.Comparator;

import java.util.HashSet;

import java.util.List;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;

import org.apache.lucene.index.Term;

import org.apache.lucene.search.FuzzyTermsEnum;

import org.apache.lucene.search.TermQuery;

import org.apache.lucene.search.suggest.Lookup.LookupResult;

import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;

import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.BytesRef;

import org.apache.lucene.util.Version;

import org.wltea.analyzer.lucene.IKAnalyzer;

// (removed invalid line "import document;" — not a legal Java import)

public class LuceneSuggest {

private static final Version VERSION = Version.LUCENE_47;

public   void indexmake( List<VO>  lucenelist,File indexDir,Analyzer analyzer,Version VERSION, OpenMode create ) throws IOException{

MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(VERSION, indexDir, analyzer,OpenMode.CREATE_OR_APPEND);

try {

suggester.build(new ProductIterator(lucenelist.iterator()));

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}finally {  

            //关闭  

suggester.close();  

        } 

}

 

public    List<VO>  lookup(String name, String region,int count,String orgno,String indexDir,Analyzer analyzer,Version VERSION ,OpenMode create ) throws IOException {

MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(VERSION, new File(indexDir+orgno), analyzer,OpenMode.CREATE_OR_APPEND);

 

List<LookupResult> lookup = suggester.lookup( count,false,false );

List<VO> lulist=new ArrayList<VO>();

 HashSet<BytesRef> contexts = new HashSet<BytesRef>(); 

/*contexts.add(new BytesRef(region.getBytes("UTF8"))); */

// 先以contexts为过滤条件进行过滤,再以name为关键字进行筛选,根据weight值排序返回前2

// 3个布尔值即是否每个Term都要匹配,第4个参数表示是否需要关键字高亮         //5  最大长度

 /* 

         *   查询结果     

         *     name- 查询的关键词 

         *     count- 返回的最多数量 

         *     allTermsRequired - should或者must关系 

         *     doHighlight - 高亮 

         */  

List<LookupResult> results =  suggester.lookup(name ,count,false,false);

System.out.println("-- \"" + name + "\" (" + region + "):");

for (LookupResult result : results) {

System.out.println(result.key);

 String str = (String) result.highlightKey; 

 str=(String) result.key;

 Integer time = null;  

// payload中反序列化出Product对象

BytesRef bytesRef = result.payload;

ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(bytesRef.bytes));

try {

VO vo   = (VO) is.readObject() ;

lulist.add(vo);

} catch (Exception e) {

e.printStackTrace();

}

}

suggester.close();

System.out.println("结束");

if(!"00".equals(orgno)){

MyAnalyzingInfixSuggester suggester1 = new MyAnalyzingInfixSuggester(VERSION, new File(indexDir+"00"), analyzer,OpenMode.CREATE_OR_APPEND);

 /* 

         *   查询结果     

         *     name- 查询的关键词 

         *     count- 返回的最多数量 

         *     allTermsRequired - should或者must关系 

         *     doHighlight - 高亮 

         */  

List<LookupResult> results1 =  suggester1.lookup(name ,count,false,false);

System.out.println("-- \"" + name + "\" (" + region + "):");

for (LookupResult result : results1) {

System.out.println(result.key);

 String str = (String) result.highlightKey; 

 str=(String) result.key;

 Integer time = null;  

// payload中反序列化出Product对象

BytesRef bytesRef = result.payload;

ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(bytesRef.bytes));

try {

VO vo   = (VO) is.readObject() ;

lulist.add(vo);

} catch (Exception e) {

e.printStackTrace();

}

}

suggester.close();

}

 Collections.sort(lulist, new Comparator<VO>() {  

            @Override  

            public int compare(VO o1, VO o2) {  

                int i = o1.getTimes() - o2.getTimes();  

                return i;  

            }  

        });  

 Collections.reverse(lulist);

 if(lulist.size() > count){ 

 lulist.subList(0, count);//取前count 

 }

return lulist;

}

/**

 * 

 * @param count

 * @param orgno

 * @param indexDir

 * @param analyzer

 * @param VERSION

 * @param create

 * @return 查询全部数据  (思路定义了定值count 为 0) 默认查询

 * @throws IOException

 */

public    List<VO>  lookup1(  int count,String orgno,String indexDir,Analyzer analyzer,Version VERSION ,OpenMode create ) throws IOException {

MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(VERSION, new File(indexDir+orgno), analyzer,OpenMode.CREATE_OR_APPEND);

List<VO> lulist=new ArrayList<VO>();

 

 List<LookupResult> results = suggester.lookup( count,false,false );

for (LookupResult result : results) {

System.out.println(result.key);

 String str = (String) result.highlightKey

 str=(String) result.key;

 Integer time = null;  

// 从payload中反序列化出Product对象

BytesRef bytesRef = result.payload;

ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(bytesRef.bytes));

try {

VO vo   = (VO) is.readObject() ;

lulist.add(vo);

catch (Exception e) {

e.printStackTrace();

}

}

suggester.close();

System.out.println("结束");

 Collections.sort(lulist, new Comparator<VO>() {  

            @Override  

            public int compare(VO o1, VO o2) {  

                int i = o1.getTimes() - o2.getTimes();  

                return i;  

            }  

        });  

 Collections.reverse(lulist);

 if(lulist.size() > count){ 

 lulist.subList(0, count);//取前count 条

 }

return lulist;

}

 

/**

 * 跟新

 * @return

 * @throws IOException 

 */

public    void  edit(String word,File indexDir,Analyzer analyzer,Version VERSION ,VO vo) throws IOException{

Directory fsDir = FSDirectory.open(indexDir);     

IndexWriter indexWriter = new IndexWriter(fsDir, new IndexWriterConfig( VERSION, analyzer));  

    //删除对应的词条  

     TermQuery termQuery = new TermQuery(new Term("word", word));

//     indexWriter.deleteDocuments(new Term(MyAnalyzingInfixSuggester.TEXT_FIELD_NAME, word));  

     indexWriter.deleteDocuments(termQuery);

    //彻底删除  

    indexWriter.forceMergeDeletes();  

    //关闭IndexWriter  

    indexWriter.commit();  

    indexWriter.close();  

    List<VO> list = new ArrayList<VO>();  

    list.add(vo);  

    //添加建立新的词条索引  

    this.indexmake(list, indexDir,analyzer, VERSION,OpenMode.APPEND);

}

public    void  deleteSuggert( String text,File indexDir  ) throws IOException{

Analyzer analyzer = new IKAnalyzer(false);

Directory fsDir = FSDirectory.open(indexDir);     

IndexWriter indexWriter = new IndexWriter(fsDir, new IndexWriterConfig( VERSION, analyzer));  

    //删除对应的词条  

     TermQuery termQuery = new TermQuery(new Term("word",text));

//     indexWriter.deleteDocuments(new Term(MyAnalyzingInfixSuggester.TEXT_FIELD_NAME, word));  

     indexWriter.deleteDocuments(termQuery);

    //彻底删除  

    indexWriter.forceMergeDeletes();  

    //关闭IndexWriter  

    indexWriter.commit();  

    indexWriter.close();  

  

}

}

package  lucene;

import java.io.Serializable;

public class VO implements Serializable{

 

private static final long serialVersionUID = 1L;

String term;  

    int times;  

    /** 

     * @param term  词条 

     * @param times  词频 

     */  

    public VO(String term, int times) {  

        this.term = term;  

        this.times = times;  

    }  

    public VO() {  

        super();  

    }  

    /** 

     * @return the term 

     */  

    public String getTerm() {  

        return term;  

    }  

    /** 

     * @param term the term to set 

     */  

    public void setTerm(String term) {  

        this.term = term;  

    }  

    /** 

     * @return the times 

     */  

    public int getTimes() {  

        return times;  

    }  

    /** 

     * @param times the times to set 

     */  

    public void setTimes(int times) {  

        this.times = times;  

    }  

    /* (non-Javadoc

     * @see java.lang.Object#toString() 

     */  

    @Override  

    public String toString() {  

        return term + " " + times;  

    }  

    /* (non-Javadoc

     * @see java.lang.Object#hashCode() 

     */  

    @Override  

    public int hashCode() {  

        final int prime = 31;  

        int result = 1;  

        result = prime * result + ((term == null) ? 0 : term.hashCode());  

        return result;  

    }  

    /* 

     * 只对比term 

     * @see java.lang.Object#equals(java.lang.Object) 

     */  

    @Override  

    public boolean equals(Object obj) {  

        if (this == obj)  

            return true;  

        if (obj == null)  

            return false;  

        if (getClass() != obj.getClass())  

            return false;  

        VO other = (VO) obj;  

        if (term == null) {  

            if (other.term != null)  

                return false;  

        } else if (!term.equals(other.term))  

            return false;  

        return true;  

    }  

}  

1.MyAnalyzingInfixSuggester 是重写AnalyzingInfixSuggester 由于原代码不追加索引,采用新建OpenMode.CREATE,所以重写

2.Document 为创建的数据

Main 

word为搜索的key;第二个参数可以是null,作用是进行过滤;搜索10条;orgno与IndexWPath主要是拼索引目录地址;分词器参数false为细粒度分词,true为智能分词

List<VO> lookup = luceneSuggest.lookup(word, null, 10,orgno,  IndexWPath , new IKAnalyzer(false),  Version.LUCENE_47,OpenMode.CREATE_OR_APPEND);

词频排序

VO vo= new VO();

vo.setTerm(word);

vo.setTimes(Integer.valueOf(terms)+1);

LuceneSuggest luceneSuggest = new LuceneSuggest();

luceneSuggest.edit(word, new File(IndexWPath), new IKAnalyzer(false),Version.LUCENE_47, vo);

转载请注明原文地址: https://www.6miu.com/read-200077.html

最新回复(0)