关于搜索引擎Lucene简单使用详解 - 编程语言

关于搜索引擎Lucene简单使用详解 - 编程语言
日常开发中,相信大家经常会用like去匹配一些数据,同时我们也知道,like往往会导致全表扫描,当数据量越来越大的时候,我们会纠结于数据库的龟速查找,此时我们必须另辟蹊径,这时lucene就可以大显身手了。
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import com.test.Article;
public class HelloWorld
{
    @Test
    public void testCreate() throws Exception
    {
        /**
         * 1、创建一个article对象,并且把信息存放进去
         * 2、调用indexWriter的API把数据存放在索引库中
         * 3、关闭indexWriter
         */
        //创建一个article对象,并且把信息存放进去
        Article article = new Article();
        article.setId(1L);
        article.setTitle(“java goodnice”);
        article .setContent(“多年来就是这么吊”);
        //2、调用indexWriter的API把数据存放在索引库中
         /**
            * 创建一个IndexWriter
            *    参数三个
            *       1、索引库   指向索引库的位置
            *       2、分词器
            */
            //创建索引库
            Directory directory = FSDirectory.open(new File(“./indexDir”));
            //创建分词器
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
        //把一个对象转换成document
        Document document = new Document();
        Field idField = new Field(“id”, article.getId().toString(), Store.YES, Index.NOT_ANALYZED);
        Field titleField = new Field(“title”, article.getTitle(), Store.YES, Index.ANALYZED);
        Field contentField = new Field(“content”, article.getContent(), Store.YES, Index.ANALYZED);
        document.add(idField);
        document.add(titleField);
        document.add(contentField);
        indexWriter.addDocument(document);
        //3、关闭indexWriter
        indexWriter.close();
    }
    @Test
    public  void testSearchIndex() throws Exception
    {
        /**
         * 1.创建一个 indexSerach对象
         * 2.调用search方法进行检索
         * 3.输出内容
         */
        // 1.创建一个 indexSerach对象
        //–索引库
        Directory directory = FSDirectory.open(new File(“./indexDir”));
        IndexSearcher searcher = new IndexSearcher(directory);
        //2..调用search方法进行检索
        //–
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        QueryParser queryParser = new QueryParser(Version.LUCENE_30, “id”, analyzer);
        //设置搜索的关键字
        Query query = queryParser.parse(“1”);
        TopDocs topDocs = searcher.search(query, 10);
        //获得根据关键字查询出来的总的记录数
        int count = topDocs.totalHits;
        List<Article> articles = new ArrayList<Article>();
        //获得数组
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for(ScoreDoc scoreDoc : scoreDocs)
        {
            //关键字得分
            float score = scoreDoc.score;
            //索引的下标
            int index = scoreDoc.doc;
            //根据索引获得document对象
            Document document = searcher.doc(index);
            //把document转化成article
            Article article = new Article();
            article.setId(Long.parseLong(document.get(“id”)));
            article.setTitle(document.get(“title”));
            article.setContent(document.get(“content”));
            articles.add(article);
        }
        for(Article article : articles)
        {
            System.out.println(article.getId());
            System.out.println(article.getTitle());
            System.out.println(article.getContent());
        }
    }
}
两个工具类
package com.kite.luncene.utils;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class LunceneUtils
{
    //索引库
    public static Directory directory;
    //分词器
    public static Analyzer analyzer;
    static
    {
        try
        {
            directory = FSDirectory.open(new File(“./indexDor”));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
        } catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}
package com.kite.luncene.utils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import com.testArticle;
public class DocumentUtils
{
    /**
     * 通过 article获得document
     * @param article
     * @return
     */
    public static Document articleToDocument(Article article)
    {
        Document document = new Document();
        Field idField = new Field(“id”, article.getId().toString(), Store.YES, Index.NOT_ANALYZED);
        Field titleField = new Field(“title”, article.getTitle(), Store.YES, Index.ANALYZED);
        Field  contentField = new Field(“content”, article.getContent(), Store.YES, Index.ANALYZED);
        document.add(idField);
        document.add(titleField);
        document.add(contentField);
        return document;
    }
    /**
     * 通过document 获得article对象
     * @param document
     * @return
     */
    public static Article documentToArticle(Document document) 
    {
        Article article = new Article();
        article.setId(Long.parseLong(document.get(“id”)));
        article.setTitle((document.get(“title”)));
        article.setContent(document.get(“content”));
        return article;
    }
}
使用工具类实现简单的增删改查功能
package com.kite.luncene.index;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.junit.Test;
import com.testArticle;
import com.kite.luncene.utils.DocumentUtils;
import com.kite.luncene.utils.LunceneUtils;
public class ArticleIndex
{
    @Test
    public void testCreateIndex() throws Exception
    {
        IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        Article article = new Article();
        article.setId(1L);
        article.setTitle(“luncenes是一个好难写的东西”);
        article.setContent(“百度,谷歌是很好的搜索引擎”);
        //通过工具类转换成document
        Document document = DocumentUtils.articleToDocument(article);
        indexWriter.addDocument(document);
        indexWriter.close();
    }
    @Test
    public void testSearchIndex() throws Exception
    {
        IndexSearcher indexSearcher = new IndexSearcher(LunceneUtils.directory);
        /**
         * Version.LUCENE_30 版本
         * “title”   根据那个字段
         * LunceneUtils.analyzer  分词器
         */
        QueryParser queryParser = new QueryParser(Version.LUCENE_30, “title”, LunceneUtils.analyzer);
        //luncene  关键字
        Query query = queryParser.parse(“luncene”);
        TopDocs topDocs = indexSearcher.search(query, 2);
        //获得根据关键字查询到的所有的记录数
        int count = topDocs.totalHits;
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        List<Article> articles = new ArrayList<Article>();
        for(ScoreDoc scoreDoc : scoreDocs)
        {
            //scoreDoc.score 获得关键字得分
            float score = scoreDoc.score;
            //scoreDoc.doc 获得索引的下标
            int index = scoreDoc.doc;
            //通过索引的下标进行查询
            Document document = indexSearcher.doc(index);
            Article article = DocumentUtils.documentToArticle(document);
            articles.add(article);
        }
        //遍历输出
        for(Article article : articles)
        {
            System.out.println(article.getId().toString());
            System.err.println(article.getTitle());
            System.out.println(article.getContent());
        }
    }
    /**
     * 修改是先删除 然后进行添加
     * @throws Exception
     */
    @Test
    public void testDeleteIndex() throws Exception
    {
        IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        //indexWriter.deleteAll();–删除所有
        /*
         * term  关键字对象     
         *             title  字段名
         *             luncenes    关键字的内容
         */
        Term term = new Term(“title”, “luncenes”);
        //根据关键字进行删除   会在文件夹中增加一个.del结尾的文件
        indexWriter.deleteDocuments(term);
        indexWriter.close();
    }
    @Test
    public void testUpdateIndex() throws Exception
    {
        IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        Term term = new Term(“title”,”luncenes”);
        Article article = new Article();
        article.setId(1L);
        article.setTitle(“luncene是一个好难写的东西,少个s不解释”);
        article.setContent(“百度,谷歌是很好的搜索引擎”);
        /*
         * term 根据关键字进行修改 
         * doc   修改后的内容
         */
        indexWriter.updateDocument(term, DocumentUtils.articleToDocument(article));
        indexWriter.close();
    }
}

关于搜索引擎Lucene简单使用详解 - 编程语言

转载请注明来源网站:blog.ytso.com谢谢!

原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/14699.html

(0)
上一篇 2021年7月19日
下一篇 2021年7月19日

相关推荐

发表回复

登录后才能评论