日常开发中,相信大家经常会用 like 去匹配一些数据。我们也知道,like 往往会导致全表扫描,当数据量越来越大时,数据库查询会慢得让人难以忍受,此时就必须另辟蹊径,这正是 Lucene 大显身手的时候。
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import com.test.Article;
public class HelloWorld
{
@Test
public void testCreate() throws Exception
{
/**
* 1、创建一个article对象,并且把信息存放进去
* 2、调用indexWriter的API把数据存放在索引库中
* 3、关闭indexWriter
*/
//创建一个article对象,并且把信息存放进去
Article article = new Article();
article.setId(1L);
article.setTitle(“java goodnice”);
article .setContent(“多年来就是这么吊”);
//2、调用indexWriter的API把数据存放在索引库中
/**
* 创建一个IndexWriter
* 参数三个
* 1、索引库 指向索引库的位置
* 2、分词器
*/
//创建索引库
Directory directory = FSDirectory.open(new File(“./indexDir”));
//创建分词器
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
//把一个对象转换成document
Document document = new Document();
Field idField = new Field(“id”, article.getId().toString(), Store.YES, Index.NOT_ANALYZED);
Field titleField = new Field(“title”, article.getTitle(), Store.YES, Index.ANALYZED);
Field contentField = new Field(“content”, article.getContent(), Store.YES, Index.ANALYZED);
document.add(idField);
document.add(titleField);
document.add(contentField);
indexWriter.addDocument(document);
//3、关闭indexWriter
indexWriter.close();
}
@Test
public void testSearchIndex() throws Exception
{
/**
* 1.创建一个 indexSerach对象
* 2.调用search方法进行检索
* 3.输出内容
*/
// 1.创建一个 indexSerach对象
//–索引库
Directory directory = FSDirectory.open(new File(“./indexDir”));
IndexSearcher searcher = new IndexSearcher(directory);
//2..调用search方法进行检索
//–
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
QueryParser queryParser = new QueryParser(Version.LUCENE_30, “id”, analyzer);
//设置搜索的关键字
Query query = queryParser.parse(“1”);
TopDocs topDocs = searcher.search(query, 10);
//获得根据关键字查询出来的总的记录数
int count = topDocs.totalHits;
List<Article> articles = new ArrayList<Article>();
//获得数组
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for(ScoreDoc scoreDoc : scoreDocs)
{
//关键字得分
float score = scoreDoc.score;
//索引的下标
int index = scoreDoc.doc;
//根据索引获得document对象
Document document = searcher.doc(index);
//把document转化成article
Article article = new Article();
article.setId(Long.parseLong(document.get(“id”)));
article.setTitle(document.get(“title”));
article.setContent(document.get(“content”));
articles.add(article);
}
for(Article article : articles)
{
System.out.println(article.getId());
System.out.println(article.getTitle());
System.out.println(article.getContent());
}
}
}
两个工具类
package com.kite.luncene.utils;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class LunceneUtils
{
//索引库
public static Directory directory;
//分词器
public static Analyzer analyzer;
static
{
try
{
directory = FSDirectory.open(new File(“./indexDor”));
analyzer = new StandardAnalyzer(Version.LUCENE_30);
} catch (Exception e)
{
e.printStackTrace();
}
}
}
package com.kite.luncene.utils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import com.testArticle;
public class DocumentUtils
{
/**
* 通过 article获得document
* @param article
* @return
*/
public static Document articleToDocument(Article article)
{
Document document = new Document();
Field idField = new Field(“id”, article.getId().toString(), Store.YES, Index.NOT_ANALYZED);
Field titleField = new Field(“title”, article.getTitle(), Store.YES, Index.ANALYZED);
Field contentField = new Field(“content”, article.getContent(), Store.YES, Index.ANALYZED);
document.add(idField);
document.add(titleField);
document.add(contentField);
return document;
}
/**
* 通过document 获得article对象
* @param document
* @return
*/
public static Article documentToArticle(Document document)
{
Article article = new Article();
article.setId(Long.parseLong(document.get(“id”)));
article.setTitle((document.get(“title”)));
article.setContent(document.get(“content”));
return article;
}
}
使用工具类实现简单的增删改查功能
package com.kite.luncene.index;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.junit.Test;
import com.testArticle;
import com.kite.luncene.utils.DocumentUtils;
import com.kite.luncene.utils.LunceneUtils;
public class ArticleIndex
{
@Test
public void testCreateIndex() throws Exception
{
IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
Article article = new Article();
article.setId(1L);
article.setTitle(“luncenes是一个好难写的东西”);
article.setContent(“百度,谷歌是很好的搜索引擎”);
//通过工具类转换成document
Document document = DocumentUtils.articleToDocument(article);
indexWriter.addDocument(document);
indexWriter.close();
}
@Test
public void testSearchIndex() throws Exception
{
IndexSearcher indexSearcher = new IndexSearcher(LunceneUtils.directory);
/**
* Version.LUCENE_30 版本
* “title” 根据那个字段
* LunceneUtils.analyzer 分词器
*/
QueryParser queryParser = new QueryParser(Version.LUCENE_30, “title”, LunceneUtils.analyzer);
//luncene 关键字
Query query = queryParser.parse(“luncene”);
TopDocs topDocs = indexSearcher.search(query, 2);
//获得根据关键字查询到的所有的记录数
int count = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
List<Article> articles = new ArrayList<Article>();
for(ScoreDoc scoreDoc : scoreDocs)
{
//scoreDoc.score 获得关键字得分
float score = scoreDoc.score;
//scoreDoc.doc 获得索引的下标
int index = scoreDoc.doc;
//通过索引的下标进行查询
Document document = indexSearcher.doc(index);
Article article = DocumentUtils.documentToArticle(document);
articles.add(article);
}
//遍历输出
for(Article article : articles)
{
System.out.println(article.getId().toString());
System.err.println(article.getTitle());
System.out.println(article.getContent());
}
}
/**
* 修改是先删除 然后进行添加
* @throws Exception
*/
@Test
public void testDeleteIndex() throws Exception
{
IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
//indexWriter.deleteAll();–删除所有
/*
* term 关键字对象
* title 字段名
* luncenes 关键字的内容
*/
Term term = new Term(“title”, “luncenes”);
//根据关键字进行删除 会在文件夹中增加一个.del结尾的文件
indexWriter.deleteDocuments(term);
indexWriter.close();
}
@Test
public void testUpdateIndex() throws Exception
{
IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
Term term = new Term(“title”,”luncenes”);
Article article = new Article();
article.setId(1L);
article.setTitle(“luncene是一个好难写的东西,少个s不解释”);
article.setContent(“百度,谷歌是很好的搜索引擎”);
/*
* term 根据关键字进行修改
* doc 修改后的内容
*/
indexWriter.updateDocument(term, DocumentUtils.articleToDocument(article));
indexWriter.close();
}
}
转载请注明来源网站:blog.ytso.com谢谢!
原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/14699.html