全文检索Lucene(四)—Compass框架详解

Compass是一个强大的、支持事务的、高性能的对象/搜索引擎映射(OSEM:Object/Search Engine Mapping)框架,同时也是一个Java持久层框架。
Compass之于Lucene,就像Hibernate之于JDBC,Compass就是把Lucene封装了一层。
Compass目前的版本是2.2.0,已经很久没有更新与维护。Compass 2.2版本所对应的Lucene版本是2.4.1。

下载地址:http://www.compass-project.org/
解压后的目录结构:
这里写图片描述

开发所需的基本jar包可以在dist及其子目录下找到,主要如下:
这里写图片描述

代码示例:
Article.java

package com.my.bean; 
 
import org.compass.annotations.Index; 
import org.compass.annotations.Searchable; 
import org.compass.annotations.SearchableBoostProperty; 
import org.compass.annotations.SearchableId; 
import org.compass.annotations.SearchableProperty; 
import org.compass.annotations.Store; 
 
/**
 * Article bean that is mapped to the search index through Compass annotations.
 *
 * <p>{@code title} and {@code content} are stored and analyzed; {@code boostValue}
 * is the field annotated as the boost property.
 */
@Searchable
public class Article {

    /**
     * Article identifier.
     *
     * <p>Per the original author's note: an id property cannot be queried by default,
     * but once the {@code name} parameter is given it can be used in queries. For
     * numeric properties, {@code format} may be a run of zeros giving the stored
     * length; shorter values are left-padded with '0'.
     */
    @SearchableId(name = "id", format = "00000000")
    private Integer id;

    /** Article title, stored and analyzed under the name {@code title}. */
    @SearchableProperty(name = "title", store = Store.YES, index = Index.ANALYZED)
    private String title;

    /** Article body, stored and analyzed under the name {@code content}. */
    @SearchableProperty(name = "content", store = Store.YES, index = Index.ANALYZED)
    private String content;

    /** Boost value of this article; initialised to {@code 1F}. */
    @SearchableBoostProperty
    private float boostValue = 1F;

    /** @return the article identifier */
    public Integer getId() {
        return this.id;
    }

    /** @param id the article identifier */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the article title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the article title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the article body */
    public String getContent() {
        return this.content;
    }

    /** @param content the article body */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the boost value of this article */
    public float getBoostValue() {
        return this.boostValue;
    }

    /** @param boostValue the boost value of this article */
    public void setBoostValue(float boostValue) {
        this.boostValue = boostValue;
    }

}
 

HelloWorld.java

package com.my.compass; 
 
import java.util.ArrayList; 
import java.util.List; 
 
import org.compass.core.Compass; 
import org.compass.core.CompassHits; 
import org.compass.core.CompassSession; 
import org.compass.core.CompassTransaction; 
import org.compass.core.config.CompassConfiguration; 
import org.junit.Test; 
 
import com.my.bean.Article; 
 
/**
 * Minimal Compass example: index one {@link Article} and then search for it.
 *
 * <p>Every session is closed in a {@code finally} block and the transaction is
 * rolled back on failure, so a failing test does not leave a session open.
 */
public class HelloWorld {

    private final CompassConfiguration cfg = new CompassConfiguration().configure();
    private final Compass compassSessionFactory = cfg.buildCompass();

    /**
     * Builds the index. Simulates an article that was just posted and saved to the
     * database and therefore has to be indexed so that it can be found by a search.
     */
    @Test
    public void createIndex() throws Exception {
        // Simulate a record that has just been saved to the database.
        Article article = new Article();
        article.setId(1);
        article.setTitle("Lucene是全文检索的框架");
        article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");

        CompassSession session = compassSessionFactory.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.create(article); // add the article to the index
            tx.commit();
        } catch (RuntimeException e) {
            // tx is still null when beginTransaction() itself failed.
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }
    }

    /**
     * Runs a query-string search and prints the matching articles.
     */
    @Test
    public void search() throws Exception {
        // Search condition (the original example alternatively used "lucene").
        String queryString = "compass";

        List<Article> list = new ArrayList<Article>();
        CompassSession session = compassSessionFactory.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            CompassHits hits = session.find(queryString);
            // hits.length() is the total number of results.
            for (int i = 0; i < hits.length(); i++) {
                list.add((Article) hits.data(i));
            }

            tx.commit();
        } catch (RuntimeException e) {
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        // Print the results.
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }
}

compass.cfg.xml

<?xml version="1.0" encoding="utf-8"?> 
<compass-core-config xmlns="http://www.compass-project.org/schema/core-config" 
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
    xsi:schemaLocation="http://www.compass-project.org/schema/core-config 
           http://www.compass-project.org/schema/compass-core-config-2.2.xsd"> 
 
    <compass name="default"> 
        <!-- Connection information: the index is kept in a file-system directory --> 
        <connection> 
            <file path="./indexDir/" /> 
        </connection> 
 
        <!-- Mapping declarations: the classes that are mapped to the index --> 
        <mappings> 
            <class name="com.my.bean.Article" /> 
        </mappings> 
 
        <!-- Other settings --> 
        <settings> 
            <!-- Highlighter: prefix --> 
            <setting name="compass.engine.highlighter.default.formatter.simple.pre" value="&lt;span class='keyword'&gt;" /> 
            <!-- Highlighter: suffix --> 
            <setting name="compass.engine.highlighter.default.formatter.simple.post" value="&lt;/span&gt;" /> 
            <!-- Highlighter: size of the summary fragment --> 
            <setting name="compass.engine.highlighter.default.fragmenter.simple.size" value="20" /> 
 
            <!-- Analyzer (tokenizer) to use --> 
            <setting name="compass.engine.analyzer.default.type" value="jeasy.analysis.MMAnalyzer" /> 
        </settings> 
 
 
    </compass> 
</compass-core-config>  

Compass增删改查
CompassUtils.java

package com.my.utils; 
 
import org.compass.core.Compass; 
import org.compass.core.CompassSession; 
import org.compass.core.config.CompassConfiguration; 
 
/**
 * Holds the single shared {@link Compass} instance and hands out sessions from it.
 */
public class CompassUtils {

    /** Shared Compass instance, built once when this class is initialised. */
    private static final Compass compassSessionFactory = buildCompass();

    /**
     * Builds the Compass instance from the configuration loaded by
     * {@code CompassConfiguration.configure()}.
     */
    private static Compass buildCompass() {
        return new CompassConfiguration().configure().buildCompass();
    }

    /**
     * Opens a new CompassSession and returns it.
     *
     * @return a newly opened session; the caller is responsible for closing it
     */
    public static CompassSession openSession() {
        return compassSessionFactory.openSession();
    }

    /**
     * @return the shared Compass instance
     */
    public static Compass getCompassSessionFactory() {
        return compassSessionFactory;
    }

}
 

ArticleIndexDao.java

package com.my.compass; 
 
import java.util.ArrayList; 
import java.util.List; 
 
import org.compass.core.CompassHits; 
import org.compass.core.CompassSession; 
import org.compass.core.CompassTransaction; 
 
import com.my.bean.Article; 
import com.my.bean.QueryResult; 
import com.my.utils.CompassUtils; 
 
/**
 * Index DAO for {@link Article}: create, delete, update and paged search, each run
 * in its own session and transaction.
 *
 * <p>Any failure is rethrown wrapped in a {@link RuntimeException} after the
 * transaction (if one was started) has been rolled back; the session is always closed.
 */
public class ArticleIndexDao {

    /**
     * Creates the index entry for an article (saves it to the index).
     *
     * @param article the article to index
     */
    public void save(Article article) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.create(article); // build the index entry
            tx.commit();
        } catch (Exception e) {
            rollback(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Deletes the index entry of the article with the given id.
     *
     * @param id identifier of the article to remove from the index
     */
    public void delete(Integer id) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.delete(Article.class, id); // remove the index entry
            tx.commit();
        } catch (Exception e) {
            rollback(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Updates the index entry of an article.
     *
     * @param article the article whose index entry is to be updated
     */
    public void update(Article article) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.save(article); // update the index entry
            tx.commit();
        } catch (Exception e) {
            rollback(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Searches with paging.
     *
     * <p>The {@code content} of each returned article is replaced by its highlighted
     * fragment when the highlighter produces one.
     *
     * @param queryString the query string
     * @param firstResult index of the first hit to return
     * @param maxResults  maximum number of hits to return
     * @return the total number of hits plus the list holding one page of data
     */
    public QueryResult search(String queryString, int firstResult, int maxResults) {
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            // Run the query and get the intermediate result.
            CompassHits hits = session.find(queryString);
            int count = hits.length();

            // Computed in long so that a huge maxResults (e.g. Integer.MAX_VALUE)
            // cannot overflow into a negative end index and yield an empty page.
            long endIndex = Math.min((long) firstResult + maxResults, (long) count);

            List<Article> list = new ArrayList<Article>();
            for (int i = firstResult; i < endIndex; i++) { // take only one page
                Article article = (Article) hits.data(i);

                // Highlight one property at a time; the result is null when the
                // property value does not contain the searched keyword.
                String text = hits.highlighter(i).fragment("content");
                if (text != null) {
                    article.setContent(text); // replace the original content with the highlighted text
                }

                list.add(article);
            }
            tx.commit();

            return new QueryResult(count, list);
        } catch (Exception e) {
            rollback(tx);
            throw new RuntimeException(e);
        } finally {
            session.close();
        }
    }

    /**
     * Rolls back the transaction if one was started. {@code tx} is still null when
     * {@code beginTransaction()} itself failed; calling rollback on it would raise a
     * NullPointerException that hides the original failure.
     */
    private static void rollback(CompassTransaction tx) {
        if (tx != null) {
            tx.rollback();
        }
    }
}

基于CompassTemplate的增删改查

package com.my.compass; 
 
import java.util.ArrayList; 
import java.util.List; 
 
import org.compass.core.CompassCallback; 
import org.compass.core.CompassException; 
import org.compass.core.CompassHits; 
import org.compass.core.CompassSession; 
import org.compass.core.CompassTemplate; 
 
import com.my.bean.Article; 
import com.my.bean.QueryResult; 
import com.my.utils.CompassUtils; 
 
/**
 * {@link CompassTemplate}-based variant of the article index DAO: create, delete,
 * update and paged search without explicit session/transaction handling in this class.
 */
public class ArticleIndexDao2 {

    /**
     * Creates the index entry for an article (saves it to the index).
     *
     * @param article the article to index
     */
    public void save(Article article) {
        newTemplate().create(article);
    }

    /**
     * Deletes the index entry of the article with the given id.
     *
     * @param id identifier of the article to remove from the index
     */
    public void delete(Integer id) {
        newTemplate().delete(Article.class, id);
    }

    /**
     * Updates the index entry of an article.
     *
     * @param article the article whose index entry is to be updated
     */
    public void update(Article article) {
        newTemplate().save(article);
    }

    /**
     * Searches with paging.
     *
     * <p>The {@code content} of each returned article is replaced by its highlighted
     * fragment when the highlighter produces one.
     *
     * @param queryString the query string
     * @param firstResult index of the first hit to return
     * @param maxResults  maximum number of hits to return
     * @return the total number of hits plus the list holding one page of data
     */
    public QueryResult search(final String queryString, final int firstResult, final int maxResults) {
        return newTemplate().execute(new CompassCallback<QueryResult>() {
            public QueryResult doInCompass(CompassSession session) throws CompassException {
                // Run the query and get the intermediate result.
                CompassHits hits = session.find(queryString);
                int count = hits.length();

                // Computed in long so that a huge maxResults (e.g. Integer.MAX_VALUE)
                // cannot overflow into a negative end index and yield an empty page.
                long endIndex = Math.min((long) firstResult + maxResults, (long) count);

                List<Article> list = new ArrayList<Article>();
                for (int i = firstResult; i < endIndex; i++) { // take only one page
                    Article article = (Article) hits.data(i);

                    // Highlight one property at a time; the result is null when the
                    // property value does not contain the searched keyword.
                    String text = hits.highlighter(i).fragment("content");
                    if (text != null) {
                        article.setContent(text); // replace the original content with the highlighted text
                    }

                    list.add(article);
                }
                return new QueryResult(count, list);
            }
        });
    }

    /** Creates a template on top of the shared Compass instance. */
    private static CompassTemplate newTemplate() {
        return new CompassTemplate(CompassUtils.getCompassSessionFactory());
    }
}

查询示例:

package com.my.compass; 
import java.util.ArrayList; 
import java.util.List; 
import org.compass.core.CompassHits; 
import org.compass.core.CompassQuery; 
import org.compass.core.CompassQueryBuilder.CompassMultiPhraseQueryBuilder; 
import org.compass.core.CompassSession; 
import org.compass.core.CompassTransaction; 
import org.junit.Test; 
import com.my.bean.Article; 
import com.my.utils.CompassUtils; 
public class QueryTest { 
@Test 
public void search() throws Exception { 
CompassSession session = CompassUtils.openSession(); 
CompassTransaction tx = session.beginTransaction(); 
// 查询 
// ------------------------------------------------ 
// 1,查询所有 
CompassQuery query1 = session.queryBuilder().matchAll(); 
// 关键词查询 
CompassQuery query2 = session.queryBuilder().term("title", "lucene"); 
// 范围查询 
CompassQuery query3 = session.queryBuilder().between("id", 5, 15, true); 
// 通配符查询 
CompassQuery query4 = session.queryBuilder().wildcard("title", "luc*n?"); 
// 模糊查询 
CompassQuery query5 = session.queryBuilder().fuzzy("title", "lucenx", 0.8F); 
// 短语查询 
CompassMultiPhraseQueryBuilder multiPhraseQueryBuilder = session.queryBuilder().multiPhrase("title"); 
multiPhraseQueryBuilder.add("lucene", 0); 
multiPhraseQueryBuilder.add("框架", 3); 
CompassQuery query6 = multiPhraseQueryBuilder.toQuery(); 
CompassQuery query7 = session.queryBuilder().multiPhrase("title") // 
.add("lucene", 0) // 
.add("框架", 3) // 
.toQuery(); 
CompassQuery query8 = session.queryBuilder().multiPhrase("title") // 
.add("lucene") // 
.add("框架") // 
.setSlop(5)// 词之间的间隔不超过5.toQuery(); 
// 布尔查询 
CompassQuery query = session.queryBuilder().bool()// 
// .addMust(query1) // 必须满足 
// .addMustNot(query2) // 非 
// .addShould(query3) // 多个Should一起用是OR的关系 
.addMust(query1)// 
.addMust(query2)// 
.toQuery(); 
CompassHits hits = query.hits(); 
List<Article> list = new ArrayList<Article>(); 
for (int i = 0; i < hits.length(); i++) { 
Article article = (Article) hits.data(i); 
list.add(article); 
} 
tx.commit(); 
session.close(); 
// 显示结果 
System.out.println("总结果数量为:" + list.size()); 
for (Article article : list) { 
System.out.println("--------> id = " + article.getId()); 
System.out.println("title  = " + article.getTitle()); 
System.out.println("content= " + article.getContent()); 
} 
} 
} 

过滤示例:

package com.my.compass; 
import java.util.ArrayList; 
import java.util.List; 
import org.compass.core.CompassHits; 
import org.compass.core.CompassQuery; 
import org.compass.core.CompassQueryFilter; 
import org.compass.core.CompassSession; 
import org.compass.core.CompassTransaction; 
import org.junit.Test; 
import com.my.bean.Article; 
import com.my.utils.CompassUtils; 
/**
 * Demonstrates restricting a query-string search with a range filter on {@code id}.
 */
public class FilterTest {

    @Test
    public void search() throws Exception {
        // Search condition
        String queryString = "lucene";

        List<Article> list = new ArrayList<Article>();
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            // Built through the query builder (instead of session.find) so that a
            // filter can be attached before the query is executed.
            CompassQuery query = session.queryBuilder().queryString(queryString).toQuery();

            // Filter condition: id between 1 and 15, both bounds flagged true.
            CompassQueryFilter filter = session.queryFilterBuilder().between("id", 1, 15, true, true);
            query.setFilter(filter);

            CompassHits hits = query.hits();
            for (int i = 0; i < hits.length(); i++) {
                list.add((Article) hits.data(i));
            }

            tx.commit();
        } catch (RuntimeException e) {
            // tx is still null when beginTransaction() itself failed.
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        // Print the results.
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }
}

排序示例:

package com.my.compass; 
import java.util.ArrayList; 
import java.util.List; 
import org.compass.core.CompassHits; 
import org.compass.core.CompassQuery; 
import org.compass.core.CompassQuery.SortDirection; 
import org.compass.core.CompassSession; 
import org.compass.core.CompassTransaction; 
import org.junit.Test; 
import com.my.bean.Article; 
import com.my.utils.CompassUtils; 
/**
 * Demonstrates result ordering: indexing an article with a non-default boost value,
 * a plain search, and a search explicitly sorted by {@code id} in reverse order.
 */
public class SortTest {

    /**
     * Indexes an article whose boost value is raised from the default 1F to 2F.
     */
    @Test
    public void createIndex() throws Exception {
        Article article = new Article();
        article.setId(27);
        article.setTitle("LuceneUtil与Lucene是全文检索的框架");
        article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");
        article.setBoostValue(2F); // the default is 1F

        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();
            session.create(article); // add the article to the index
            tx.commit();
        } catch (RuntimeException e) {
            // tx is still null when beginTransaction() itself failed.
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }
    }

    /**
     * Plain query-string search without an explicit sort.
     */
    @Test
    public void search() throws Exception {
        // Search condition
        String queryString = "lucene";

        List<Article> list = new ArrayList<Article>();
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            CompassHits hits = session.find(queryString);
            for (int i = 0; i < hits.length(); i++) {
                list.add((Article) hits.data(i));
            }

            tx.commit();
        } catch (RuntimeException e) {
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        printResults(list);
    }

    /**
     * Query-string search sorted by {@code id} in reverse (descending) order.
     */
    @Test
    public void search2() throws Exception {
        // Search condition
        String queryString = "lucene";

        List<Article> list = new ArrayList<Article>();
        CompassSession session = CompassUtils.openSession();
        CompassTransaction tx = null;
        try {
            tx = session.beginTransaction();

            CompassQuery query = session.queryBuilder().queryString(queryString).toQuery();
            // query.addSort("id") would sort by id ascending; REVERSE sorts descending.
            query.addSort("id", SortDirection.REVERSE);

            CompassHits hits = query.hits();
            for (int i = 0; i < hits.length(); i++) {
                list.add((Article) hits.data(i));
            }

            tx.commit();
        } catch (RuntimeException e) {
            if (tx != null) {
                tx.rollback();
            }
            throw e;
        } finally {
            session.close();
        }

        printResults(list);
    }

    /** Prints the number of results followed by id, title and content of each article. */
    private static void printResults(List<Article> list) {
        System.out.println("总结果数量为:" + list.size());
        for (Article article : list) {
            System.out.println("--------> id = " + article.getId());
            System.out.println("title  = " + article.getTitle());
            System.out.println("content= " + article.getContent());
        }
    }
}

原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/12104.html

(0)
上一篇 2021年7月19日
下一篇 2021年7月19日

相关推荐

发表回复

登录后才能评论