管理员
2 years ago
5 changed files with 17 additions and 238 deletions
@ -1,128 +0,0 @@ |
|||||
package org; |
|
||||
|
|
||||
import org.apache.commons.io.FileUtils; |
|
||||
import org.apache.lucene.analysis.Analyzer; |
|
||||
import org.apache.lucene.document.*; |
|
||||
import org.apache.lucene.index.DirectoryReader; |
|
||||
import org.apache.lucene.index.IndexReader; |
|
||||
import org.apache.lucene.index.IndexWriter; |
|
||||
import org.apache.lucene.index.IndexWriterConfig; |
|
||||
import org.apache.lucene.queryparser.classic.QueryParser; |
|
||||
import org.apache.lucene.search.IndexSearcher; |
|
||||
import org.apache.lucene.search.Query; |
|
||||
import org.apache.lucene.search.ScoreDoc; |
|
||||
import org.apache.lucene.search.TopDocs; |
|
||||
import org.apache.lucene.store.Directory; |
|
||||
import org.apache.lucene.store.FSDirectory; |
|
||||
import org.wltea.analyzer.lucene.IKAnalyzer; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.IOException; |
|
||||
|
|
||||
/** |
|
||||
* @author ldjun |
|
||||
* @version 1.0 |
|
||||
* @date 2023年05月12日 10:16 |
|
||||
* @desc desc |
|
||||
*/ |
|
||||
public class Test { |
|
||||
public static void main(String[] args) throws Exception { |
|
||||
createIndex(); |
|
||||
searchIndex(); |
|
||||
} |
|
||||
|
|
||||
private static void createIndex() throws IOException { |
|
||||
//1、创建一个Director对象,指定索引库保存的位置。
|
|
||||
//把索引库保存在内存中
|
|
||||
//Directory directory = new RAMDirectory();
|
|
||||
//把索引库保存在磁盘
|
|
||||
Directory directory = FSDirectory.open(new File("E:\\lucene3").toPath()); |
|
||||
//2、基于Directory对象创建一个IndexWriter对象
|
|
||||
IndexWriterConfig config = new IndexWriterConfig(new IKAnalyzer()); |
|
||||
IndexWriter indexWriter = new IndexWriter(directory, config); |
|
||||
|
|
||||
//3、读取磁盘上的文件,对应每个文件创建一个文档对象。
|
|
||||
File dir = new File("E:\\lucene2"); |
|
||||
|
|
||||
|
|
||||
File[] files = dir.listFiles(); |
|
||||
for (File f : files) { |
|
||||
//取文件名
|
|
||||
String fileName = f.getName(); |
|
||||
//文件的路径
|
|
||||
String filePath = f.getPath(); |
|
||||
//文件的内容
|
|
||||
String fileContent = FileUtils.readFileToString(f, "utf-8"); |
|
||||
//文件的大小
|
|
||||
long fileSize = FileUtils.sizeOf(f); |
|
||||
//创建Field
|
|
||||
//参数1:域的名称,参数2:域的内容,参数3:是否存储
|
|
||||
Field fieldName = new TextField("name", fileName, Field.Store.YES); |
|
||||
Field fieldPath = new StoredField("path", filePath); |
|
||||
Field fieldContent = new TextField("content", fileContent, Field.Store.YES); |
|
||||
Field fieldSizeValue = new LongPoint("size", fileSize); |
|
||||
Field fieldSizeStore = new StoredField("size", fileSize); |
|
||||
//创建文档对象
|
|
||||
Document document = new Document(); |
|
||||
//向文档对象中添加域
|
|
||||
document.add(fieldName); |
|
||||
document.add(fieldPath); |
|
||||
document.add(fieldContent); |
|
||||
//document.add(fieldSize);
|
|
||||
document.add(fieldSizeValue); |
|
||||
document.add(fieldSizeStore); |
|
||||
//5、把文档对象写入索引库
|
|
||||
indexWriter.addDocument(document); |
|
||||
} |
|
||||
//6、关闭indexwriter对象
|
|
||||
indexWriter.close(); |
|
||||
} |
|
||||
|
|
||||
public static void searchIndex() throws Exception { |
|
||||
//1、创建一个Director对象,指定索引库的位置
|
|
||||
Directory directory = FSDirectory.open(new File("E:\\lucene3").toPath()); |
|
||||
//2、创建一个IndexReader对象
|
|
||||
IndexReader indexReader = DirectoryReader.open(directory); |
|
||||
//3、创建一个IndexSearcher对象,构造方法中的参数indexReader对象。
|
|
||||
IndexSearcher indexSearcher = new IndexSearcher(indexReader); |
|
||||
//4、创建一个Query对象,TermQuery
|
|
||||
// Query query = new TermQuery(new Term("name", "123"));
|
|
||||
//5、执行查询,得到一个TopDocs对象
|
|
||||
//参数1:查询对象 参数2:查询结果返回的最大记录数
|
|
||||
// TopDocs topDocs = indexSearcher.search(query, 10);
|
|
||||
|
|
||||
|
|
||||
// 使用的分词器
|
|
||||
Analyzer analyzer = new IKAnalyzer(true); |
|
||||
// 要搜索的字段
|
|
||||
String filedName = "content"; |
|
||||
// 查询生成器(解析输入生成Query查询对象)
|
|
||||
QueryParser parser = new QueryParser(filedName, analyzer); |
|
||||
// 通过parse解析输入(分词),生成query对象
|
|
||||
Query query = parser.parse("2023-04-14"); |
|
||||
|
|
||||
|
|
||||
|
|
||||
TopDocs topDocs = indexSearcher.search(query,2); |
|
||||
|
|
||||
//6、取查询结果的总记录数
|
|
||||
System.out.println("查询总记录数:" + topDocs.totalHits); |
|
||||
//7、取文档列表
|
|
||||
ScoreDoc[] scoreDocs = topDocs.scoreDocs; |
|
||||
//8、打印文档中的内容
|
|
||||
for (ScoreDoc doc : scoreDocs) { |
|
||||
//取文档id
|
|
||||
int docId = doc.doc; |
|
||||
|
|
||||
|
|
||||
//根据id取文档对象
|
|
||||
Document document = indexSearcher.doc(docId); |
|
||||
System.out.println(document.get("name")); |
|
||||
System.out.println(document.get("path")); |
|
||||
System.out.println(document.get("size")); |
|
||||
System.out.println(document.get("content")); |
|
||||
} |
|
||||
//9、关闭IndexReader对象
|
|
||||
indexReader.close(); |
|
||||
} |
|
||||
} |
|
@ -1,103 +0,0 @@ |
|||||
package org; |
|
||||
|
|
||||
import org.apache.lucene.analysis.TokenStream; |
|
||||
import org.apache.lucene.document.Document; |
|
||||
import org.apache.lucene.document.Field; |
|
||||
import org.apache.lucene.document.TextField; |
|
||||
import org.apache.lucene.index.*; |
|
||||
import org.apache.lucene.queryparser.classic.QueryParser; |
|
||||
import org.apache.lucene.search.IndexSearcher; |
|
||||
import org.apache.lucene.search.Query; |
|
||||
import org.apache.lucene.search.ScoreDoc; |
|
||||
import org.apache.lucene.search.highlight.Highlighter; |
|
||||
import org.apache.lucene.search.highlight.QueryScorer; |
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter; |
|
||||
import org.apache.lucene.store.Directory; |
|
||||
import org.apache.lucene.store.RAMDirectory; |
|
||||
import org.wltea.analyzer.lucene.IKAnalyzer; |
|
||||
|
|
||||
import java.io.IOException; |
|
||||
import java.io.StringReader; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
|
|
||||
public class TestLucene { |
|
||||
|
|
||||
public static void main(String[] args) throws Exception { |
|
||||
// 1. 准备中文分词器
|
|
||||
IKAnalyzer analyzer = new IKAnalyzer(); |
|
||||
|
|
||||
// 2. 索引
|
|
||||
List<String> productNames = new ArrayList<>(); |
|
||||
productNames.add("飞利浦led灯泡e27螺口暖白球泡灯家用照明超亮节能灯泡转色温灯泡"); |
|
||||
productNames.add("飞利浦led灯泡e14螺口蜡烛灯泡3W尖泡拉尾节能灯泡暖黄光源Lamp"); |
|
||||
productNames.add("雷士照明 LED灯泡 e27大螺口节能灯3W球泡灯 Lamp led节能灯泡"); |
|
||||
productNames.add("飞利浦 led灯泡 e27螺口家用3w暖白球泡灯节能灯5W灯泡LED单灯7w"); |
|
||||
productNames.add("飞利浦led小球泡e14螺口4.5w透明款led节能灯泡照明光源lamp单灯"); |
|
||||
productNames.add("飞利浦蒲公英护眼台灯工作学习阅读节能灯具30508带光源"); |
|
||||
productNames.add("欧普照明led灯泡蜡烛节能灯泡e14螺口球泡灯超亮照明单灯光源"); |
|
||||
productNames.add("欧普照明led灯泡节能灯泡超亮光源e14e27螺旋螺口小球泡暖黄家用"); |
|
||||
productNames.add("聚欧普照明led灯泡节能灯泡e27螺口球泡家用led照明单灯超亮光源"); |
|
||||
Directory index = createIndex(analyzer, productNames); |
|
||||
|
|
||||
// 3. 查询器
|
|
||||
String keyword = "护眼带光源"; |
|
||||
Query query = new QueryParser("name", analyzer).parse(keyword); |
|
||||
|
|
||||
|
|
||||
// 4. 搜索
|
|
||||
IndexReader reader = DirectoryReader.open(index); |
|
||||
IndexSearcher searcher = new IndexSearcher(reader); |
|
||||
int numberPerPage = 1000; |
|
||||
System.out.printf("当前一共有%d条数据%n",productNames.size()); |
|
||||
System.out.printf("查询关键字是:\"%s\"%n",keyword); |
|
||||
ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs; |
|
||||
|
|
||||
// 5. 显示查询结果
|
|
||||
showSearchResults(searcher, hits, query, analyzer); |
|
||||
// 6. 关闭查询
|
|
||||
reader.close(); |
|
||||
} |
|
||||
|
|
||||
private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) |
|
||||
throws Exception { |
|
||||
System.out.println("找到 " + hits.length + " 个命中."); |
|
||||
System.out.println("序号\t匹配度得分\t结果"); |
|
||||
|
|
||||
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>"); |
|
||||
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); |
|
||||
|
|
||||
for (int i = 0; i < hits.length; ++i) { |
|
||||
ScoreDoc scoreDoc= hits[i]; |
|
||||
int docId = scoreDoc.doc; |
|
||||
Document d = searcher.doc(docId); |
|
||||
List<IndexableField> fields = d.getFields(); |
|
||||
System.out.print((i + 1)); |
|
||||
System.out.print("\t" + scoreDoc.score); |
|
||||
for (IndexableField f : fields) { |
|
||||
TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); |
|
||||
String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); |
|
||||
System.out.print("\t" + fieldContent); |
|
||||
} |
|
||||
System.out.println("<br>"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private static Directory createIndex(IKAnalyzer analyzer, List<String> products) throws IOException { |
|
||||
Directory index = new RAMDirectory(); |
|
||||
IndexWriterConfig config = new IndexWriterConfig(analyzer); |
|
||||
IndexWriter writer = new IndexWriter(index, config); |
|
||||
|
|
||||
for (String name : products) { |
|
||||
addDoc(writer, name); |
|
||||
} |
|
||||
writer.close(); |
|
||||
return index; |
|
||||
} |
|
||||
|
|
||||
private static void addDoc(IndexWriter w, String name) throws IOException { |
|
||||
Document doc = new Document(); |
|
||||
doc.add(new TextField("name", name, Field.Store.YES)); |
|
||||
w.addDocument(doc); |
|
||||
} |
|
||||
} |
|
Loading…
Reference in new issue