lucene4.9初体验（源码附件） -

w62268458

浏览: 13759 次
性别:
来自: 广州

最近访客更多访客>>

CheungGQ

sunjy22

worldseme

whisper527

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

lucene4.9初体验（源码附件）

博客分类：

lucene

lucene java

1.入门代码

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import com.test.utils.File2DocumentUtil;

/**
 * Minimal Lucene 4.9 walkthrough: index a single text file, then query the index.
 *
 * <p>{@link #createIndex()} writes the index under {@link #indexPath}; {@link #search()}
 * reads the index from that same location. Both paths are hard-coded for the demo.
 */
public class HelloWorld {
	String filePath = "F:\\eclipse\\LuceneTest\\luceneDatasource\\小笑话_总统的房间 Room .txt";
	String indexPath = "F:\\eclipse\\LuceneTest\\luceneIndex";
	// Analyzer shared by indexing and query parsing so both sides tokenize text the same way.
	Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_4_9);

	/**
	 * Indexes the file at {@link #filePath} into the directory at {@link #indexPath}.
	 *
	 * <p>Several Fields make up a Document, and several Documents make up an index.
	 *
	 * <p>Open modes:
	 * <ul>
	 *   <li>APPEND: always append; may fail and produces duplicate entries, so the same
	 *       hit can be returned several times.</li>
	 *   <li>CREATE: wipe and rebuild (recommended, and what this demo uses).</li>
	 *   <li>CREATE_OR_APPEND (default): create if missing, otherwise append.</li>
	 * </ul>
	 *
	 * @throws Exception if the source file cannot be read or the index cannot be written
	 */
	@Test
	public void createIndex() throws Exception {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
		iwc.setOpenMode(OpenMode.CREATE);

		// try-with-resources closes the writer first and the directory second, even when
		// reading the file or adding the document throws.
		try (Directory dir = FSDirectory.open(new File(indexPath));
				IndexWriter writer = new IndexWriter(dir, iwc)) {
			Document doc = File2DocumentUtil.file2Document(filePath);
			writer.addDocument(doc);
		}
	}

	/**
	 * Searches the index at {@link #indexPath} and prints every hit.
	 *
	 * <ol>
	 *   <li>Create an IndexReader.</li>
	 *   <li>Create an IndexSearcher from the IndexReader.</li>
	 *   <li>Turn the search keywords into a Query with a QueryParser.</li>
	 *   <li>Run the search by passing the Query to IndexSearcher.search().</li>
	 *   <li>Walk the results through TopDocs and its ScoreDocs.</li>
	 * </ol>
	 *
	 * @throws IOException if the index cannot be opened or read
	 * @throws ParseException if the query string cannot be parsed
	 */
	@Test
	public void search() throws IOException, ParseException {
		String queryString = "document";
		String[] fields = { "name", "content" };
		QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_4_9, fields, analyzer);
		Query query = parser.parse(queryString);

		// The reader and its directory hold file handles on the index; release them
		// once the results have been printed.
		try (Directory dir = FSDirectory.open(new File(indexPath));
				IndexReader reader = DirectoryReader.open(dir)) {
			IndexSearcher indexSearcher = new IndexSearcher(reader);
			TopDocs topDocs = indexSearcher.search(query, 10000);
			System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");

			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				// scoreDoc.doc is the internal document number used to load the stored fields.
				Document doc = indexSearcher.doc(scoreDoc.doc);
				File2DocumentUtil.printDocumnetInfo(doc);
			}
		}
	}

}

2.File2DocumentUtil代码

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

/**
 * Helpers that convert a file on disk into a Lucene {@link Document} and print one back out.
 */
public class File2DocumentUtil {

	/**
	 * Builds a Document with the fields {@code name}, {@code size}, {@code path} and
	 * {@code content} for the given file. All four fields are stored.
	 *
	 * <p>A StringField is NOT_ANALYZED (its value is not tokenized) while a TextField is
	 * ANALYZED, so the analysis type does not need to be specified when creating the field.
	 *
	 * @param filePath path of the file to convert
	 * @return a Document describing the file and holding its full text
	 * @throws Exception if the file cannot be opened or read
	 */
	public static Document file2Document(String filePath) throws Exception {
		File file = new File(filePath);
		Document doc = new Document();
		// StringField: indexed, not tokenized.
		doc.add(new StringField("name", file.getName(), Field.Store.YES));
		// LongField: indexed, not tokenized.
		doc.add(new LongField("size", file.length(), Field.Store.YES));
		doc.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES));
		// TextField: indexed and tokenized. The text is read eagerly by readFile, which
		// closes the file itself, so no stream is left open here.
		doc.add(new TextField("content", readFile(file), Field.Store.YES));
		return doc;
	}

	/**
	 * Reads the whole file as text, terminating every line with {@code "\n"}.
	 *
	 * <p>The bytes are decoded with the platform default charset.
	 *
	 * @param file the file to read
	 * @return the file content, with each line followed by a newline character
	 * @throws IOException if the file cannot be opened or read
	 */
	public static String readFile(File file) throws IOException {
		StringBuilder content = new StringBuilder();
		// try-with-resources closes the reader and the underlying stream on every path.
		try (BufferedReader reader =
				new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append("\n");
			}
		}
		return content.toString();
	}

	/**
	 * Prints the {@code name}, {@code path}, {@code size} and {@code content} values of the
	 * document to standard output, one per line.
	 *
	 * @param doc the document whose fields are printed
	 */
	public static void printDocumnetInfo(Document doc) {
		// doc.get avoids the unchecked cast to Field and a NullPointerException when the
		// field is missing.
		System.out.println(doc.get("name"));
		System.out.println(doc.get("path"));
		System.out.println(doc.get("size"));
		System.out.println(doc.get("content"));
	}
}

LuceneTest.rar (7.7 MB)
下载次数: 1

分享到：

lucene4.9之analyzer | byte[]转int

2015-01-28 09:29
浏览 518
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene4.9初体验（源码附件）

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene4.9初体验（源码附件）

评论

发表评论

相关推荐

lucene原理

lucene之boost

lucene4.9之highlight

lucene4.9之Query

lucene4.9之analyzer

lucene4.9初体验

最近访客更多访客>>