我正在尝试从数据库搜索迁移到Lucene搜索。我有几个包含数据的文本文件,其中一个文本文件中的示例数据如下:
N =以太网,L =无效,IM = XX123,SN = 286-054-754,HBF =无效,BON =无效, VSR = null,DUID = null,MID = 2,IP = 10.21.122.136,MAC = 60:C7:98:17:57:80, SYNC = false,GN = null,CustParam3 = null,CustParam2 = null,VV = 1.06.0007, CustParam5 = null,CustParam4 = null,CustParam7 = null,CustParam6 = null, BUNAME =空,PN = M132-409-01-R,CustParam8 =空,CS = 2015-09-30 19:49:25.0,CST =不活动,BL = 3.2,EE =关闭,TID = 190,PRL = VEM,PAV =空, FAV = null,MON = 2016-04-06 11:13:40.507,DON = null,LPDR = 2015-09-30 19:50:23.85,SSID = null,PIP = null,DID = null,MDATE = null, OV = rel-20120625-SC-3.1.2-B,CID = null,ICBI = false,TID = null, LCR = 2015-10-01 01:50:30.297,SS =无近期通信,CBU =空, GMVR =,LID = store,FF = 167340,HFP = RATNERCO >> blore,ISA = false, TF = null,FAM = null,LDPDR = 2015-09-30 19:50:39.113,STVER = True, SID = null,LHB = 2015-09-30 21:50:30.297,IDSS = false,FR = 81796, LMOS = 2015-09-30 19:49:50.503,LCUS = null,MNAME = XX 123,BBUID = null, CON = null,DBUN = null,ISDRA = false,POSV = null,UUID = 2,TRAM = null, SPOL = 000000000,CustomField1 = null,CustomField2 = null, CustomField3 = null,MUID = 2DE02CF3-0663-420A-8918-7A550E29F570, CustomField4 = null,CustomField5 = null,HNAME = blore,customparam1 = null, HID = 1048,LBDT = 2015-07-06 12:03:45.0,DIC = null,AT = None,LID = null, IDSA = false,LMPS = 2015-09-30 15:49:50.457,MBUN = System,CNC = Ethernet, LOC = null
我正在创建索引并使用StandardAnalyzer进行搜索,但用字符串UUID=1搜索时无法得到正确的结果:返回的结果中还包含了不含UUID=1的文件(我总共有两个文件,两个文件的内容都被显示出来)。由于数据中含有特殊字符,我也尝试过使用WhitespaceAnalyzer,但它没有返回任何数据。我还创建了一个自定义分析器,它由空白分词器、小写过滤器和标准词元过滤器组成,但没有帮助。我还扩展了StopwordAnalyzerBase来创建自己的分析器,并使用NormalizeCharMap替换特殊字符,这有所帮助,但这样我就无法进行通配符搜索了。
请哪位在这方面帮帮我。我是Lucene的新手。
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class IndexCreator
{
public void createIndex(String inputFiles, String indexPath)
{
//Input Path Variable
final Path docDir = Paths.get(inputFiles);
try
{
//org.apache.lucene.store.Directory instance
Directory dir = FSDirectory.open( Paths.get(indexPath) );
//analyzer with the default stop words
//Analyzer analyzer = new NewStandardAnalyzer();
//Analyzer analyzer = buildAnalyzer();
//Analyzer analyzer = new WhitespaceAnalyzer();
Analyzer analyzer = new StandardAnalyzer();
//IndexWriter Configuration
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
//IndexWriter writes new index files to the directory
IndexWriter writer = new IndexWriter(dir, iwc);
//Its recursive method to iterate all files and directories
indexDocs(writer, docDir);
writer.commit();
}
catch (IOException e)
{
e.printStackTrace();
}
}
private void indexDocs(final IndexWriter writer, Path path) throws
IOException
{
//Directory?
if (Files.isDirectory(path))
{
//Iterate directory
Files.walkFileTree(path, new SimpleFileVisitor<Path>()
{
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException
{
try
{
//Index this file
indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
}
catch (IOException ioe)
{
ioe.printStackTrace();
}
return FileVisitResult.CONTINUE;
}
});
}
else
{
//Index this file
indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
}
}
private void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException
{
try (InputStream stream = Files.newInputStream(file))
{
//Create lucene Document
Document doc = new Document();
String content = new String(Files.readAllBytes(file));
//content = content.replace("-", "\\-");
//content = content.replace(":", "\\:");
//content = content.replace("=", "\\=");
//content = content.replace(".", "\\.");
doc.add(new StringField("path", file.toString(), Field.Store.YES));
doc.add(new LongPoint("modified", lastModified));
doc.add(new TextField("contents", content, Store.YES));
//Updates a document by first deleting the document(s)
//containing <code>term</code> and then adding the new
//document. The delete and then add are atomic as seen
//by a reader on the same index
writer.updateDocument(new Term("path", file.toString()), doc);
}
}
public static Analyzer buildAnalyzer() throws IOException {
return CustomAnalyzer.builder()
.withTokenizer("whitespace")
.addTokenFilter("lowercase")
.addTokenFilter("standard")
.build();
}
public static void main(String[] args) {
IndexCreator indexCreator = new IndexCreator();indexCreator.createIndex(
"C:\\Lucene\\LuceneLatest\\LuceneLatestModified\\Data",
,
"C:\\Lucene\\LuceneLatest\\LuceneLatestModified\\Index");
System.out.println("Done");
}
}
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Command-line search over the Lucene index stored in {@link #INDEX_DIR}.
 *
 * <p>Queries are parsed with the classic {@link QueryParser} against the {@code contents}
 * field using a {@link StandardAnalyzer}, and the top ten hits are printed to standard
 * output.
 */
public class Searcher
{
    //directory contains the lucene indexes
    private static final String INDEX_DIR =
            "C:\\Lucene\\LuceneLatest\\LuceneLatestModified\\Index";

    /**
     * Opens the index, runs one hard-coded query and prints the hits.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened or the query cannot be parsed or run
     */
    public static void main(String[] args) throws Exception
    {
        Searcher searcher = new Searcher();

        // The reader and its directory are opened here so that try-with-resources can
        // close both once the search has finished, including when the search throws.
        try (Directory dir = FSDirectory.open(Paths.get(INDEX_DIR));
             IndexReader reader = DirectoryReader.open(dir))
        {
            // Other query strings that were tried:
            //   "NETWORKCONFIGURATION=Ethernet AND MACADDRESS=60\\:C7\\:98\\:17\\:57\\:80"
            //   "NETWORKCONFIGURATION=Ethern*"
            searcher.searchInContent("UUID=1", new IndexSearcher(reader));
        }
    }

    /**
     * Parses {@code textToFind} as a query on the {@code contents} field, runs it and
     * prints the total hit count followed by the path, score and stored contents of up to
     * ten matching documents.
     *
     * @param textToFind query text in classic query-parser syntax
     * @param searcher   searcher over the index to query
     * @throws Exception if the query cannot be parsed or the search fails
     */
    private void searchInContent(String textToFind, IndexSearcher searcher) throws Exception
    {
        // NOTE(review): StandardAnalyzer presumably splits text such as "UUID=1" into the
        // separate tokens "uuid" and "1", and the parser then matches documents containing
        // either token - confirm against the Lucene version in use. If an exact key=value
        // match is intended, quote the text as a phrase or index with an analyzer that
        // keeps "key=value" as one token. QueryParser.escape(textToFind) can be applied
        // first when the text contains query-syntax characters that must be taken literally.
        QueryParser qp = new QueryParser("contents", new StandardAnalyzer());
        Query query = qp.parse(textToFind);

        //search the index
        TopDocs hits = searcher.search(query, 10);
        System.out.println("Total Results :: " + hits.totalHits);
        for (ScoreDoc sd : hits.scoreDocs)
        {
            Document d = searcher.doc(sd.doc);
            System.out.println("Path : "+ d.get("path") + ", Score : " + sd.score + ", Content : "+d.get("contents"));
        }
    }

    /**
     * Builds an alternative analyzer: the "whitespace" tokenizer followed by the
     * "lowercase" and "standard" token filters. It is not used by the search above, which
     * parses queries with {@link StandardAnalyzer}.
     *
     * @return the configured analyzer
     * @throws IOException if the analyzer components cannot be loaded
     */
    public static Analyzer buildAnalyzer() throws IOException {
        return CustomAnalyzer.builder()
                .withTokenizer("whitespace")
                .addTokenFilter("lowercase")
                .addTokenFilter("standard")
                .build();
    }
}