Question

我正在尝试从数据库搜索迁移到 Lucene 搜索。我有几个包含数据的文本文件，其中一个文件中的示例数据如下：

N =以太网，L =无效，IM = XX123，SN = 286-054-754，HBF =无效，BON =无效， VSR = null，DUID = null，MID = 2，IP = 10.21.122.136，MAC = 60：C7：98：17：57：80， SYNC = false，GN = null，CustParam3 = null，CustParam2 = null，VV = 1.06.0007， CustParam5 = null，CustParam4 = null，CustParam7 = null，CustParam6 = null， BUNAME =空，PN = M132-409-01-R，CustParam8 =空，CS = 2015-09-30 19：49：25.0，CST =不活动，BL = 3.2，EE =关闭，TID = 190，PRL = VEM，PAV =空， FAV = null，MON = 2016-04-06 11：13：40.507，DON = null，LPDR = 2015-09-30 19：50：23.85，SSID = null，PIP = null，DID = null，MDATE = null， OV = rel-20120625-SC-3.1.2-B，CID = null，ICBI = false，TID = null， LCR = 2015-10-01 01：50：30.297，SS =无近期通信，CBU =空， GMVR =，LID = store，FF = 167340，HFP = RATNERCO >> blore，ISA = false， TF = null，FAM = null，LDPDR = 2015-09-30 19：50：39.113，STVER = True， SID = null，LHB = 2015-09-30 21：50：30.297，IDSS = false，FR = 81796， LMOS = 2015-09-30 19：49：50.503，LCUS = null，MNAME = XX 123，BBUID = null， CON = null，DBUN = null，ISDRA = false，POSV = null，UUID = 2，TRAM = null， SPOL = 000000000，CustomField1 = null，CustomField2 = null， CustomField3 = null，MUID = 2DE02CF3-0663-420A-8918-7A550E29F570， CustomField4 = null，CustomField5 = null，HNAME = blore，customparam1 = null， HID = 1048，LBDT = 2015-07-06 12：03：45.0，DIC = null，AT = None，LID = null， IDSA = false，LMPS = 2015-09-30 15：49：50.457，MBUN = System，CNC = Ethernet， LOC = null

我使用 StandardAnalyzer 创建索引并进行搜索，但无法用字符串 UUID=1 进行精确搜索：返回的结果中既有包含 UUID=1 的文件，也有不包含 UUID=1 的文件（我总共有两个文件，两个文件的内容都被返回了）。由于数据中含有特殊字符，我也尝试过 WhitespaceAnalyzer，但这样查不到任何数据。我还创建了一个自定义分析器，它使用空白分词器、小写过滤器和标准词元过滤器，但没有帮助。我也曾扩展 StopwordAnalyzerBase 来创建自己的分析器，并使用 NormalizeCharMap 替换特殊字符，这有一定效果，但这样就无法进行通配符搜索了。

希望有人能在这方面帮助我。我是 Lucene 的新手。

    import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexCreator
{
public void createIndex(String inputFiles,  String indexPath)
{
    //Input Path Variable
    final Path docDir = Paths.get(inputFiles);

    try
    {
        //org.apache.lucene.store.Directory instance
        Directory dir = FSDirectory.open( Paths.get(indexPath) );

        //analyzer with the default stop words
        //Analyzer analyzer = new NewStandardAnalyzer();
        //Analyzer analyzer =  buildAnalyzer();
        //Analyzer analyzer =  new WhitespaceAnalyzer();

        Analyzer analyzer = new StandardAnalyzer();
        //IndexWriter Configuration
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        //IndexWriter writes new index files to the directory
        IndexWriter writer = new IndexWriter(dir, iwc);

        //Its recursive method to iterate all files and directories
        indexDocs(writer, docDir);

        writer.commit();
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
}

private void indexDocs(final IndexWriter writer, Path path) throws 
IOException
{
    //Directory?
    if (Files.isDirectory(path))
    {
        //Iterate directory
        Files.walkFileTree(path, new SimpleFileVisitor<Path>()
        {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException
            {
                try
                {
                    //Index this file
                    indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                }
                catch (IOException ioe)
                {
                    ioe.printStackTrace();
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }
    else
    {
        //Index this file
        indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
    }
}

private void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException
{
    try (InputStream stream = Files.newInputStream(file))
    {
        //Create lucene Document
        Document doc = new Document();

        String content = new String(Files.readAllBytes(file));
        //content = content.replace("-", "\\-");
        //content = content.replace(":", "\\:");
        //content = content.replace("=", "\\=");
        //content = content.replace(".", "\\.");
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents", content, Store.YES));

        //Updates a document by first deleting the document(s)
        //containing <code>term</code> and then adding the new
        //document.  The delete and then add are atomic as seen
        //by a reader on the same index
        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}

    public static Analyzer buildAnalyzer() throws IOException {
        return CustomAnalyzer.builder()
                .withTokenizer("whitespace")
                .addTokenFilter("lowercase")
                .addTokenFilter("standard")
                .build();

}

public static void main(String[] args) {

        IndexCreator indexCreator = new IndexCreator();indexCreator.createIndex(
"C:\\Lucene\\LuceneLatest\\LuceneLatestModified\\Data", 
 , 
"C:\\Lucene\\LuceneLatest\\LuceneLatestModified\\Index");
        System.out.println("Done");
    }
}


    import java.io.IOException;
    import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query-parser search against the {@code contents} field of the Lucene index in
 * {@link #INDEX_DIR} and prints the matching documents.
 */
public class Searcher
{
    /** Directory that contains the Lucene index files. */
    private static final String INDEX_DIR =
            "C:\\Lucene\\LuceneLatest\\LuceneLatestModified\\Index";

    /**
     * Opens the index, runs one sample query and prints the hits.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or the query cannot be parsed or executed
     */
    public static void main(String[] args) throws Exception
    {
        Searcher searcher = new Searcher();

        // The directory and reader are owned here so that both are closed once the search
        // is finished, including when the search throws.
        try (Directory dir = FSDirectory.open(Paths.get(INDEX_DIR));
             IndexReader reader = DirectoryReader.open(dir))
        {
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            // Other queries that were tried:
            //   "NETWORKCONFIGURATION=Ethernet AND MACADDRESS=60\\:C7\\:98\\:17\\:57\\:80"
            //   "NETWORKCONFIGURATION=Ethern*"
            searcher.searchInContent("UUID=1", indexSearcher);
        }
    }

    /**
     * Parses {@code textToFind} with the classic {@link QueryParser} against the
     * {@code contents} field, retrieves up to 10 hits and prints the total hit count followed
     * by the path, score and stored contents of each hit.
     *
     * <p>NOTE(review): {@link StandardAnalyzer} presumably splits input such as {@code UUID=1}
     * into separate tokens, so the parsed query may match documents that contain only one of
     * them - print the parsed {@link Query} to verify.
     *
     * @param textToFind query string in QueryParser syntax
     * @param searcher   searcher over the index to query
     * @throws Exception if the query cannot be parsed or the search fails
     */
    private void searchInContent(String textToFind, IndexSearcher searcher) throws Exception
    {
        // The analyzer is closed as soon as the query has been parsed and executed.
        try (Analyzer analyzer = new StandardAnalyzer())
        {
            QueryParser qp = new QueryParser("contents", analyzer);
            // Escaping and lower-casing via QueryParser.escape(textToFind).toLowerCase()
            // was tried earlier and is intentionally not applied.
            Query query = qp.parse(textToFind);

            TopDocs hits = searcher.search(query, 10);

            System.out.println("Total Results :: " + hits.totalHits);

            for (ScoreDoc sd : hits.scoreDocs)
            {
                Document d = searcher.doc(sd.doc);
                System.out.println("Path : "+ d.get("path") + ", Score : " + sd.score + ", Content : "+d.get("contents"));
            }
        }
    }

    /**
     * Builds a custom analyzer made of the {@code whitespace} tokenizer followed by the
     * {@code lowercase} and {@code standard} token filters.
     *
     * <p>NOTE(review): the {@code "standard"} filter name may not be available in newer Lucene
     * releases - confirm against the Lucene version on the classpath.
     *
     * @return the configured analyzer
     * @throws IOException if the analyzer components cannot be loaded
     */
    public static Analyzer buildAnalyzer() throws IOException
    {
        return CustomAnalyzer.builder()
                .withTokenizer("whitespace")
                .addTokenFilter("lowercase")
                .addTokenFilter("standard")
                .build();
    }
}

Lucene特殊字符搜索

0 个答案: