在Lucene搜索

时间:2016-01-08 03:45:55

标签: apache lucene

我是Lucene的新手,所以我从http://www.lucenetutorial.com/sample-apps/textfileindexer-java.html下载了一个示例。 代码目前可以运行,但我认为我没有正确使用Lucene。第一次搜索某个词(例如:是学生)时一切正常,但在那之后(仍然在循环中),如果我再次搜索相同的词,它就会抛出异常(显示java.lang.NullPointerException)。 请帮我修复这个问题。

Here's my problem

这是代码

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.*;
import java.util.ArrayList;
public class TextFileIndexer {
private static StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

private IndexWriter writer;
private ArrayList<File> queue = new ArrayList<File>();


public static void main(String[] args) throws IOException {
System.out.println("Enter the path where the index will be created: (e.g. /tmp/index or c:\temp\index)");

String indexLocation = null;
BufferedReader br = new BufferedReader(
        new InputStreamReader(System.in));
String s = br.readLine();

TextFileIndexer indexer = null;
try {
  indexLocation = s;
  indexer = new TextFileIndexer(s);
} catch (Exception ex) {
  System.out.println("Cannot create index..." + ex.getMessage());
  System.exit(-1);
}

while (!s.equalsIgnoreCase("q")) {
  try {
    System.out.println("Enter the full path to add into the index (q=quit): (e.g. /home/ron/mydir or c:\Users\ron\mydir)");
    System.out.println("[Acceptable file types: .xml, .html, .html, .txt]");
    s = br.readLine();
    if (s.equalsIgnoreCase("q")) {
      break;
    }

    indexer.indexFileOrDirectory(s);
  } catch (Exception e) {
    System.out.println("Error indexing " + s + " : " + e.getMessage());
  }
}

indexer.closeIndex();

IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(5, true);

s = "";
while (!s.equalsIgnoreCase("q")) {
  try {
    System.out.println("Enter the search query (q=quit):");
    s = br.readLine();
    if (s.equalsIgnoreCase("q")) {
      break;
    }
    Query q = new QueryParser(Version.LUCENE_40, "contents", analyzer).parse(s);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    // 4. display results
    System.out.println("Found " + hits.length + " hits.");
    for(int i=0;i<hits.length;++i) {
      int docId = hits[i].doc;
      Document d = searcher.doc(docId);
      System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
    }

  } catch (Exception e) {
    System.out.println("Error searching " + s + " : " + e.getMessage());
  }
}

  }

  /**
  * Constructor
  * @param indexDir the name of the folder in which the index should be created
  * @throws java.io.IOException when exception creating index.
  */
  TextFileIndexer(String indexDir) throws IOException {

FSDirectory dir = FSDirectory.open(new File(indexDir));


IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

writer = new IndexWriter(dir, config);
  }

  /**
 * Indexes a file or directory
 * @param fileName the name of a text file or a folder we wish to add to the index
 * @throws java.io.IOException when exception
 */
  public void indexFileOrDirectory(String fileName) throws IOException {

addFiles(new File(fileName));

int originalNumDocs = writer.numDocs();
for (File f : queue) {
  FileReader fr = null;
  try {
    Document doc = new Document();

    fr = new FileReader(f);
    doc.add(new TextField("contents", fr));
    doc.add(new StringField("path", f.getPath(), Field.Store.YES));
    doc.add(new StringField("filename", f.getName(), Field.Store.YES));

    writer.addDocument(doc);
    System.out.println("Added: " + f);
  } catch (Exception e) {
    System.out.println("Could not add: " + f);
  } finally {
    fr.close();
  }
}

 int newNumDocs = writer.numDocs();
 System.out.println("");
 System.out.println("************************");
 System.out.println((newNumDocs - originalNumDocs) + " documents added.");
 System.out.println("************************");

 queue.clear();
}

 private void addFiles(File file) {

 if (!file.exists()) {
   System.out.println(file + " does not exist.");
 }
 if (file.isDirectory()) {
   for (File f : file.listFiles()) {
     addFiles(f);
   }
 } else {
   String filename = file.getName().toLowerCase();

   if (filename.endsWith(".htm") || filename.endsWith(".html") ||
          filename.endsWith(".xml") || filename.endsWith(".txt")) {
     queue.add(file);
   } else {
     System.out.println("Skipped " + filename);
   }
  }
 }

  public void closeIndex() throws IOException {
   writer.close();
  }
 }

附注:英语不是我的母语,所以请忽略我的语法或用词错误。

1 个答案:

答案 0 :(得分:0)

我检查了你的代码。您只需在循环内部、调用search方法之前实例化TopScoreDocCollector。下面是您的代码片段,其中用注释标出了我所做的修改:

...       

    //REMOVE AND INSTANTIATE IN THE CYCLE!      TopScoreDocCollector collector = TopScoreDocCollector.create ( 5, true );

                    s = "";
                    while ( !s.equalsIgnoreCase ( "q" ) ) {
                        try {
                            System.out.println ( "Enter the search query (q=quit):" );
                            s = br.readLine ();
                            if ( s.equalsIgnoreCase ( "q" ) ) {
                                break;
                            }
                            // INSTANTIATE HERE!!!
                            TopScoreDocCollector collector = TopScoreDocCollector.create ( 5, true );

                            Query q = new QueryParser ( Version.LUCENE_40, "contents", analyzer ).parse ( s );
                            searcher.search ( q, collector );

...

或者,您可以使用search方法的另一个重载签名,这样就完全不需要显式创建TopScoreDocCollector实例。例如:

TopDocs topDocs = searcher.search ( q, 5);
ScoreDoc[] hits = topDocs.scoreDocs;

我认为你可以解决你的问题。