如何在solr 6.3中使用SpanNearQuery访问Spans

时间:2016-12-30 18:11:20

标签: solr lucene

我正在尝试通过对包含这些术语的段落进行排名来构建查询解析器 我知道我需要使用SpanNearQuery,但即使在完成文档后我也找不到访问Spans的方法。我得到的方法返回null。

我已阅读https://lucidworks.com/blog/2009/07/18/the-spanquery/,其中以良好的方式解释了该查询。这解释了如何访问跨度,但它适用于solr 4.0,不幸的是solr 6.3不再具有原子阅读器。
我怎样才能获得实际的跨度?

public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    log.warn("in Process");
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }
    Query origQuery = rb.getQuery();
    // TODO: longer term, we don't have to be a span query, we could re-analyze the document
    if (origQuery != null) {
        if (origQuery instanceof SpanNearQuery == false) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Illegal query type.  The incoming query must be a Lucene SpanNearQuery and it was a " + origQuery.getClass().getName());
        }

        SpanNearQuery sQuery = (SpanNearQuery) origQuery;
        SolrIndexSearcher searcher = rb.req.getSearcher();
        IndexReader reader = searcher.getIndexReader();

        log.warn("before leaf reader context");
        List<LeafReaderContext> ctxs = (List<LeafReaderContext>) reader.leaves();
        log.warn("after leaf reader context");

        LeafReaderContext ctx = ctxs.get(0);
        SpanWeight spanWeight = sQuery.createWeight(searcher, true);
        Spans spans = spanWeight.getSpans(ctx, SpanWeight.Postings.POSITIONS);

        AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);
        Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
        Spans spans = fleeceQ.getSpans(wrapper.getContext(), new Bits.MatchAllBits(reader.numDocs()), termContexts);

        // SpanWeight.Postings[] postings= SpanWeight.Postings.values();
        // Spans spans = sQuery.getSpans();
        // Assumes the query is a SpanQuery

        // Build up the query term weight map and the bi-gram
        Map<String, Float> termWeights = new HashMap<String, Float>();
        Map<String, Float> bigramWeights = new HashMap<String, Float>();
        createWeights(params.get(CommonParams.Q), sQuery, termWeights, bigramWeights, reader);
        float adjWeight = params.getFloat(ADJACENT_WEIGHT, DEFAULT_ADJACENT_WEIGHT);
        float secondAdjWeight = params.getFloat(SECOND_ADJ_WEIGHT, DEFAULT_SECOND_ADJACENT_WEIGHT);
        float bigramWeight = params.getFloat(BIGRAM_WEIGHT, DEFAULT_BIGRAM_WEIGHT);

        // get the passages
        int primaryWindowSize = params.getInt(OWLParams.PRIMARY_WINDOW_SIZE, DEFAULT_PRIMARY_WINDOW_SIZE);
        int adjacentWindowSize = params.getInt(OWLParams.ADJACENT_WINDOW_SIZE, DEFAULT_ADJACENT_WINDOW_SIZE);
        int secondaryWindowSize = params.getInt(OWLParams.SECONDARY_WINDOW_SIZE, DEFAULT_SECONDARY_WINDOW_SIZE);

        WindowBuildingTVM tvm = new WindowBuildingTVM(primaryWindowSize, adjacentWindowSize, secondaryWindowSize);
        PassagePriorityQueue rankedPassages = new PassagePriorityQueue();

        // intersect w/ doclist
        DocList docList = rb.getResults().docList;
        log.warn("Before Spans");

        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            // build up the window
            log.warn("Iterating through spans");
            if (docList.exists(spans.docID())) {
                tvm.spanStart = spans.startPosition();
                tvm.spanEnd = spans.endPosition();

                // tvm.terms
                Terms terms = reader.getTermVector(spans.docID(), sQuery.getField());
                tvm.map(terms, spans);

                // The entries map contains the window, do some ranking of it
                if (tvm.passage.terms.isEmpty() == false) {
                    log.debug("Candidate: Doc: {} Start: {} End: {} ", new Object[] { spans.docID(), spans.startPosition(), spans.endPosition() });
                }
                tvm.passage.lDocId = spans.docID();
                tvm.passage.field = sQuery.getField();

                // score this window
                try {
                    addPassage(tvm.passage, rankedPassages, termWeights, bigramWeights, adjWeight, secondAdjWeight, bigramWeight);
                } catch (CloneNotSupportedException e) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Internal error cloning Passage", e);
                }

                // clear out the entries for the next round
                tvm.passage.clear();
            }
        }
    }
}

0 个答案:

没有答案