Diffstat (limited to 'src/main/java/org/apache/lucene')
35 files changed, 7347 insertions, 0 deletions
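The first files in this diff are small Lucene analysis extensions (CustomAnalyzerWrapper, TruncateTokenFilter, UniqueTokenFilter). As a rough illustration of how the two token filters added below might be chained inside a custom Analyzer, consider the following sketch; the analyzer class name, the whitespace tokenizer, and the truncation length of 4 are illustrative assumptions, not part of the patch.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.apache.lucene.util.Version;

import java.io.Reader;

/** Illustrative analyzer wiring together the two filters added below; not part of the patch. */
public class TruncatedUniqueAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_46, reader);
        TokenStream result = new TruncateTokenFilter(source, 4); // keep at most 4 characters per token
        result = new UniqueTokenFilter(result);                  // drop tokens already emitted for this stream
        return new TokenStreamComponents(source, result);
    }
}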
diff --git a/src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java b/src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java new file mode 100644 index 0000000..6ea95e4 --- /dev/null +++ b/src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java @@ -0,0 +1,77 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.analysis; + +import java.io.Reader; + +/** + * Similar to Lucene {@link AnalyzerWrapper} but actually allows to set the reuse strategy.... + * //TODO add to lucene the ability to set it... + */ +public abstract class CustomAnalyzerWrapper extends Analyzer { + + /** + * Creates a new CustomAnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of + * the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed + */ + protected CustomAnalyzerWrapper(ReuseStrategy reuseStrategy) { + super(reuseStrategy); + } + + /** + * Retrieves the wrapped Analyzer appropriate for analyzing the field with + * the given name + * + * @param fieldName Name of the field which is to be analyzed + * @return Analyzer for the field with the given name. Assumed to be non-null + */ + protected abstract Analyzer getWrappedAnalyzer(String fieldName); + + /** + * Wraps / alters the given TokenStreamComponents, taken from the wrapped + * Analyzer, to form new components. It is through this method that new + * TokenFilters can be added by AnalyzerWrappers. + * + * @param fieldName Name of the field which is to be analyzed + * @param components TokenStreamComponents taken from the wrapped Analyzer + * @return Wrapped / altered TokenStreamComponents. 
+ */ + protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components); + + @Override + protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) { + return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader)); + } + + @Override + public int getPositionIncrementGap(String fieldName) { + return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName); + } + + @Override + public int getOffsetGap(String fieldName) { + return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName); + } + + @Override + public final Reader initReader(String fieldName, Reader reader) { + return getWrappedAnalyzer(fieldName).initReader(fieldName, reader); + } +} diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java new file mode 100644 index 0000000..ea93850 --- /dev/null +++ b/src/main/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java @@ -0,0 +1,56 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.analysis.miscellaneous; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +import java.io.IOException; + +/** + * A token filter that truncates tokens. + */ +public class TruncateTokenFilter extends TokenFilter { + + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + + private final int size; + + public TruncateTokenFilter(TokenStream in, int size) { + super(in); + this.size = size; + } + + @Override + public final boolean incrementToken() throws IOException { + if (input.incrementToken()) { + final int length = termAttribute.length(); + if (length > size) { + termAttribute.setLength(size); + } + return true; + } else { + return false; + } + } +} + + diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java new file mode 100644 index 0000000..4ccad58 --- /dev/null +++ b/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java @@ -0,0 +1,90 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.analysis.miscellaneous; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; + +import java.io.IOException; + +/** + * A token filter that generates unique tokens. Can remove unique tokens only on the same + * position increments as well. + */ +public class UniqueTokenFilter extends TokenFilter { + + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); + + // use a fixed version, as we don't care about case sensitivity. + private final CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false); + private final boolean onlyOnSamePosition; + + public UniqueTokenFilter(TokenStream in) { + this(in, false); + } + + public UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) { + super(in); + this.onlyOnSamePosition = onlyOnSamePosition; + } + + @Override + public final boolean incrementToken() throws IOException { + while (input.incrementToken()) { + final char term[] = termAttribute.buffer(); + final int length = termAttribute.length(); + + boolean duplicate; + if (onlyOnSamePosition) { + final int posIncrement = posIncAttribute.getPositionIncrement(); + if (posIncrement > 0) { + previous.clear(); + } + + duplicate = (posIncrement == 0 && previous.contains(term, 0, length)); + } else { + duplicate = previous.contains(term, 0, length); + } + + // clone the term, and add to the set of seen terms. + char saved[] = new char[length]; + System.arraycopy(term, 0, saved, 0, length); + previous.add(saved); + + if (!duplicate) { + return true; + } + } + return false; + } + + @Override + public final void reset() throws IOException { + super.reset(); + previous.clear(); + } +} + + diff --git a/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java b/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java new file mode 100644 index 0000000..66b3328 --- /dev/null +++ b/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java @@ -0,0 +1,149 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.index; + +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.metrics.CounterMetric; +import org.elasticsearch.common.metrics.MeanMetric; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.index.merge.OnGoingMerge; + +import java.io.IOException; +import java.util.Collections; +import java.util.Set; + +/** + * An extension to the {@link ConcurrentMergeScheduler} that provides tracking on merge times, total + * and current merges. + */ +public class TrackingConcurrentMergeScheduler extends ConcurrentMergeScheduler { + + protected final ESLogger logger; + + private final MeanMetric totalMerges = new MeanMetric(); + private final CounterMetric totalMergesNumDocs = new CounterMetric(); + private final CounterMetric totalMergesSizeInBytes = new CounterMetric(); + private final CounterMetric currentMerges = new CounterMetric(); + private final CounterMetric currentMergesNumDocs = new CounterMetric(); + private final CounterMetric currentMergesSizeInBytes = new CounterMetric(); + + private final Set<OnGoingMerge> onGoingMerges = ConcurrentCollections.newConcurrentSet(); + private final Set<OnGoingMerge> readOnlyOnGoingMerges = Collections.unmodifiableSet(onGoingMerges); + + public TrackingConcurrentMergeScheduler(ESLogger logger) { + super(); + this.logger = logger; + } + + public long totalMerges() { + return totalMerges.count(); + } + + public long totalMergeTime() { + return totalMerges.sum(); + } + + public long totalMergeNumDocs() { + return totalMergesNumDocs.count(); + } + + public long totalMergeSizeInBytes() { + return totalMergesSizeInBytes.count(); + } + + public long currentMerges() { + return currentMerges.count(); + } + + public long currentMergesNumDocs() { + return currentMergesNumDocs.count(); + } + + public long currentMergesSizeInBytes() { + return currentMergesSizeInBytes.count(); + } + + public Set<OnGoingMerge> onGoingMerges() { + return readOnlyOnGoingMerges; + } + + @Override + protected void doMerge(MergePolicy.OneMerge merge) throws IOException { + int totalNumDocs = merge.totalNumDocs(); + // don't used #totalBytesSize() since need to be executed under IW lock, might be fixed in future Lucene version + long totalSizeInBytes = merge.estimatedMergeBytes; + long time = System.currentTimeMillis(); + currentMerges.inc(); + currentMergesNumDocs.inc(totalNumDocs); + currentMergesSizeInBytes.inc(totalSizeInBytes); + + OnGoingMerge onGoingMerge = new OnGoingMerge(merge); + onGoingMerges.add(onGoingMerge); + + if (logger.isTraceEnabled()) { + logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? 
"_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes)); + } + try { + beforeMerge(onGoingMerge); + super.doMerge(merge); + } finally { + long took = System.currentTimeMillis() - time; + + onGoingMerges.remove(onGoingMerge); + afterMerge(onGoingMerge); + + currentMerges.dec(); + currentMergesNumDocs.dec(totalNumDocs); + currentMergesSizeInBytes.dec(totalSizeInBytes); + + totalMergesNumDocs.inc(totalNumDocs); + totalMergesSizeInBytes.inc(totalSizeInBytes); + totalMerges.inc(took); + if (took > 20000) { // if more than 20 seconds, DEBUG log it + logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); + } else if (logger.isTraceEnabled()) { + logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); + } + } + } + + /** + * A callback allowing for custom logic before an actual merge starts. + */ + protected void beforeMerge(OnGoingMerge merge) { + + } + + /** + * A callback allowing for custom logic before an actual merge starts. + */ + protected void afterMerge(OnGoingMerge merge) { + + } + + @Override + public MergeScheduler clone() { + // Lucene IW makes a clone internally but since we hold on to this instance + // the clone will just be the identity. + return this; + } +}
\ No newline at end of file diff --git a/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java b/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java new file mode 100644 index 0000000..bd3ed9d --- /dev/null +++ b/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java @@ -0,0 +1,165 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.index; + +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.metrics.CounterMetric; +import org.elasticsearch.common.metrics.MeanMetric; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.index.merge.OnGoingMerge; + +import java.io.IOException; +import java.util.Collections; +import java.util.Set; + +// LUCENE MONITOR - Copied from SerialMergeScheduler +public class TrackingSerialMergeScheduler extends MergeScheduler { + + protected final ESLogger logger; + + private final MeanMetric totalMerges = new MeanMetric(); + private final CounterMetric totalMergesNumDocs = new CounterMetric(); + private final CounterMetric totalMergesSizeInBytes = new CounterMetric(); + private final CounterMetric currentMerges = new CounterMetric(); + private final CounterMetric currentMergesNumDocs = new CounterMetric(); + private final CounterMetric currentMergesSizeInBytes = new CounterMetric(); + + private final Set<OnGoingMerge> onGoingMerges = ConcurrentCollections.newConcurrentSet(); + private final Set<OnGoingMerge> readOnlyOnGoingMerges = Collections.unmodifiableSet(onGoingMerges); + + public TrackingSerialMergeScheduler(ESLogger logger) { + this.logger = logger; + } + + public long totalMerges() { + return totalMerges.count(); + } + + public long totalMergeTime() { + return totalMerges.sum(); + } + + public long totalMergeNumDocs() { + return totalMergesNumDocs.count(); + } + + public long totalMergeSizeInBytes() { + return totalMergesSizeInBytes.count(); + } + + public long currentMerges() { + return currentMerges.count(); + } + + public long currentMergesNumDocs() { + return currentMergesNumDocs.count(); + } + + public long currentMergesSizeInBytes() { + return currentMergesSizeInBytes.count(); + } + + public Set<OnGoingMerge> onGoingMerges() { + return readOnlyOnGoingMerges; + } + + /** + * Just do the merges in sequence. We do this + * "synchronized" so that even if the application is using + * multiple threads, only one merge may run at a time. 
+ */ + @Override + synchronized public void merge(IndexWriter writer) throws CorruptIndexException, IOException { + while (true) { + MergePolicy.OneMerge merge = writer.getNextMerge(); + if (merge == null) + break; + + // different from serial merge, call mergeInit here so we get the correct stats + // mergeInit can be called several times without side affects (checks on merge.info not being null) + writer.mergeInit(merge); + + int totalNumDocs = merge.totalNumDocs(); + // don't used #totalBytesSize() since need to be executed under IW lock, might be fixed in future Lucene version + long totalSizeInBytes = merge.estimatedMergeBytes; + long time = System.currentTimeMillis(); + currentMerges.inc(); + currentMergesNumDocs.inc(totalNumDocs); + currentMergesSizeInBytes.inc(totalSizeInBytes); + + OnGoingMerge onGoingMerge = new OnGoingMerge(merge); + onGoingMerges.add(onGoingMerge); + + // sadly, segment name is not available since mergeInit is called from merge itself... + if (logger.isTraceEnabled()) { + logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes)); + } + try { + beforeMerge(onGoingMerge); + writer.merge(merge); + } finally { + long took = System.currentTimeMillis() - time; + + onGoingMerges.remove(onGoingMerge); + afterMerge(onGoingMerge); + + currentMerges.dec(); + currentMergesNumDocs.dec(totalNumDocs); + currentMergesSizeInBytes.dec(totalSizeInBytes); + + totalMergesNumDocs.inc(totalNumDocs); + totalMergesSizeInBytes.inc(totalSizeInBytes); + totalMerges.inc(took); + if (took > 20000) { // if more than 20 seconds, DEBUG log it + logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); + } else if (logger.isTraceEnabled()) { + logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); + } + } + } + } + + /** + * A callback allowing for custom logic before an actual merge starts. + */ + protected void beforeMerge(OnGoingMerge merge) { + + } + + /** + * A callback allowing for custom logic before an actual merge starts. + */ + protected void afterMerge(OnGoingMerge merge) { + + } + + @Override + public void close() { + } + + @Override + public MergeScheduler clone() { + // Lucene IW makes a clone internally but since we hold on to this instance + // the clone will just be the identity. + return this; + } +}
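Both tracking schedulers above expose the same beforeMerge/afterMerge callbacks and metric accessors. A minimal sketch of overriding the hooks follows, shown against the concurrent variant; the subclass name is hypothetical and not part of the patch. The scheduler would then be handed to Lucene through IndexWriterConfig#setMergeScheduler; in Elasticsearch itself that wiring happens inside the index's merge scheduler provider rather than in user code.

import org.apache.lucene.index.TrackingConcurrentMergeScheduler;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.index.merge.OnGoingMerge;

/** Hypothetical subclass demonstrating the merge callbacks; not part of the patch. */
public class LoggingTrackingMergeScheduler extends TrackingConcurrentMergeScheduler {

    public LoggingTrackingMergeScheduler(ESLogger logger) {
        super(logger);
    }

    @Override
    protected void beforeMerge(OnGoingMerge merge) {
        // invoked just before the merge runs; the current-merge counters are already incremented
        logger.debug("merge starting, [{}] merge(s) now in flight", currentMerges());
    }

    @Override
    protected void afterMerge(OnGoingMerge merge) {
        // invoked from the finally block once the merge finished (or failed);
        // the current-merge counters are decremented right after this callback returns
        logger.debug("merge done, [{}] merge(s) still in flight", currentMerges());
    }
}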
\ No newline at end of file diff --git a/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java b/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java new file mode 100644 index 0000000..5f99fce --- /dev/null +++ b/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java @@ -0,0 +1,31 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.index.memory; + +/** + * This class overwrites {@link MemoryIndex} to make the reuse constructor visible. + */ +public final class ExtendedMemoryIndex extends MemoryIndex { + + public ExtendedMemoryIndex(boolean storeOffsets, long maxReusedBytes) { + super(storeOffsets, maxReusedBytes); + } + +} diff --git a/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java b/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java new file mode 100644 index 0000000..e1dcf58 --- /dev/null +++ b/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java @@ -0,0 +1,170 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queries; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.index.mapper.FieldMapper; + +import java.io.IOException; + +/** + * Extended version of {@link CommonTermsQuery} that allows to pass in a + * <tt>minimumNumberShouldMatch</tt> specification that uses the actual num of high frequent terms + * to calculate the minimum matching terms. 
+ */ +public class ExtendedCommonTermsQuery extends CommonTermsQuery { + + private final FieldMapper<?> mapper; + + public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord, FieldMapper<?> mapper) { + super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord); + this.mapper = mapper; + } + + private String lowFreqMinNumShouldMatchSpec; + private String highFreqMinNumShouldMatchSpec; + + @Override + protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) { + return calcMinimumNumberShouldMatch(lowFreqMinNumShouldMatchSpec, numOptional); + } + + protected int calcMinimumNumberShouldMatch(String spec, int numOptional) { + if (spec == null) { + return 0; + } + return Queries.calculateMinShouldMatch(numOptional, spec); + } + + @Override + protected int calcHighFreqMinimumNumberShouldMatch(int numOptional) { + return calcMinimumNumberShouldMatch(highFreqMinNumShouldMatchSpec, numOptional); + } + + public void setHighFreqMinimumNumberShouldMatch(String spec) { + this.highFreqMinNumShouldMatchSpec = spec; + } + + public String getHighFreqMinimumNumberShouldMatchSpec() { + return highFreqMinNumShouldMatchSpec; + } + + public void setLowFreqMinimumNumberShouldMatch(String spec) { + this.lowFreqMinNumShouldMatchSpec = spec; + } + + public String getLowFreqMinimumNumberShouldMatchSpec() { + return lowFreqMinNumShouldMatchSpec; + } + + // LUCENE-UPGRADE: remove this method if on 4.8 + @Override + public Query rewrite(IndexReader reader) throws IOException { + if (this.terms.isEmpty()) { + return new BooleanQuery(); + } else if (this.terms.size() == 1) { + final Query tq = newTermQuery(this.terms.get(0), null); + tq.setBoost(getBoost()); + return tq; + } + return super.rewrite(reader); + } + + // LUCENE-UPGRADE: remove this method if on 4.8 + @Override + protected Query buildQuery(final int maxDoc, + final TermContext[] contextArray, final Term[] queryTerms) { + BooleanQuery lowFreq = new BooleanQuery(disableCoord); + BooleanQuery highFreq = new BooleanQuery(disableCoord); + highFreq.setBoost(highFreqBoost); + lowFreq.setBoost(lowFreqBoost); + BooleanQuery query = new BooleanQuery(true); + for (int i = 0; i < queryTerms.length; i++) { + TermContext termContext = contextArray[i]; + if (termContext == null) { + lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur); + } else { + if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency) + || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) { + highFreq.add(newTermQuery(queryTerms[i], termContext), highFreqOccur); + } else { + lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur); + } + } + + } + final int numLowFreqClauses = lowFreq.clauses().size(); + final int numHighFreqClauses = highFreq.clauses().size(); + if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) { + int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses); + lowFreq.setMinimumNumberShouldMatch(minMustMatch); + } + if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) { + int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses); + highFreq.setMinimumNumberShouldMatch(minMustMatch); + } + if (lowFreq.clauses().isEmpty()) { + /* + * if lowFreq is empty we rewrite the high freq terms in a conjunction to + * prevent slow queries. 
+ */ + if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) { + for (BooleanClause booleanClause : highFreq) { + booleanClause.setOccur(Occur.MUST); + } + } + highFreq.setBoost(getBoost()); + return highFreq; + } else if (highFreq.clauses().isEmpty()) { + // only do low freq terms - we don't have high freq terms + lowFreq.setBoost(getBoost()); + return lowFreq; + } else { + query.add(highFreq, Occur.SHOULD); + query.add(lowFreq, Occur.MUST); + query.setBoost(getBoost()); + return query; + } + } + + //@Override + // LUCENE-UPGRADE: remove this method if on 4.8 + protected Query newTermQuery(Term term, TermContext context) { + if (mapper == null) { + // this should be super.newTermQuery(term, context) once it's available in the super class + return context == null ? new TermQuery(term) : new TermQuery(term, context); + } + final Query query = mapper.queryStringTermQuery(term); + if (query == null) { + // this should be super.newTermQuery(term, context) once it's available in the super class + return context == null ? new TermQuery(term) : new TermQuery(term, context); + } else { + return query; + } + } +} diff --git a/src/main/java/org/apache/lucene/queries/XTermsFilter.java b/src/main/java/org/apache/lucene/queries/XTermsFilter.java new file mode 100644 index 0000000..ea11f03 --- /dev/null +++ b/src/main/java/org/apache/lucene/queries/XTermsFilter.java @@ -0,0 +1,328 @@ +package org.apache.lucene.queries; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.*; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Filter; +import org.apache.lucene.util.*; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * Constructs a filter for docs matching any of the terms added to this class. + * Unlike a RangeFilter this can be used for filtering on multiple terms that are not necessarily in + * a sequence. An example might be a collection of primary keys from a database query result or perhaps + * a choice of "category" labels picked by the end user. As a filter, this is much faster than the + * equivalent query (a BooleanQuery with many "should" TermQueries) + */ +public final class XTermsFilter extends Filter { + + static { + assert Version.LUCENE_46 == org.elasticsearch.Version.CURRENT.luceneVersion : "Remove this once we are on LUCENE_48 - see LUCENE-5502"; + } + + /* + * this class is often used for large number of terms in a single field. 
+ * to optimize for this case and to be filter-cache friendly we + * serialize all terms into a single byte array and store offsets + * in a parallel array to keep the # of object constant and speed up + * equals / hashcode. + * + * This adds quite a bit of complexity but allows large term filters to + * be efficient for GC and cache-lookups + */ + private final int[] offsets; + private final byte[] termsBytes; + private final TermsAndField[] termsAndFields; + private final int hashCode; // cached hashcode for fast cache lookups + private static final int PRIME = 31; + + /** + * Creates a new {@link XTermsFilter} from the given list. The list + * can contain duplicate terms and multiple fields. + */ + public XTermsFilter(final List<Term> terms) { + this(new FieldAndTermEnum() { + // we need to sort for deduplication and to have a common cache key + final Iterator<Term> iter = sort(terms).iterator(); + @Override + public BytesRef next() { + if (iter.hasNext()) { + Term next = iter.next(); + field = next.field(); + return next.bytes(); + } + return null; + }}, terms.size()); + } + + /** + * Creates a new {@link XTermsFilter} from the given {@link BytesRef} list for + * a single field. + */ + public XTermsFilter(final String field, final List<BytesRef> terms) { + this(new FieldAndTermEnum(field) { + // we need to sort for deduplication and to have a common cache key + final Iterator<BytesRef> iter = sort(terms).iterator(); + @Override + public BytesRef next() { + if (iter.hasNext()) { + return iter.next(); + } + return null; + } + }, terms.size()); + } + + /** + * Creates a new {@link XTermsFilter} from the given {@link BytesRef} array for + * a single field. + */ + public XTermsFilter(final String field, final BytesRef...terms) { + // this ctor prevents unnecessary Term creations + this(field, Arrays.asList(terms)); + } + + /** + * Creates a new {@link XTermsFilter} from the given array. The array can + * contain duplicate terms and multiple fields. + */ + public XTermsFilter(final Term... terms) { + this(Arrays.asList(terms)); + } + + + private XTermsFilter(FieldAndTermEnum iter, int length) { + // TODO: maybe use oal.index.PrefixCodedTerms instead? + // If number of terms is more than a few hundred it + // should be a win + + // TODO: we also pack terms in FieldCache/DocValues + // ... maybe we can refactor to share that code + + // TODO: yet another option is to build the union of the terms in + // an automaton an call intersect on the termsenum if the density is high + + int hash = 9; + byte[] serializedTerms = new byte[0]; + this.offsets = new int[length+1]; + int lastEndOffset = 0; + int index = 0; + ArrayList<TermsAndField> termsAndFields = new ArrayList<TermsAndField>(); + TermsAndField lastTermsAndField = null; + BytesRef previousTerm = null; + String previousField = null; + BytesRef currentTerm; + String currentField; + while((currentTerm = iter.next()) != null) { + currentField = iter.field(); + if (currentField == null) { + throw new IllegalArgumentException("Field must not be null"); + } + if (previousField != null) { + // deduplicate + if (previousField.equals(currentField)) { + if (previousTerm.bytesEquals(currentTerm)){ + continue; + } + } else { + final int start = lastTermsAndField == null ? 
0 : lastTermsAndField.end; + lastTermsAndField = new TermsAndField(start, index, previousField); + termsAndFields.add(lastTermsAndField); + } + } + hash = PRIME * hash + currentField.hashCode(); + hash = PRIME * hash + currentTerm.hashCode(); + if (serializedTerms.length < lastEndOffset+currentTerm.length) { + serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset+currentTerm.length); + } + System.arraycopy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset, currentTerm.length); + offsets[index] = lastEndOffset; + lastEndOffset += currentTerm.length; + index++; + previousTerm = currentTerm; + previousField = currentField; + } + offsets[index] = lastEndOffset; + final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end; + lastTermsAndField = new TermsAndField(start, index, previousField); + termsAndFields.add(lastTermsAndField); + this.termsBytes = ArrayUtil.shrink(serializedTerms, lastEndOffset); + this.termsAndFields = termsAndFields.toArray(new TermsAndField[termsAndFields.size()]); + this.hashCode = hash; + + } + + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + final AtomicReader reader = context.reader(); + FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time + final Fields fields = reader.fields(); + final BytesRef spare = new BytesRef(this.termsBytes); + if (fields == null) { + return result; + } + Terms terms = null; + TermsEnum termsEnum = null; + DocsEnum docs = null; + for (TermsAndField termsAndField : this.termsAndFields) { + if ((terms = fields.terms(termsAndField.field)) != null) { + termsEnum = terms.iterator(termsEnum); // this won't return null + for (int i = termsAndField.start; i < termsAndField.end; i++) { + spare.offset = offsets[i]; + spare.length = offsets[i+1] - offsets[i]; + if (termsEnum.seekExact(spare)) { + docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE); // no freq since we don't need them + if (result == null) { + if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + result = new FixedBitSet(reader.maxDoc()); + // lazy init but don't do it in the hot loop since we could read many docs + result.set(docs.docID()); + } + } + while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + result.set(docs.docID()); + } + } + } + } + } + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if ((obj == null) || (obj.getClass() != this.getClass())) { + return false; + } + + XTermsFilter test = (XTermsFilter) obj; + // first check the fields before even comparing the bytes + if (test.hashCode == hashCode && Arrays.equals(termsAndFields, test.termsAndFields)) { + int lastOffset = termsAndFields[termsAndFields.length - 1].end; + // compare offsets since we sort they must be identical + if (ArrayUtil.equals(offsets, 0, test.offsets, 0, lastOffset + 1)) { + // straight byte comparison since we sort they must be identical + return ArrayUtil.equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]); + } + } + return false; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + BytesRef spare = new BytesRef(termsBytes); + boolean first = true; + for (int i = 0; i < termsAndFields.length; i++) { + TermsAndField current = termsAndFields[i]; + for (int j = current.start; j < current.end; j++) { + spare.offset = offsets[j]; + spare.length = offsets[j+1] - 
offsets[j]; + if (!first) { + builder.append(' '); + } + first = false; + builder.append(current.field).append(':'); + builder.append(spare.utf8ToString()); + } + } + + return builder.toString(); + } + + private static final class TermsAndField { + final int start; + final int end; + final String field; + + + TermsAndField(int start, int end, String field) { + super(); + this.start = start; + this.end = end; + this.field = field; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((field == null) ? 0 : field.hashCode()); + result = prime * result + end; + result = prime * result + start; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + TermsAndField other = (TermsAndField) obj; + if (field == null) { + if (other.field != null) return false; + } else if (!field.equals(other.field)) return false; + if (end != other.end) return false; + if (start != other.start) return false; + return true; + } + + } + + private static abstract class FieldAndTermEnum { + protected String field; + + public abstract BytesRef next(); + + public FieldAndTermEnum() {} + + public FieldAndTermEnum(String field) { this.field = field; } + + public String field() { + return field; + } + } + + /* + * simple utility that returns the in-place sorted list + */ + private static <T extends Comparable<? super T>> List<T> sort(List<T> toSort) { + if (toSort.isEmpty()) { + throw new IllegalArgumentException("no terms provided"); + } + Collections.sort(toSort); + return toSort; + } +} diff --git a/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java b/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java new file mode 100644 index 0000000..91bc21d --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java @@ -0,0 +1,521 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.queryparser; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; +import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import java.util.Collections; +import java.util.Map; + +/** + * XSimpleQueryParser is used to parse human readable query syntax. + * <p> + * The main idea behind this parser is that a person should be able to type + * whatever they want to represent a query, and this parser will do its best + * to interpret what to search for no matter how poorly composed the request + * may be. 
Tokens are considered to be any of a term, phrase, or subquery for the + * operations described below. Whitespace including ' ' '\n' '\r' and '\t' + * and certain operators may be used to delimit tokens ( ) + | " . + * <p> + * Any errors in query syntax will be ignored and the parser will attempt + * to decipher what it can; however, this may mean odd or unexpected results. + * <h4>Query Operators</h4> + * <ul> + * <li>'{@code +}' specifies {@code AND} operation: <tt>token1+token2</tt> + * <li>'{@code |}' specifies {@code OR} operation: <tt>token1|token2</tt> + * <li>'{@code -}' negates a single token: <tt>-token0</tt> + * <li>'{@code "}' creates phrases of terms: <tt>"term1 term2 ..."</tt> + * <li>'{@code *}' at the end of terms specifies prefix query: <tt>term*</tt> + * <li>'{@code (}' and '{@code )}' specifies precedence: <tt>token1 + (token2 | token3)</tt> + * </ul> + * <p> + * The {@link #setDefaultOperator default operator} is {@code OR} if no other operator is specified. + * For example, the following will {@code OR} {@code token1} and {@code token2} together: + * <tt>token1 token2</tt> + * <p> + * Normal operator precedence will be simple order from right to left. + * For example, the following will evaluate {@code token1 OR token2} first, + * then {@code AND} with {@code token3}: + * <blockquote>token1 | token2 + token3</blockquote> + * <h4>Escaping</h4> + * <p> + * An individual term may contain any possible character with certain characters + * requiring escaping using a '{@code \}'. The following characters will need to be escaped in + * terms and phrases: + * {@code + | " ( ) ' \} + * <p> + * The '{@code -}' operator is a special case. On individual terms (not phrases) the first + * character of a term that is {@code -} must be escaped; however, any '{@code -}' characters + * beyond the first character do not need to be escaped. + * For example: + * <ul> + * <li>{@code -term1} -- Specifies {@code NOT} operation against {@code term1} + * <li>{@code \-term1} -- Searches for the term {@code -term1}. + * <li>{@code term-1} -- Searches for the term {@code term-1}. + * <li>{@code term\-1} -- Searches for the term {@code term-1}. + * </ul> + * <p> + * The '{@code *}' operator is a special case. On individual terms (not phrases) the last + * character of a term that is '{@code *}' must be escaped; however, any '{@code *}' characters + * before the last character do not need to be escaped: + * <ul> + * <li>{@code term1*} -- Searches for the prefix {@code term1} + * <li>{@code term1\*} -- Searches for the term {@code term1*} + * <li>{@code term*1} -- Searches for the term {@code term*1} + * <li>{@code term\*1} -- Searches for the term {@code term*1} + * </ul> + * <p> + * Note that above examples consider the terms before text processing. 
+ */ +public class XSimpleQueryParser extends QueryBuilder { + + static { + assert Version.LUCENE_46.onOrAfter(Lucene.VERSION) : "Lucene 4.7 adds SimpleQueryParser, remove me!"; + } + + /** Map of fields to query against with their weights */ + protected final Map<String,Float> weights; + /** flags to the parser (to turn features on/off) */ + protected final int flags; + + /** Enables {@code AND} operator (+) */ + public static final int AND_OPERATOR = 1<<0; + /** Enables {@code NOT} operator (-) */ + public static final int NOT_OPERATOR = 1<<1; + /** Enables {@code OR} operator (|) */ + public static final int OR_OPERATOR = 1<<2; + /** Enables {@code PREFIX} operator (*) */ + public static final int PREFIX_OPERATOR = 1<<3; + /** Enables {@code PHRASE} operator (") */ + public static final int PHRASE_OPERATOR = 1<<4; + /** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */ + public static final int PRECEDENCE_OPERATORS = 1<<5; + /** Enables {@code ESCAPE} operator (\) */ + public static final int ESCAPE_OPERATOR = 1<<6; + /** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */ + public static final int WHITESPACE_OPERATOR = 1<<7; + + private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD; + + /** Creates a new parser searching over a single field. */ + public XSimpleQueryParser(Analyzer analyzer, String field) { + this(analyzer, Collections.singletonMap(field, 1.0F)); + } + + /** Creates a new parser searching over multiple fields with different weights. */ + public XSimpleQueryParser(Analyzer analyzer, Map<String, Float> weights) { + this(analyzer, weights, -1); + } + + /** Creates a new parser with custom flags used to enable/disable certain features. */ + public XSimpleQueryParser(Analyzer analyzer, Map<String, Float> weights, int flags) { + super(analyzer); + this.weights = weights; + this.flags = flags; + } + + /** Parses the query text and returns parsed query (or null if empty) */ + public Query parse(String queryText) { + char data[] = queryText.toCharArray(); + char buffer[] = new char[data.length]; + + State state = new State(data, buffer, 0, data.length); + parseSubQuery(state); + return state.top; + } + + private void parseSubQuery(State state) { + while (state.index < state.length) { + if (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) { + // the beginning of a subquery has been found + consumeSubQuery(state); + } else if (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) { + // this is an extraneous character so it is ignored + ++state.index; + } else if (state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) { + // the beginning of a phrase has been found + consumePhrase(state); + } else if (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) { + // an and operation has been explicitly set + // if an operation has already been set this one is ignored + // if a term (or phrase or subquery) has not been found yet the + // operation is also ignored since there is no previous + // term (or phrase or subquery) to and with + if (state.currentOperation == null && state.top != null) { + state.currentOperation = BooleanClause.Occur.MUST; + } + + ++state.index; + } else if (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) { + // an or operation has been explicitly set + // if an operation has already been set this one is ignored + // if a term (or phrase or subquery) has not been found yet the + // operation is also ignored since there is no previous + // term (or 
phrase or subquery) to or with + if (state.currentOperation == null && state.top != null) { + state.currentOperation = BooleanClause.Occur.SHOULD; + } + + ++state.index; + } else if (state.data[state.index] == '-' && (flags & NOT_OPERATOR) != 0) { + // a not operator has been found, so increase the not count + // two not operators in a row negate each other + ++state.not; + ++state.index; + + // continue so the not operator is not reset + // before the next character is determined + continue; + } else if ((state.data[state.index] == ' ' + || state.data[state.index] == '\t' + || state.data[state.index] == '\n' + || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0) { + // ignore any whitespace found as it may have already been + // used a delimiter across a term (or phrase or subquery) + // or is simply extraneous + ++state.index; + } else { + // the beginning of a token has been found + consumeToken(state); + } + + // reset the not operator as even whitespace is not allowed when + // specifying the not operation for a term (or phrase or subquery) + state.not = 0; + } + } + + private void consumeSubQuery(State state) { + assert (flags & PRECEDENCE_OPERATORS) != 0; + int start = ++state.index; + int precedence = 1; + boolean escaped = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the subquery unless the escape + // character is the last one in the data + escaped = true; + ++state.index; + + continue; + } else if (state.data[state.index] == '(') { + // increase the precedence as there is a + // subquery in the current subquery + ++precedence; + } else if (state.data[state.index] == ')') { + --precedence; + + if (precedence == 0) { + // this should be the end of the subquery + // all characters found will used for + // creating the subquery + break; + } + } + } + + escaped = false; + ++state.index; + } + + if (state.index == state.length) { + // a closing parenthesis was never found so the opening + // parenthesis is considered extraneous and will be ignored + state.index = start; + } else if (state.index == start) { + // a closing parenthesis was found immediately after the opening + // parenthesis so the current operation is reset since it would + // have been applied to this subquery + state.currentOperation = null; + + ++state.index; + } else { + // a complete subquery has been found and is recursively parsed by + // starting over with a new state object + State subState = new State(state.data, state.buffer, start, state.index); + parseSubQuery(subState); + buildQueryTree(state, subState.top); + + ++state.index; + } + } + + private void consumePhrase(State state) { + assert (flags & PHRASE_OPERATOR) != 0; + int start = ++state.index; + int copied = 0; + boolean escaped = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the phrase unless the escape + // character is the last one in the data + escaped = true; + ++state.index; + + continue; + } else if (state.data[state.index] == '"') { + // this should be the end of the phrase + // all characters found will used for + // creating the phrase query + break; + } + } + + escaped = false; + state.buffer[copied++] = state.data[state.index++]; + } 
+ + if (state.index == state.length) { + // a closing double quote was never found so the opening + // double quote is considered extraneous and will be ignored + state.index = start; + } else if (state.index == start) { + // a closing double quote was found immediately after the opening + // double quote so the current operation is reset since it would + // have been applied to this phrase + state.currentOperation = null; + + ++state.index; + } else { + // a complete phrase has been found and is parsed through + // through the analyzer from the given field + String phrase = new String(state.buffer, 0, copied); + Query branch = newPhraseQuery(phrase); + buildQueryTree(state, branch); + + ++state.index; + } + } + + private void consumeToken(State state) { + int copied = 0; + boolean escaped = false; + boolean prefix = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the term unless the escape + // character is the last one in the data + escaped = true; + prefix = false; + ++state.index; + + continue; + } else if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) + || (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) + || (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) + || (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) + || (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) + || ((state.data[state.index] == ' ' + || state.data[state.index] == '\t' + || state.data[state.index] == '\n' + || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) { + // this should be the end of the term + // all characters found will used for + // creating the term query + break; + } + + // wildcard tracks whether or not the last character + // was a '*' operator that hasn't been escaped + // there must be at least one valid character before + // searching for a prefixed set of terms + prefix = copied > 0 && state.data[state.index] == '*' && (flags & PREFIX_OPERATOR) != 0; + } + + escaped = false; + state.buffer[copied++] = state.data[state.index++]; + } + + if (copied > 0) { + final Query branch; + + if (prefix) { + // if a term is found with a closing '*' it is considered to be a prefix query + // and will have prefix added as an option + String token = new String(state.buffer, 0, copied - 1); + branch = newPrefixQuery(token); + } else { + // a standard term has been found so it will be run through + // the entire analysis chain from the specified schema field + String token = new String(state.buffer, 0, copied); + branch = newDefaultQuery(token); + } + + buildQueryTree(state, branch); + } + } + + // buildQueryTree should be called after a term, phrase, or subquery + // is consumed to be added to our existing query tree + // this method will only add to the existing tree if the branch contained in state is not null + private void buildQueryTree(State state, Query branch) { + if (branch != null) { + // modify our branch to a BooleanQuery wrapper for not + // this is necessary any time a term, phrase, or subquery is negated + if (state.not % 2 == 1) { + BooleanQuery nq = new BooleanQuery(); + nq.add(branch, BooleanClause.Occur.MUST_NOT); + nq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + branch = nq; + } + + // first term (or phrase or subquery) found and will begin our query tree + if (state.top == 
null) { + state.top = branch; + } else { + // more than one term (or phrase or subquery) found + // set currentOperation to the default if no other operation is explicitly set + if (state.currentOperation == null) { + state.currentOperation = defaultOperator; + } + + // operational change requiring a new parent node + // this occurs if the previous operation is not the same as current operation + // because the previous operation must be evaluated separately to preserve + // the proper precedence and the current operation will take over as the top of the tree + if (state.previousOperation != state.currentOperation) { + BooleanQuery bq = new BooleanQuery(); + bq.add(state.top, state.currentOperation); + state.top = bq; + } + + // reset all of the state for reuse + ((BooleanQuery)state.top).add(branch, state.currentOperation); + state.previousOperation = state.currentOperation; + } + + // reset the current operation as it was intended to be applied to + // the incoming term (or phrase or subquery) even if branch was null + // due to other possible errors + state.currentOperation = null; + } + } + + /** + * Factory method to generate a standard query (no phrase or prefix operators). + */ + protected Query newDefaultQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry<String,Float> entry : weights.entrySet()) { + Query q = createBooleanQuery(entry.getKey(), text, defaultOperator); + if (q != null) { + q.setBoost(entry.getValue()); + bq.add(q, BooleanClause.Occur.SHOULD); + } + } + return simplify(bq); + } + + /** + * Factory method to generate a phrase query. + */ + protected Query newPhraseQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry<String,Float> entry : weights.entrySet()) { + Query q = createPhraseQuery(entry.getKey(), text); + if (q != null) { + q.setBoost(entry.getValue()); + bq.add(q, BooleanClause.Occur.SHOULD); + } + } + return simplify(bq); + } + + /** + * Factory method to generate a prefix query. + */ + protected Query newPrefixQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry<String,Float> entry : weights.entrySet()) { + PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text)); + prefix.setBoost(entry.getValue()); + bq.add(prefix, BooleanClause.Occur.SHOULD); + } + return simplify(bq); + } + + /** + * Helper to simplify boolean queries with 0 or 1 clause + */ + protected Query simplify(BooleanQuery bq) { + if (bq.clauses().isEmpty()) { + return null; + } else if (bq.clauses().size() == 1) { + return bq.clauses().get(0).getQuery(); + } else { + return bq; + } + } + + /** + * Returns the implicit operator setting, which will be + * either {@code SHOULD} or {@code MUST}. + */ + public BooleanClause.Occur getDefaultOperator() { + return defaultOperator; + } + + /** + * Sets the implicit operator setting, which must be + * either {@code SHOULD} or {@code MUST}. 
+ */ + public void setDefaultOperator(BooleanClause.Occur operator) { + if (operator != BooleanClause.Occur.SHOULD && operator != BooleanClause.Occur.MUST) { + throw new IllegalArgumentException("invalid operator: only SHOULD or MUST are allowed"); + } + this.defaultOperator = operator; + } + + static class State { + final char[] data; // the characters in the query string + final char[] buffer; // a temporary buffer used to reduce necessary allocations + int index; + int length; + + BooleanClause.Occur currentOperation; + BooleanClause.Occur previousOperation; + int not; + + Query top; + + State(char[] data, char[] buffer, int index, int length) { + this.data = data; + this.buffer = buffer; + this.index = index; + this.length = length; + } + } +} + diff --git a/src/main/java/org/apache/lucene/queryparser/classic/ExistsFieldQueryExtension.java b/src/main/java/org/apache/lucene/queryparser/classic/ExistsFieldQueryExtension.java new file mode 100644 index 0000000..470f7b8 --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/classic/ExistsFieldQueryExtension.java @@ -0,0 +1,38 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queryparser.classic; + +import org.apache.lucene.search.Query; +import org.elasticsearch.common.lucene.search.XConstantScoreQuery; +import org.elasticsearch.index.query.ExistsFilterParser; +import org.elasticsearch.index.query.QueryParseContext; + +/** + * + */ +public class ExistsFieldQueryExtension implements FieldQueryExtension { + + public static final String NAME = "_exists_"; + + @Override + public Query query(QueryParseContext parseContext, String queryText) { + return new XConstantScoreQuery(ExistsFilterParser.newFilter(parseContext, queryText, null)); + } +} diff --git a/src/main/java/org/apache/lucene/queryparser/classic/FieldQueryExtension.java b/src/main/java/org/apache/lucene/queryparser/classic/FieldQueryExtension.java new file mode 100644 index 0000000..003ff18 --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/classic/FieldQueryExtension.java @@ -0,0 +1,31 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queryparser.classic; + +import org.apache.lucene.search.Query; +import org.elasticsearch.index.query.QueryParseContext; + +/** + * + */ +public interface FieldQueryExtension { + + Query query(QueryParseContext parseContext, String queryText); +} diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java new file mode 100644 index 0000000..eceada9 --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -0,0 +1,884 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queryparser.classic; + +import com.google.common.base.Objects; +import com.google.common.collect.ImmutableMap; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.lucene.search.XFilteredQuery; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.support.QueryParsers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded; +import static org.elasticsearch.common.lucene.search.Queries.optimizeQuery; +import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameQuery; + +/** + * A query parser that uses the {@link MapperService} in order to build smarter + * queries based on the mapping information. + * <p/> + * <p>Also breaks fields with [type].[name] into a boolean query that must include the type + * as well as the query on the name. 
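+ * <p/>
+ * <p>Rough usage sketch (illustrative only; the field name, analyzer and parse context
+ * shown here are assumptions, not defaults):
+ * <pre>
+ * QueryParserSettings settings = new QueryParserSettings();
+ * settings.defaultField("body");           // hypothetical default field
+ * settings.defaultAnalyzer(analyzer);      // any Analyzer instance
+ * MapperQueryParser parser = new MapperQueryParser(settings, parseContext);
+ * Query query = parser.parse("title:foo OR bar~2");
+ * </pre>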
+ */ +public class MapperQueryParser extends QueryParser { + + public static final ImmutableMap<String, FieldQueryExtension> fieldQueryExtensions; + + static { + fieldQueryExtensions = ImmutableMap.<String, FieldQueryExtension>builder() + .put(ExistsFieldQueryExtension.NAME, new ExistsFieldQueryExtension()) + .put(MissingFieldQueryExtension.NAME, new MissingFieldQueryExtension()) + .build(); + } + + private final QueryParseContext parseContext; + + private QueryParserSettings settings; + + private Analyzer quoteAnalyzer; + + private boolean forcedAnalyzer; + private boolean forcedQuoteAnalyzer; + + private FieldMapper currentMapper; + + private boolean analyzeWildcard; + + private String quoteFieldSuffix; + + public MapperQueryParser(QueryParseContext parseContext) { + super(Lucene.QUERYPARSER_VERSION, null, null); + this.parseContext = parseContext; + } + + public MapperQueryParser(QueryParserSettings settings, QueryParseContext parseContext) { + super(Lucene.QUERYPARSER_VERSION, settings.defaultField(), settings.defaultAnalyzer()); + this.parseContext = parseContext; + reset(settings); + } + + public void reset(QueryParserSettings settings) { + this.settings = settings; + this.field = settings.defaultField(); + + if (settings.fields() != null) { + if (settings.fields.size() == 1) { + // just mark it as the default field + this.field = settings.fields().get(0); + } else { + // otherwise, we need to have the default field being null... + this.field = null; + } + } + + this.forcedAnalyzer = settings.forcedAnalyzer() != null; + this.setAnalyzer(forcedAnalyzer ? settings.forcedAnalyzer() : settings.defaultAnalyzer()); + if (settings.forcedQuoteAnalyzer() != null) { + this.forcedQuoteAnalyzer = true; + this.quoteAnalyzer = settings.forcedQuoteAnalyzer(); + } else if (forcedAnalyzer) { + this.forcedQuoteAnalyzer = true; + this.quoteAnalyzer = settings.forcedAnalyzer(); + } else { + this.forcedAnalyzer = false; + this.quoteAnalyzer = settings.defaultQuoteAnalyzer(); + } + this.quoteFieldSuffix = settings.quoteFieldSuffix(); + setMultiTermRewriteMethod(settings.rewriteMethod()); + setEnablePositionIncrements(settings.enablePositionIncrements()); + setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries()); + setAllowLeadingWildcard(settings.allowLeadingWildcard()); + setLowercaseExpandedTerms(settings.lowercaseExpandedTerms()); + setPhraseSlop(settings.phraseSlop()); + setDefaultOperator(settings.defaultOperator()); + setFuzzyMinSim(settings.fuzzyMinSim()); + setFuzzyPrefixLength(settings.fuzzyPrefixLength()); + this.analyzeWildcard = settings.analyzeWildcard(); + } + + /** + * We override this one so we can get the fuzzy part to be treated as string, so people can do: "age:10~5" or "timestamp:2012-10-10~5d" + */ + @Override + Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException { + if (fuzzySlop.image.length() == 1) { + return getFuzzyQuery(qfield, termImage, Float.toString(fuzzyMinSim)); + } + return getFuzzyQuery(qfield, termImage, fuzzySlop.image.substring(1)); + } + + @Override + protected Query newTermQuery(Term term) { + if (currentMapper != null) { + Query termQuery = currentMapper.queryStringTermQuery(term); + if (termQuery != null) { + return termQuery; + } + } + return super.newTermQuery(term); + } + + @Override + protected Query newMatchAllDocsQuery() { + return Queries.newMatchAllQuery(); + } + + @Override + public Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { + FieldQueryExtension 
fieldQueryExtension = fieldQueryExtensions.get(field); + if (fieldQueryExtension != null) { + return fieldQueryExtension.query(parseContext, queryText); + } + Collection<String> fields = extractMultiFields(field); + if (fields != null) { + if (fields.size() == 1) { + return getFieldQuerySingle(fields.iterator().next(), queryText, quoted); + } + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = getFieldQuerySingle(mField, queryText, quoted); + if (q != null) { + added = true; + applyBoost(mField, q); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = getFieldQuerySingle(mField, queryText, quoted); + if (q != null) { + applyBoost(mField, q); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + } else { + return getFieldQuerySingle(field, queryText, quoted); + } + } + + private Query getFieldQuerySingle(String field, String queryText, boolean quoted) throws ParseException { + if (!quoted && queryText.length() > 1) { + if (queryText.charAt(0) == '>') { + if (queryText.length() > 2) { + if (queryText.charAt(1) == '=') { + return getRangeQuerySingle(field, queryText.substring(2), null, true, true); + } + } + return getRangeQuerySingle(field, queryText.substring(1), null, false, true); + } else if (queryText.charAt(0) == '<') { + if (queryText.length() > 2) { + if (queryText.charAt(1) == '=') { + return getRangeQuerySingle(field, null, queryText.substring(2), true, true); + } + } + return getRangeQuerySingle(field, null, queryText.substring(1), true, false); + } + } + currentMapper = null; + Analyzer oldAnalyzer = getAnalyzer(); + try { + MapperService.SmartNameFieldMappers fieldMappers = null; + if (quoted) { + setAnalyzer(quoteAnalyzer); + if (quoteFieldSuffix != null) { + fieldMappers = parseContext.smartFieldMappers(field + quoteFieldSuffix); + } + } + if (fieldMappers == null) { + fieldMappers = parseContext.smartFieldMappers(field); + } + if (fieldMappers != null) { + if (quoted) { + if (!forcedQuoteAnalyzer) { + setAnalyzer(fieldMappers.searchQuoteAnalyzer()); + } + } else { + if (!forcedAnalyzer) { + setAnalyzer(fieldMappers.searchAnalyzer()); + } + } + currentMapper = fieldMappers.fieldMappers().mapper(); + if (currentMapper != null) { + Query query = null; + if (currentMapper.useTermQueryWithQueryString()) { + try { + if (fieldMappers.explicitTypeInNameWithDocMapper()) { + String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{fieldMappers.docMapper().type()}); + try { + query = currentMapper.termQuery(queryText, parseContext); + } finally { + QueryParseContext.setTypes(previousTypes); + } + } else { + query = currentMapper.termQuery(queryText, parseContext); + } + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } else { + throw e; + } + } + } + if (query == null) { + query = super.getFieldQuery(currentMapper.names().indexName(), queryText, quoted); + } + return wrapSmartNameQuery(query, fieldMappers, parseContext); + } + } + return super.getFieldQuery(field, queryText, quoted); + } finally { + setAnalyzer(oldAnalyzer); + } + } + + @Override + protected Query getFieldQuery(String field, String queryText, int slop) throws 
ParseException { + Collection<String> fields = extractMultiFields(field); + if (fields != null) { + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = super.getFieldQuery(mField, queryText, slop); + if (q != null) { + added = true; + applyBoost(mField, q); + applySlop(q, slop); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = super.getFieldQuery(mField, queryText, slop); + if (q != null) { + applyBoost(mField, q); + applySlop(q, slop); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + } else { + return super.getFieldQuery(field, queryText, slop); + } + } + + @Override + protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { + if ("*".equals(part1)) { + part1 = null; + } + if ("*".equals(part2)) { + part2 = null; + } + + Collection<String> fields = extractMultiFields(field); + + if (fields == null) { + return getRangeQuerySingle(field, part1, part2, startInclusive, endInclusive); + } + + + if (fields.size() == 1) { + return getRangeQuerySingle(fields.iterator().next(), part1, part2, startInclusive, endInclusive); + } + + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = getRangeQuerySingle(mField, part1, part2, startInclusive, endInclusive); + if (q != null) { + added = true; + applyBoost(mField, q); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = getRangeQuerySingle(mField, part1, part2, startInclusive, endInclusive); + if (q != null) { + applyBoost(mField, q); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + } + + private Query getRangeQuerySingle(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { + currentMapper = null; + MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field); + if (fieldMappers != null) { + currentMapper = fieldMappers.fieldMappers().mapper(); + if (currentMapper != null) { + + if (lowercaseExpandedTerms && !currentMapper.isNumeric()) { + part1 = part1 == null ? null : part1.toLowerCase(locale); + part2 = part2 == null ? 
null : part2.toLowerCase(locale); + } + + try { + Query rangeQuery = currentMapper.rangeQuery(part1, part2, startInclusive, endInclusive, parseContext); + return wrapSmartNameQuery(rangeQuery, fieldMappers, parseContext); + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; + } + } + } + return newRangeQuery(field, part1, part2, startInclusive, endInclusive); + } + + protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } + Collection<String> fields = extractMultiFields(field); + if (fields != null) { + if (fields.size() == 1) { + return getFuzzyQuerySingle(fields.iterator().next(), termStr, minSimilarity); + } + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = getFuzzyQuerySingle(mField, termStr, minSimilarity); + if (q != null) { + added = true; + applyBoost(mField, q); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = getFuzzyQuerySingle(mField, termStr, minSimilarity); + applyBoost(mField, q); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + } + } else { + return getFuzzyQuerySingle(field, termStr, minSimilarity); + } + } + + private Query getFuzzyQuerySingle(String field, String termStr, String minSimilarity) throws ParseException { + currentMapper = null; + MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field); + if (fieldMappers != null) { + currentMapper = fieldMappers.fieldMappers().mapper(); + if (currentMapper != null) { + try { + //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed + Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, Fuzziness.build(minSimilarity), fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false); + return wrapSmartNameQuery(fuzzyQuery, fieldMappers, parseContext); + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; + } + } + } + return super.getFuzzyQuery(field, termStr, Float.parseFloat(minSimilarity)); + } + + @Override + protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { + String text = term.text(); + int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, text.codePointCount(0, text.length())); + //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed + FuzzyQuery query = new FuzzyQuery(term, numEdits, prefixLength, settings.fuzzyMaxExpansions(), false); + QueryParsers.setRewriteMethod(query, settings.fuzzyRewriteMethod()); + return query; + } + + @Override + protected Query getPrefixQuery(String field, String termStr) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } + Collection<String> fields = extractMultiFields(field); + if (fields != null) { + if (fields.size() == 1) { + return getPrefixQuerySingle(fields.iterator().next(), termStr); + } + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = getPrefixQuerySingle(mField, termStr); + if (q != null) { + added = true; + 
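+ // apply any per-field boost from the settings before adding the clause to the dis-max query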
applyBoost(mField, q); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = getPrefixQuerySingle(mField, termStr); + if (q != null) { + applyBoost(mField, q); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + } else { + return getPrefixQuerySingle(field, termStr); + } + } + + private Query getPrefixQuerySingle(String field, String termStr) throws ParseException { + currentMapper = null; + Analyzer oldAnalyzer = getAnalyzer(); + try { + MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field); + if (fieldMappers != null) { + if (!forcedAnalyzer) { + setAnalyzer(fieldMappers.searchAnalyzer()); + } + currentMapper = fieldMappers.fieldMappers().mapper(); + if (currentMapper != null) { + Query query = null; + if (currentMapper.useTermQueryWithQueryString()) { + if (fieldMappers.explicitTypeInNameWithDocMapper()) { + String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{fieldMappers.docMapper().type()}); + try { + query = currentMapper.prefixQuery(termStr, multiTermRewriteMethod, parseContext); + } finally { + QueryParseContext.setTypes(previousTypes); + } + } else { + query = currentMapper.prefixQuery(termStr, multiTermRewriteMethod, parseContext); + } + } + if (query == null) { + query = getPossiblyAnalyzedPrefixQuery(currentMapper.names().indexName(), termStr); + } + return wrapSmartNameQuery(query, fieldMappers, parseContext); + } + } + return getPossiblyAnalyzedPrefixQuery(field, termStr); + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; + } finally { + setAnalyzer(oldAnalyzer); + } + } + + private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException { + if (!analyzeWildcard) { + return super.getPrefixQuery(field, termStr); + } + // get Analyzer from superclass and tokenize the term + TokenStream source; + try { + source = getAnalyzer().tokenStream(field, termStr); + source.reset(); + } catch (IOException e) { + return super.getPrefixQuery(field, termStr); + } + List<String> tlist = new ArrayList<String>(); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); + + while (true) { + try { + if (!source.incrementToken()) break; + } catch (IOException e) { + break; + } + tlist.add(termAtt.toString()); + } + + try { + source.close(); + } catch (IOException e) { + // ignore + } + + if (tlist.size() == 1) { + return super.getPrefixQuery(field, tlist.get(0)); + } else { + // build a boolean query with prefix on each one... + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String token : tlist) { + clauses.add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD)); + } + return getBooleanQuery(clauses, true); + + //return super.getPrefixQuery(field, termStr); + + /* this means that the analyzer used either added or consumed +* (common for a stemmer) tokens, and we can't build a PrefixQuery */ +// throw new ParseException("Cannot build PrefixQuery with analyzer " +// + getAnalyzer().getClass() +// + (tlist.size() > 1 ? 
" - token(s) added" : " - token consumed")); + } + + } + + @Override + protected Query getWildcardQuery(String field, String termStr) throws ParseException { + if (termStr.equals("*")) { + // we want to optimize for match all query for the "*:*", and "*" cases + if ("*".equals(field) || Objects.equal(field, this.field)) { + String actualField = field; + if (actualField == null) { + actualField = this.field; + } + if (actualField == null) { + return newMatchAllDocsQuery(); + } + if ("*".equals(actualField) || "_all".equals(actualField)) { + return newMatchAllDocsQuery(); + } + // effectively, we check if a field exists or not + return fieldQueryExtensions.get(ExistsFieldQueryExtension.NAME).query(parseContext, actualField); + } + } + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } + Collection<String> fields = extractMultiFields(field); + if (fields != null) { + if (fields.size() == 1) { + return getWildcardQuerySingle(fields.iterator().next(), termStr); + } + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = getWildcardQuerySingle(mField, termStr); + if (q != null) { + added = true; + applyBoost(mField, q); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = getWildcardQuerySingle(mField, termStr); + if (q != null) { + applyBoost(mField, q); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + } else { + return getWildcardQuerySingle(field, termStr); + } + } + + private Query getWildcardQuerySingle(String field, String termStr) throws ParseException { + String indexedNameField = field; + currentMapper = null; + Analyzer oldAnalyzer = getAnalyzer(); + try { + MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field); + if (fieldMappers != null) { + if (!forcedAnalyzer) { + setAnalyzer(fieldMappers.searchAnalyzer()); + } + currentMapper = fieldMappers.fieldMappers().mapper(); + if (currentMapper != null) { + indexedNameField = currentMapper.names().indexName(); + } + return wrapSmartNameQuery(getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr), fieldMappers, parseContext); + } + return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr); + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; + } finally { + setAnalyzer(oldAnalyzer); + } + } + + private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException { + if (!analyzeWildcard) { + return super.getWildcardQuery(field, termStr); + } + boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*")); + StringBuilder aggStr = new StringBuilder(); + StringBuilder tmp = new StringBuilder(); + for (int i = 0; i < termStr.length(); i++) { + char c = termStr.charAt(i); + if (c == '?' 
|| c == '*') { + if (isWithinToken) { + try { + TokenStream source = getAnalyzer().tokenStream(field, tmp.toString()); + source.reset(); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); + if (source.incrementToken()) { + String term = termAtt.toString(); + if (term.length() == 0) { + // no tokens, just use what we have now + aggStr.append(tmp); + } else { + aggStr.append(term); + } + } else { + // no tokens, just use what we have now + aggStr.append(tmp); + } + source.close(); + } catch (IOException e) { + aggStr.append(tmp); + } + tmp.setLength(0); + } + isWithinToken = false; + aggStr.append(c); + } else { + tmp.append(c); + isWithinToken = true; + } + } + if (isWithinToken) { + try { + TokenStream source = getAnalyzer().tokenStream(field, tmp.toString()); + source.reset(); + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); + if (source.incrementToken()) { + String term = termAtt.toString(); + if (term.length() == 0) { + // no tokens, just use what we have now + aggStr.append(tmp); + } else { + aggStr.append(term); + } + } else { + // no tokens, just use what we have now + aggStr.append(tmp); + } + source.close(); + } catch (IOException e) { + aggStr.append(tmp); + } + } + + return super.getWildcardQuery(field, aggStr.toString()); + } + + @Override + protected Query getRegexpQuery(String field, String termStr) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } + Collection<String> fields = extractMultiFields(field); + if (fields != null) { + if (fields.size() == 1) { + return getRegexpQuerySingle(fields.iterator().next(), termStr); + } + if (settings.useDisMax()) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); + boolean added = false; + for (String mField : fields) { + Query q = getRegexpQuerySingle(mField, termStr); + if (q != null) { + added = true; + applyBoost(mField, q); + disMaxQuery.add(q); + } + } + if (!added) { + return null; + } + return disMaxQuery; + } else { + List<BooleanClause> clauses = new ArrayList<BooleanClause>(); + for (String mField : fields) { + Query q = getRegexpQuerySingle(mField, termStr); + if (q != null) { + applyBoost(mField, q); + clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } + } + if (clauses.size() == 0) // happens for stopwords + return null; + return getBooleanQuery(clauses, true); + } + } else { + return getRegexpQuerySingle(field, termStr); + } + } + + private Query getRegexpQuerySingle(String field, String termStr) throws ParseException { + currentMapper = null; + Analyzer oldAnalyzer = getAnalyzer(); + try { + MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field); + if (fieldMappers != null) { + if (!forcedAnalyzer) { + setAnalyzer(fieldMappers.searchAnalyzer()); + } + currentMapper = fieldMappers.fieldMappers().mapper(); + if (currentMapper != null) { + Query query = null; + if (currentMapper.useTermQueryWithQueryString()) { + if (fieldMappers.explicitTypeInNameWithDocMapper()) { + String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{fieldMappers.docMapper().type()}); + try { + query = currentMapper.regexpQuery(termStr, RegExp.ALL, multiTermRewriteMethod, parseContext); + } finally { + QueryParseContext.setTypes(previousTypes); + } + } else { + query = currentMapper.regexpQuery(termStr, RegExp.ALL, multiTermRewriteMethod, parseContext); + } + } + if (query == null) { + query = super.getRegexpQuery(field, termStr); + } + return 
wrapSmartNameQuery(query, fieldMappers, parseContext); + } + } + return super.getRegexpQuery(field, termStr); + } catch (RuntimeException e) { + if (settings.lenient()) { + return null; + } + throw e; + } finally { + setAnalyzer(oldAnalyzer); + } + } + + @Override + protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord) throws ParseException { + Query q = super.getBooleanQuery(clauses, disableCoord); + if (q == null) { + return null; + } + return optimizeQuery(fixNegativeQueryIfNeeded(q)); + } + + private void applyBoost(String field, Query q) { + if (settings.boosts() != null) { + float boost = 1f; + if (settings.boosts().containsKey(field)) { + boost = settings.boosts().lget(); + } + q.setBoost(boost); + } + } + + private void applySlop(Query q, int slop) { + if (q instanceof XFilteredQuery) { + applySlop(((XFilteredQuery)q).getQuery(), slop); + } + if (q instanceof PhraseQuery) { + ((PhraseQuery) q).setSlop(slop); + } else if (q instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) q).setSlop(slop); + } + } + + private Collection<String> extractMultiFields(String field) { + Collection<String> fields = null; + if (field != null) { + fields = parseContext.simpleMatchToIndexNames(field); + } else { + fields = settings.fields(); + } + return fields; + } + + public Query parse(String query) throws ParseException { + if (query.trim().isEmpty()) { + // if the query string is empty we return no docs / empty result + // the behavior is simple to change in the client if all docs is required + // or a default query + return new MatchNoDocsQuery(); + } + return super.parse(query); + } +} diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MissingFieldQueryExtension.java b/src/main/java/org/apache/lucene/queryparser/classic/MissingFieldQueryExtension.java new file mode 100644 index 0000000..ad200d4 --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/classic/MissingFieldQueryExtension.java @@ -0,0 +1,39 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.lucene.queryparser.classic; + +import org.apache.lucene.search.Query; +import org.elasticsearch.common.lucene.search.XConstantScoreQuery; +import org.elasticsearch.index.query.MissingFilterParser; +import org.elasticsearch.index.query.QueryParseContext; + +/** + * + */ +public class MissingFieldQueryExtension implements FieldQueryExtension { + + public static final String NAME = "_missing_"; + + @Override + public Query query(QueryParseContext parseContext, String queryText) { + return new XConstantScoreQuery(MissingFilterParser.newFilter(parseContext, queryText, + MissingFilterParser.DEFAULT_EXISTENCE_VALUE, MissingFilterParser.DEFAULT_NULL_VALUE, null)); + } +} diff --git a/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java b/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java new file mode 100644 index 0000000..9b8f9aa --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java @@ -0,0 +1,376 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.lucene.queryparser.classic; + +import com.carrotsearch.hppc.ObjectFloatOpenHashMap; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; + +import java.util.Collection; +import java.util.List; + +/** + * + */ +public class QueryParserSettings { + + public static final boolean DEFAULT_ALLOW_LEADING_WILDCARD = true; + public static final boolean DEFAULT_ANALYZE_WILDCARD = false; + public static final float DEFAULT_BOOST = 1.f; + + private String queryString; + private String defaultField; + private float boost = DEFAULT_BOOST; + private MapperQueryParser.Operator defaultOperator = QueryParser.Operator.OR; + private boolean autoGeneratePhraseQueries = false; + private boolean allowLeadingWildcard = DEFAULT_ALLOW_LEADING_WILDCARD; + private boolean lowercaseExpandedTerms = true; + private boolean enablePositionIncrements = true; + private int phraseSlop = 0; + private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + private int fuzzyMaxExpansions = FuzzyQuery.defaultMaxExpansions; + private MultiTermQuery.RewriteMethod fuzzyRewriteMethod = null; + private boolean analyzeWildcard = DEFAULT_ANALYZE_WILDCARD; + private boolean escape = false; + private Analyzer defaultAnalyzer = null; + private Analyzer defaultQuoteAnalyzer = null; + private Analyzer forcedAnalyzer = null; + private Analyzer forcedQuoteAnalyzer = null; + private String quoteFieldSuffix = null; + private MultiTermQuery.RewriteMethod rewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + private String minimumShouldMatch; + private boolean lenient; + + + List<String> fields = null; + Collection<String> queryTypes = null; + ObjectFloatOpenHashMap<String> boosts = null; + float tieBreaker = 0.0f; + boolean useDisMax = true; + + public boolean isCacheable() { + // a hack for now :) to determine if a query string is cacheable + return !queryString.contains("now"); + } + + public String queryString() { + return queryString; + } + + public void queryString(String queryString) { + this.queryString = queryString; + } + + public String defaultField() { + return defaultField; + } + + public void defaultField(String defaultField) { + this.defaultField = defaultField; + } + + public float boost() { + return boost; + } + + public void boost(float boost) { + this.boost = boost; + } + + public QueryParser.Operator defaultOperator() { + return defaultOperator; + } + + public void defaultOperator(QueryParser.Operator defaultOperator) { + this.defaultOperator = defaultOperator; + } + + public boolean autoGeneratePhraseQueries() { + return autoGeneratePhraseQueries; + } + + public void autoGeneratePhraseQueries(boolean autoGeneratePhraseQueries) { + this.autoGeneratePhraseQueries = autoGeneratePhraseQueries; + } + + public boolean allowLeadingWildcard() { + return allowLeadingWildcard; + } + + public void allowLeadingWildcard(boolean allowLeadingWildcard) { + this.allowLeadingWildcard = allowLeadingWildcard; + } + + public boolean lowercaseExpandedTerms() { + return lowercaseExpandedTerms; + } + + public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + public boolean enablePositionIncrements() { + return enablePositionIncrements; + } + + public void enablePositionIncrements(boolean enablePositionIncrements) { + this.enablePositionIncrements = enablePositionIncrements; + } + + 
public int phraseSlop() { + return phraseSlop; + } + + public void phraseSlop(int phraseSlop) { + this.phraseSlop = phraseSlop; + } + + public float fuzzyMinSim() { + return fuzzyMinSim; + } + + public void fuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + + public int fuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + public void fuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + + public int fuzzyMaxExpansions() { + return fuzzyMaxExpansions; + } + + public void fuzzyMaxExpansions(int fuzzyMaxExpansions) { + this.fuzzyMaxExpansions = fuzzyMaxExpansions; + } + + public MultiTermQuery.RewriteMethod fuzzyRewriteMethod() { + return fuzzyRewriteMethod; + } + + public void fuzzyRewriteMethod(MultiTermQuery.RewriteMethod fuzzyRewriteMethod) { + this.fuzzyRewriteMethod = fuzzyRewriteMethod; + } + + public boolean escape() { + return escape; + } + + public void escape(boolean escape) { + this.escape = escape; + } + + public Analyzer defaultAnalyzer() { + return defaultAnalyzer; + } + + public void defaultAnalyzer(Analyzer defaultAnalyzer) { + this.defaultAnalyzer = defaultAnalyzer; + } + + public Analyzer defaultQuoteAnalyzer() { + return defaultQuoteAnalyzer; + } + + public void defaultQuoteAnalyzer(Analyzer defaultAnalyzer) { + this.defaultQuoteAnalyzer = defaultAnalyzer; + } + + public Analyzer forcedAnalyzer() { + return forcedAnalyzer; + } + + public void forcedAnalyzer(Analyzer forcedAnalyzer) { + this.forcedAnalyzer = forcedAnalyzer; + } + + public Analyzer forcedQuoteAnalyzer() { + return forcedQuoteAnalyzer; + } + + public void forcedQuoteAnalyzer(Analyzer forcedAnalyzer) { + this.forcedQuoteAnalyzer = forcedAnalyzer; + } + + public boolean analyzeWildcard() { + return this.analyzeWildcard; + } + + public void analyzeWildcard(boolean analyzeWildcard) { + this.analyzeWildcard = analyzeWildcard; + } + + public MultiTermQuery.RewriteMethod rewriteMethod() { + return this.rewriteMethod; + } + + public void rewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) { + this.rewriteMethod = rewriteMethod; + } + + public String minimumShouldMatch() { + return this.minimumShouldMatch; + } + + public void minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + } + + public void quoteFieldSuffix(String quoteFieldSuffix) { + this.quoteFieldSuffix = quoteFieldSuffix; + } + + public String quoteFieldSuffix() { + return this.quoteFieldSuffix; + } + + public void lenient(boolean lenient) { + this.lenient = lenient; + } + + public boolean lenient() { + return this.lenient; + } + + public List<String> fields() { + return fields; + } + + public void fields(List<String> fields) { + this.fields = fields; + } + + public Collection<String> queryTypes() { + return queryTypes; + } + + public void queryTypes(Collection<String> queryTypes) { + this.queryTypes = queryTypes; + } + + public ObjectFloatOpenHashMap<String> boosts() { + return boosts; + } + + public void boosts(ObjectFloatOpenHashMap<String> boosts) { + this.boosts = boosts; + } + + public float tieBreaker() { + return tieBreaker; + } + + public void tieBreaker(float tieBreaker) { + this.tieBreaker = tieBreaker; + } + + public boolean useDisMax() { + return useDisMax; + } + + public void useDisMax(boolean useDisMax) { + this.useDisMax = useDisMax; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + QueryParserSettings that = 
(QueryParserSettings) o; + + if (autoGeneratePhraseQueries != that.autoGeneratePhraseQueries()) return false; + if (allowLeadingWildcard != that.allowLeadingWildcard) return false; + if (Float.compare(that.boost, boost) != 0) return false; + if (enablePositionIncrements != that.enablePositionIncrements) return false; + if (escape != that.escape) return false; + if (analyzeWildcard != that.analyzeWildcard) return false; + if (Float.compare(that.fuzzyMinSim, fuzzyMinSim) != 0) return false; + if (fuzzyPrefixLength != that.fuzzyPrefixLength) return false; + if (fuzzyMaxExpansions != that.fuzzyMaxExpansions) return false; + if (fuzzyRewriteMethod != null ? !fuzzyRewriteMethod.equals(that.fuzzyRewriteMethod) : that.fuzzyRewriteMethod != null) + return false; + if (lowercaseExpandedTerms != that.lowercaseExpandedTerms) return false; + if (phraseSlop != that.phraseSlop) return false; + if (defaultAnalyzer != null ? !defaultAnalyzer.equals(that.defaultAnalyzer) : that.defaultAnalyzer != null) + return false; + if (defaultQuoteAnalyzer != null ? !defaultQuoteAnalyzer.equals(that.defaultQuoteAnalyzer) : that.defaultQuoteAnalyzer != null) + return false; + if (forcedAnalyzer != null ? !forcedAnalyzer.equals(that.forcedAnalyzer) : that.forcedAnalyzer != null) + return false; + if (forcedQuoteAnalyzer != null ? !forcedQuoteAnalyzer.equals(that.forcedQuoteAnalyzer) : that.forcedQuoteAnalyzer != null) + return false; + if (defaultField != null ? !defaultField.equals(that.defaultField) : that.defaultField != null) return false; + if (defaultOperator != that.defaultOperator) return false; + if (queryString != null ? !queryString.equals(that.queryString) : that.queryString != null) return false; + if (rewriteMethod != null ? !rewriteMethod.equals(that.rewriteMethod) : that.rewriteMethod != null) + return false; + if (minimumShouldMatch != null ? !minimumShouldMatch.equals(that.minimumShouldMatch) : that.minimumShouldMatch != null) + return false; + if (quoteFieldSuffix != null ? !quoteFieldSuffix.equals(that.quoteFieldSuffix) : that.quoteFieldSuffix != null) + return false; + if (lenient != that.lenient) { + return false; + } + + if (Float.compare(that.tieBreaker, tieBreaker) != 0) return false; + if (useDisMax != that.useDisMax) return false; + if (boosts != null ? !boosts.equals(that.boosts) : that.boosts != null) return false; + if (fields != null ? !fields.equals(that.fields) : that.fields != null) return false; + if (queryTypes != null ? !queryTypes.equals(that.queryTypes) : that.queryTypes != null) return false; + + return true; + } + + @Override + public int hashCode() { + int result = queryString != null ? queryString.hashCode() : 0; + result = 31 * result + (defaultField != null ? defaultField.hashCode() : 0); + result = 31 * result + (boost != +0.0f ? Float.floatToIntBits(boost) : 0); + result = 31 * result + (defaultOperator != null ? defaultOperator.hashCode() : 0); + result = 31 * result + (autoGeneratePhraseQueries ? 1 : 0); + result = 31 * result + (allowLeadingWildcard ? 1 : 0); + result = 31 * result + (lowercaseExpandedTerms ? 1 : 0); + result = 31 * result + (enablePositionIncrements ? 1 : 0); + result = 31 * result + phraseSlop; + result = 31 * result + (fuzzyMinSim != +0.0f ? Float.floatToIntBits(fuzzyMinSim) : 0); + result = 31 * result + fuzzyPrefixLength; + result = 31 * result + (escape ? 1 : 0); + result = 31 * result + (defaultAnalyzer != null ? defaultAnalyzer.hashCode() : 0); + result = 31 * result + (defaultQuoteAnalyzer != null ? 
defaultQuoteAnalyzer.hashCode() : 0); + result = 31 * result + (forcedAnalyzer != null ? forcedAnalyzer.hashCode() : 0); + result = 31 * result + (forcedQuoteAnalyzer != null ? forcedQuoteAnalyzer.hashCode() : 0); + result = 31 * result + (analyzeWildcard ? 1 : 0); + + result = 31 * result + (fields != null ? fields.hashCode() : 0); + result = 31 * result + (queryTypes != null ? queryTypes.hashCode() : 0); + result = 31 * result + (boosts != null ? boosts.hashCode() : 0); + result = 31 * result + (tieBreaker != +0.0f ? Float.floatToIntBits(tieBreaker) : 0); + result = 31 * result + (useDisMax ? 1 : 0); + return result; + } +} diff --git a/src/main/java/org/apache/lucene/search/XReferenceManager.java b/src/main/java/org/apache/lucene/search/XReferenceManager.java new file mode 100644 index 0000000..07fb066 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/XReferenceManager.java @@ -0,0 +1,326 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.Version; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Utility class to safely share instances of a certain type across multiple + * threads, while periodically refreshing them. This class ensures each + * reference is closed only once all threads have finished using it. It is + * recommended to consult the documentation of {@link org.apache.lucene.search.XReferenceManager} + * implementations for their {@link #maybeRefresh()} semantics. + * + * @param <G> + * the concrete type that will be {@link #acquire() acquired} and + * {@link #release(Object) released}. + * + * @lucene.experimental + */ +public abstract class XReferenceManager<G> implements Closeable { + static { + assert Version.LUCENE_46 == org.elasticsearch.Version.CURRENT.luceneVersion : "Remove this once we are on LUCENE_47 - see LUCENE-5436"; + } + + private static final String REFERENCE_MANAGER_IS_CLOSED_MSG = "this ReferenceManager is closed"; + + protected volatile G current; + + private final Lock refreshLock = new ReentrantLock(); + + private final List<RefreshListener> refreshListeners = new CopyOnWriteArrayList<RefreshListener>(); + + private void ensureOpen() { + if (current == null) { + throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG); + } + } + + private synchronized void swapReference(G newReference) throws IOException { + ensureOpen(); + final G oldReference = current; + current = newReference; + release(oldReference); + } + + /** + * Decrement reference counting on the given reference. 
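+ * Implementations typically delegate to the wrapped resource, e.g.
+ * {@code reference.getIndexReader().decRef()} in {@link XSearcherManager}.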
+ * @throws java.io.IOException if reference decrement on the given resource failed. + * */ + protected abstract void decRef(G reference) throws IOException; + + /** + * Refresh the given reference if needed. Returns {@code null} if no refresh + * was needed, otherwise a new refreshed reference. + * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}. + * @throws java.io.IOException if the refresh operation failed + */ + protected abstract G refreshIfNeeded(G referenceToRefresh) throws IOException; + + /** + * Try to increment reference counting on the given reference. Return true if + * the operation was successful. + * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}. + */ + protected abstract boolean tryIncRef(G reference) throws IOException; + + /** + * Obtain the current reference. You must match every call to acquire with one + * call to {@link #release}; it's best to do so in a finally clause, and set + * the reference to {@code null} to prevent accidental usage after it has been + * released. + * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}. + */ + public final G acquire() throws IOException { + G ref; + + do { + if ((ref = current) == null) { + throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG); + } + if (tryIncRef(ref)) { + return ref; + } + if (getRefCount(ref) == 0 && current == ref) { + assert ref != null; + /* if we can't increment the reader but we are + still the current reference the RM is in a + illegal states since we can't make any progress + anymore. The reference is closed but the RM still + holds on to it as the actual instance. + This can only happen if somebody outside of the RM + decrements the refcount without a corresponding increment + since the RM assigns the new reference before counting down + the reference. */ + throw new IllegalStateException("The managed reference has already closed - this is likely a bug when the reference count is modified outside of the ReferenceManager"); + } + } while (true); + } + + /** + * <p> + * Closes this ReferenceManager to prevent future {@link #acquire() acquiring}. A + * reference manager should be closed if the reference to the managed resource + * should be disposed or the application using the {@link org.apache.lucene.search.XReferenceManager} + * is shutting down. The managed resource might not be released immediately, + * if the {@link org.apache.lucene.search.XReferenceManager} user is holding on to a previously + * {@link #acquire() acquired} reference. The resource will be released once + * when the last reference is {@link #release(Object) released}. Those + * references can still be used as if the manager was still active. + * </p> + * <p> + * Applications should not {@link #acquire() acquire} new references from this + * manager once this method has been called. {@link #acquire() Acquiring} a + * resource on a closed {@link org.apache.lucene.search.XReferenceManager} will throw an + * {@link org.apache.lucene.store.AlreadyClosedException}. + * </p> + * + * @throws java.io.IOException + * if the underlying reader of the current reference could not be closed + */ + @Override + public final synchronized void close() throws IOException { + if (current != null) { + // make sure we can call this more than once + // closeable javadoc says: + // if this is already closed then invoking this method has no effect. 
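+ // swapping in null releases the reference the manager itself holds and makes
+ // ensureOpen()/acquire() fail from now on; afterClose() then lets subclasses
+ // free any additional resources they hold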
+ swapReference(null); + afterClose(); + } + } + + /** + * Returns the current reference count of the given reference. + */ + protected abstract int getRefCount(G reference); + + /** + * Called after close(), so subclass can free any resources. + * @throws java.io.IOException if the after close operation in a sub-class throws an {@link java.io.IOException} + * */ + protected void afterClose() throws IOException { + } + + private void doMaybeRefresh() throws IOException { + // it's ok to call lock() here (blocking) because we're supposed to get here + // from either maybeRefreh() or maybeRefreshBlocking(), after the lock has + // already been obtained. Doing that protects us from an accidental bug + // where this method will be called outside the scope of refreshLock. + // Per ReentrantLock's javadoc, calling lock() by the same thread more than + // once is ok, as long as unlock() is called a matching number of times. + refreshLock.lock(); + boolean refreshed = false; + try { + final G reference = acquire(); + try { + notifyRefreshListenersBefore(); + G newReference = refreshIfNeeded(reference); + if (newReference != null) { + assert newReference != reference : "refreshIfNeeded should return null if refresh wasn't needed"; + try { + swapReference(newReference); + refreshed = true; + } finally { + if (!refreshed) { + release(newReference); + } + } + } + } finally { + release(reference); + notifyRefreshListenersRefreshed(refreshed); + } + afterMaybeRefresh(); + } finally { + refreshLock.unlock(); + } + } + + /** + * You must call this (or {@link #maybeRefreshBlocking()}), periodically, if + * you want that {@link #acquire()} will return refreshed instances. + * + * <p> + * <b>Threads</b>: it's fine for more than one thread to call this at once. + * Only the first thread will attempt the refresh; subsequent threads will see + * that another thread is already handling refresh and will return + * immediately. Note that this means if another thread is already refreshing + * then subsequent threads will return right away without waiting for the + * refresh to complete. + * + * <p> + * If this method returns true it means the calling thread either refreshed or + * that there were no changes to refresh. If it returns false it means another + * thread is currently refreshing. + * </p> + * @throws java.io.IOException if refreshing the resource causes an {@link java.io.IOException} + * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}. + */ + public final boolean maybeRefresh() throws IOException { + ensureOpen(); + + // Ensure only 1 thread does refresh at once; other threads just return immediately: + final boolean doTryRefresh = refreshLock.tryLock(); + if (doTryRefresh) { + try { + doMaybeRefresh(); + } finally { + refreshLock.unlock(); + } + } + + return doTryRefresh; + } + + /** + * You must call this (or {@link #maybeRefresh()}), periodically, if you want + * that {@link #acquire()} will return refreshed instances. + * + * <p> + * <b>Threads</b>: unlike {@link #maybeRefresh()}, if another thread is + * currently refreshing, this method blocks until that thread completes. It is + * useful if you want to guarantee that the next call to {@link #acquire()} + * will return a refreshed instance. Otherwise, consider using the + * non-blocking {@link #maybeRefresh()}. 
+ * @throws java.io.IOException if refreshing the resource causes an {@link java.io.IOException} + * @throws org.apache.lucene.store.AlreadyClosedException if the reference manager has been {@link #close() closed}. + */ + public final void maybeRefreshBlocking() throws IOException { + ensureOpen(); + + // Ensure only 1 thread does refresh at once + refreshLock.lock(); + try { + doMaybeRefresh(); + } finally { + refreshLock.unlock(); + } + } + + /** Called after a refresh was attempted, regardless of + * whether a new reference was in fact created. + * @throws java.io.IOException if a low level I/O exception occurs + **/ + protected void afterMaybeRefresh() throws IOException { + } + + /** + * Release the reference previously obtained via {@link #acquire()}. + * <p> + * <b>NOTE:</b> it's safe to call this after {@link #close()}. + * @throws java.io.IOException if the release operation on the given resource throws an {@link java.io.IOException} + */ + public final void release(G reference) throws IOException { + assert reference != null; + decRef(reference); + } + + private void notifyRefreshListenersBefore() throws IOException { + for (RefreshListener refreshListener : refreshListeners) { + refreshListener.beforeRefresh(); + } + } + + private void notifyRefreshListenersRefreshed(boolean didRefresh) throws IOException { + for (RefreshListener refreshListener : refreshListeners) { + refreshListener.afterRefresh(didRefresh); + } + } + + /** + * Adds a listener, to be notified when a reference is refreshed/swapped. + */ + public void addListener(RefreshListener listener) { + if (listener == null) { + throw new NullPointerException("Listener cannot be null"); + } + refreshListeners.add(listener); + } + + /** + * Remove a listener added with {@link #addListener(RefreshListener)}. + */ + public void removeListener(RefreshListener listener) { + if (listener == null) { + throw new NullPointerException("Listener cannot be null"); + } + refreshListeners.remove(listener); + } + + /** Use to receive notification when a refresh has + * finished. See {@link #addListener}. */ + public interface RefreshListener { + + /** Called right before a refresh attempt starts. */ + void beforeRefresh() throws IOException; + + /** Called after the attempted refresh; if the refresh + * did open a new reference then didRefresh will be true + * and {@link #acquire()} is guaranteed to return the new + * reference. */ + void afterRefresh(boolean didRefresh) throws IOException; + } +} diff --git a/src/main/java/org/apache/lucene/search/XSearcherManager.java b/src/main/java/org/apache/lucene/search/XSearcherManager.java new file mode 100644 index 0000000..36c74a0 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/XSearcherManager.java @@ -0,0 +1,177 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Version; + +/** + * Utility class to safely share {@link IndexSearcher} instances across multiple + * threads, while periodically reopening. This class ensures each searcher is + * closed only once all threads have finished using it. + * + * <p> + * Use {@link #acquire} to obtain the current searcher, and {@link #release} to + * release it, like this: + * + * <pre class="prettyprint"> + * IndexSearcher s = manager.acquire(); + * try { + * // Do searching, doc retrieval, etc. with s + * } finally { + * manager.release(s); + * } + * // Do not use s after this! + * s = null; + * </pre> + * + * <p> + * In addition you should periodically call {@link #maybeRefresh}. While it's + * possible to call this just before running each query, this is discouraged + * since it penalizes the unlucky queries that do the reopen. It's better to use + * a separate background thread, that periodically calls maybeReopen. Finally, + * be sure to call {@link #close} once you are done. + * + * @see SearcherFactory + * + * @lucene.experimental + */ +public final class XSearcherManager extends XReferenceManager<IndexSearcher> { + + static { + assert Version.LUCENE_46 == org.elasticsearch.Version.CURRENT.luceneVersion : "Remove this once we are on LUCENE_47 - see LUCENE-5436"; + } + + private final SearcherFactory searcherFactory; + + /** + * Creates and returns a new XSearcherManager from the given + * {@link IndexWriter}. + * + * @param writer + * the IndexWriter to open the IndexReader from. + * @param applyAllDeletes + * If <code>true</code>, all buffered deletes will be applied (made + * visible) in the {@link IndexSearcher} / {@link DirectoryReader}. + * If <code>false</code>, the deletes may or may not be applied, but + * remain buffered (in IndexWriter) so that they will be applied in + * the future. Applying deletes can be costly, so if your app can + * tolerate deleted documents being returned you might gain some + * performance by passing <code>false</code>. See + * {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}. + * @param searcherFactory + * An optional {@link SearcherFactory}. Pass <code>null</code> if you + * don't require the searcher to be warmed before going live or other + * custom behavior. + * + * @throws IOException if there is a low-level I/O error + */ + public XSearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory) throws IOException { + if (searcherFactory == null) { + searcherFactory = new SearcherFactory(); + } + this.searcherFactory = searcherFactory; + current = getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes)); + } + + /** + * Creates and returns a new XSearcherManager from the given {@link Directory}. + * @param dir the directory to open the DirectoryReader on. + * @param searcherFactory An optional {@link SearcherFactory}. Pass + * <code>null</code> if you don't require the searcher to be warmed + * before going live or other custom behavior. 
+ * + * @throws IOException if there is a low-level I/O error + */ + public XSearcherManager(Directory dir, SearcherFactory searcherFactory) throws IOException { + if (searcherFactory == null) { + searcherFactory = new SearcherFactory(); + } + this.searcherFactory = searcherFactory; + current = getSearcher(searcherFactory, DirectoryReader.open(dir)); + } + + @Override + protected void decRef(IndexSearcher reference) throws IOException { + reference.getIndexReader().decRef(); + } + + @Override + protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException { + final IndexReader r = referenceToRefresh.getIndexReader(); + assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r; + final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r); + if (newReader == null) { + return null; + } else { + return getSearcher(searcherFactory, newReader); + } + } + + @Override + protected boolean tryIncRef(IndexSearcher reference) { + return reference.getIndexReader().tryIncRef(); + } + + @Override + protected int getRefCount(IndexSearcher reference) { + return reference.getIndexReader().getRefCount(); + } + + /** + * Returns <code>true</code> if no changes have occured since this searcher + * ie. reader was opened, otherwise <code>false</code>. + * @see DirectoryReader#isCurrent() + */ + public boolean isSearcherCurrent() throws IOException { + final IndexSearcher searcher = acquire(); + try { + final IndexReader r = searcher.getIndexReader(); + assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r; + return ((DirectoryReader) r).isCurrent(); + } finally { + release(searcher); + } + } + + /** Expert: creates a searcher from the provided {@link + * IndexReader} using the provided {@link + * SearcherFactory}. NOTE: this decRefs incoming reader + * on throwing an exception. */ + public static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException { + boolean success = false; + final IndexSearcher searcher; + try { + searcher = searcherFactory.newSearcher(reader); + if (searcher.getIndexReader() != reader) { + throw new IllegalStateException("SearcherFactory must wrap exactly the provided reader (got " + searcher.getIndexReader() + " but expected " + reader + ")"); + } + success = true; + } finally { + if (!success) { + reader.decRef(); + } + } + return searcher; + } +} diff --git a/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java b/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java new file mode 100644 index 0000000..59ef784 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java @@ -0,0 +1,78 @@ +/* + * Licensed to Elasticsearch under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elasticsearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.lucene.search.postingshighlight; + +import org.apache.lucene.search.highlight.Encoder; +import org.elasticsearch.search.highlight.HighlightUtils; + +/** +Custom passage formatter that allows us to: +1) extract different snippets (instead of a single big string) together with their scores ({@link Snippet}) +2) use the {@link Encoder} implementations that are already used with the other highlighters + */ +public class CustomPassageFormatter extends PassageFormatter { + + private final String preTag; + private final String postTag; + private final Encoder encoder; + + public CustomPassageFormatter(String preTag, String postTag, Encoder encoder) { + this.preTag = preTag; + this.postTag = postTag; + this.encoder = encoder; + } + + @Override + public Snippet[] format(Passage[] passages, String content) { + Snippet[] snippets = new Snippet[passages.length]; + int pos; + for (int j = 0; j < passages.length; j++) { + Passage passage = passages[j]; + StringBuilder sb = new StringBuilder(); + pos = passage.startOffset; + for (int i = 0; i < passage.numMatches; i++) { + int start = passage.matchStarts[i]; + int end = passage.matchEnds[i]; + // its possible to have overlapping terms + if (start > pos) { + append(sb, content, pos, start); + } + if (end > pos) { + sb.append(preTag); + append(sb, content, Math.max(pos, start), end); + sb.append(postTag); + pos = end; + } + } + // its possible a "term" from the analyzer could span a sentence boundary. + append(sb, content, pos, Math.max(pos, passage.endOffset)); + //we remove the paragraph separator if present at the end of the snippet (we used it as separator between values) + if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) { + sb.deleteCharAt(sb.length() - 1); + } + //and we trim the snippets too + snippets[j] = new Snippet(sb.toString().trim(), passage.score, passage.numMatches > 0); + } + return snippets; + } + + protected void append(StringBuilder dest, String content, int start, int end) { + dest.append(encoder.encodeText(content.substring(start, end))); + } +} diff --git a/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java b/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java new file mode 100644 index 0000000..be5ad66 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java @@ -0,0 +1,187 @@ +/* + * Licensed to Elasticsearch under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elasticsearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.apache.lucene.search.postingshighlight; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.search.highlight.HighlightUtils; + +import java.io.IOException; +import java.text.BreakIterator; +import java.util.List; +import java.util.Map; + +/** + * Subclass of the {@link XPostingsHighlighter} that works for a single field in a single document. + * It receives the field values as input and it performs discrete highlighting on each single value + * calling the highlightDoc method multiple times. + * It allows to pass in the query terms to avoid calling extract terms multiple times. + * + * The use that we make of the postings highlighter is not optimal. It would be much better to + * highlight multiple docs in a single call, as we actually lose its sequential IO. But that would require: + * 1) to make our fork more complex and harder to maintain to perform discrete highlighting (needed to return + * a different snippet per value when number_of_fragments=0 and the field has multiple values) + * 2) refactoring of the elasticsearch highlight api which currently works per hit + * + */ +public final class CustomPostingsHighlighter extends XPostingsHighlighter { + + private static final Snippet[] EMPTY_SNIPPET = new Snippet[0]; + private static final Passage[] EMPTY_PASSAGE = new Passage[0]; + + private final CustomPassageFormatter passageFormatter; + private final int noMatchSize; + private final int totalContentLength; + private final String[] fieldValues; + private final int[] fieldValuesOffsets; + private int currentValueIndex = 0; + + private BreakIterator breakIterator; + + public CustomPostingsHighlighter(CustomPassageFormatter passageFormatter, List<Object> fieldValues, boolean mergeValues, int maxLength, int noMatchSize) { + super(maxLength); + this.passageFormatter = passageFormatter; + this.noMatchSize = noMatchSize; + + if (mergeValues) { + String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(getMultiValuedSeparator(""))); + String fieldValue = rawValue.substring(0, Math.min(rawValue.length(), maxLength)); + this.fieldValues = new String[]{fieldValue}; + this.fieldValuesOffsets = new int[]{0}; + this.totalContentLength = fieldValue.length(); + } else { + this.fieldValues = new String[fieldValues.size()]; + this.fieldValuesOffsets = new int[fieldValues.size()]; + int contentLength = 0; + int offset = 0; + int previousLength = -1; + for (int i = 0; i < fieldValues.size(); i++) { + String rawValue = fieldValues.get(i).toString(); + String fieldValue = rawValue.substring(0, Math.min(rawValue.length(), maxLength)); + this.fieldValues[i] = fieldValue; + contentLength += fieldValue.length(); + offset += previousLength + 1; + this.fieldValuesOffsets[i] = offset; + previousLength = fieldValue.length(); + } + this.totalContentLength = contentLength; + } + } + + /* + Our own api to highlight a single document field, passing in the query terms, and get back our own Snippet object + */ + public Snippet[] highlightDoc(String field, BytesRef[] terms, IndexSearcher searcher, int docId, int maxPassages) throws IOException { + IndexReader reader = searcher.getIndexReader(); + IndexReaderContext readerContext = reader.getContext(); + List<AtomicReaderContext> leaves = readerContext.leaves(); + + String[] 
contents = new String[]{loadCurrentFieldValue()}; + Map<Integer, Object> snippetsMap = highlightField(field, contents, getBreakIterator(field), terms, new int[]{docId}, leaves, maxPassages); + + //increment the current value index so that next time we'll highlight the next value if available + currentValueIndex++; + + Object snippetObject = snippetsMap.get(docId); + if (snippetObject != null && snippetObject instanceof Snippet[]) { + return (Snippet[]) snippetObject; + } + return EMPTY_SNIPPET; + } + + /* + Method provided through our own fork: allows to do proper scoring when doing per value discrete highlighting. + Used to provide the total length of the field (all values) for proper scoring. + */ + @Override + protected int getContentLength(String field, int docId) { + return totalContentLength; + } + + /* + Method provided through our own fork: allows to perform proper per value discrete highlighting. + Used to provide the offset for the current value. + */ + @Override + protected int getOffsetForCurrentValue(String field, int docId) { + if (currentValueIndex < fieldValuesOffsets.length) { + return fieldValuesOffsets[currentValueIndex]; + } + throw new IllegalArgumentException("No more values offsets to return"); + } + + public void setBreakIterator(BreakIterator breakIterator) { + this.breakIterator = breakIterator; + } + + @Override + protected PassageFormatter getFormatter(String field) { + return passageFormatter; + } + + @Override + protected BreakIterator getBreakIterator(String field) { + if (breakIterator == null) { + return super.getBreakIterator(field); + } + return breakIterator; + } + + @Override + protected char getMultiValuedSeparator(String field) { + //U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting + return HighlightUtils.PARAGRAPH_SEPARATOR; + } + + /* + By default the postings highlighter returns non highlighted snippet when there are no matches. + We want to return no snippets by default, unless no_match_size is greater than 0 + */ + @Override + protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { + if (noMatchSize > 0) { + //we want to return the first sentence of the first snippet only + return super.getEmptyHighlight(fieldName, bi, 1); + } + return EMPTY_PASSAGE; + } + + /* + Not needed since we call our own loadCurrentFieldValue explicitly, but we override it anyway for consistency. + */ + @Override + protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException { + return new String[][]{new String[]{loadCurrentFieldValue()}}; + } + + /* + Our own method that returns the field values, which relies on the content that was provided when creating the highlighter. + Supports per value discrete highlighting calling the highlightDoc method multiple times, one per value. + */ + protected String loadCurrentFieldValue() { + if (currentValueIndex < fieldValues.length) { + return fieldValues[currentValueIndex]; + } + throw new IllegalArgumentException("No more values to return"); + } +} diff --git a/src/main/java/org/apache/lucene/search/postingshighlight/Snippet.java b/src/main/java/org/apache/lucene/search/postingshighlight/Snippet.java new file mode 100644 index 0000000..bf68020 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/postingshighlight/Snippet.java @@ -0,0 +1,50 @@ +/* + * Licensed to Elasticsearch under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elasticsearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.lucene.search.postingshighlight; + +/** + * Represents a scored highlighted snippet. + * It's our own arbitrary object that we get back from the postings highlighter when highlighting a document. + * Every snippet contains its formatted text and its score. + * The score is needed since we highlight every single value separately and we might want to return snippets sorted by score. + */ +public class Snippet { + + private final String text; + private final float score; + private final boolean isHighlighted; + + public Snippet(String text, float score, boolean isHighlighted) { + this.text = text; + this.score = score; + this.isHighlighted = isHighlighted; + } + + public String getText() { + return text; + } + + public float getScore() { + return score; + } + + public boolean isHighlighted() { + return isHighlighted; + } +} diff --git a/src/main/java/org/apache/lucene/search/postingshighlight/XPostingsHighlighter.java b/src/main/java/org/apache/lucene/search/postingshighlight/XPostingsHighlighter.java new file mode 100644 index 0000000..869b559 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/postingshighlight/XPostingsHighlighter.java @@ -0,0 +1,777 @@ +/* + * Licensed to Elasticsearch under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elasticsearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.apache.lucene.search.postingshighlight; + +import org.apache.lucene.index.*; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.InPlaceMergeSorter; +import org.apache.lucene.util.UnicodeUtil; + +import java.io.IOException; +import java.text.BreakIterator; +import java.util.*; + +/* +FORKED from Lucene 4.5 to be able to: +1) support discrete highlighting for multiple values, so that we can return a different snippet per value when highlighting the whole text +2) call the highlightField method directly from subclasses and provide the terms by ourselves +3) Applied LUCENE-4906 to allow PassageFormatter to return arbitrary objects (LUCENE 4.6) + +All our changes start with //BEGIN EDIT + */ +public class XPostingsHighlighter { + + //BEGIN EDIT added method to override offset for current value (default 0) + //we need this to perform discrete highlighting per field + protected int getOffsetForCurrentValue(String field, int docId) { + return 0; + } + //END EDIT + + //BEGIN EDIT + //we need this to fix scoring when highlighting every single value separately, since the score depends on the total length of the field (all values rather than only the current one) + protected int getContentLength(String field, int docId) { + return -1; + } + //END EDIT + + + // TODO: maybe allow re-analysis for tiny fields? currently we require offsets, + // but if the analyzer is really fast and the field is tiny, this might really be + // unnecessary. + + /** for rewriting: we don't want slow processing from MTQs */ + private static final IndexReader EMPTY_INDEXREADER = new MultiReader(); + + /** Default maximum content size to process. Typically snippets + * closer to the beginning of the document better summarize its content */ + public static final int DEFAULT_MAX_LENGTH = 10000; + + private final int maxLength; + + /** Set the first time {@link #getFormatter} is called, + * and then reused. */ + private PassageFormatter defaultFormatter; + + /** Set the first time {@link #getScorer} is called, + * and then reused. */ + private PassageScorer defaultScorer; + + /** + * Creates a new highlighter with default parameters. + */ + public XPostingsHighlighter() { + this(DEFAULT_MAX_LENGTH); + } + + /** + * Creates a new highlighter, specifying maximum content length. + * @param maxLength maximum content size to process. + * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code> + */ + public XPostingsHighlighter(int maxLength) { + if (maxLength < 0 || maxLength == Integer.MAX_VALUE) { + // two reasons: no overflow problems in BreakIterator.preceding(offset+1), + // our sentinel in the offsets queue uses this value to terminate. + throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE"); + } + this.maxLength = maxLength; + } + + /** Returns the {@link java.text.BreakIterator} to use for + * dividing text into passages. This returns + * {@link java.text.BreakIterator#getSentenceInstance(java.util.Locale)} by default; + * subclasses can override to customize. */ + protected BreakIterator getBreakIterator(String field) { + return BreakIterator.getSentenceInstance(Locale.ROOT); + } + + /** Returns the {@link PassageFormatter} to use for + * formatting passages into highlighted snippets. 
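+ * <p>
+ * For illustration only (the tags and encoder below are arbitrary choices), a
+ * subclass could return a custom implementation such as the
+ * {@code CustomPassageFormatter} added in this change:
+ * <pre class="prettyprint">
+ * return new CustomPassageFormatter("&lt;em&gt;", "&lt;/em&gt;", new DefaultEncoder());
+ * </pre>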
This + * returns a new {@code PassageFormatter} by default; + * subclasses can override to customize. */ + protected PassageFormatter getFormatter(String field) { + if (defaultFormatter == null) { + defaultFormatter = new DefaultPassageFormatter(); + } + return defaultFormatter; + } + + /** Returns the {@link PassageScorer} to use for + * ranking passages. This + * returns a new {@code PassageScorer} by default; + * subclasses can override to customize. */ + protected PassageScorer getScorer(String field) { + if (defaultScorer == null) { + defaultScorer = new PassageScorer(); + } + return defaultScorer; + } + + /** + * Highlights the top passages from a single field. + * + * @param field field name to highlight. + * Must have a stored string value and also be indexed with offsets. + * @param query query to highlight. + * @param searcher searcher that was previously used to execute the query. + * @param topDocs TopDocs containing the summary result documents to highlight. + * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>. + * If no highlights were found for a document, the + * first sentence for the field will be returned. + * @throws java.io.IOException if an I/O error occurred during processing + * @throws IllegalArgumentException if <code>field</code> was indexed without + * {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} + */ + public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException { + return highlight(field, query, searcher, topDocs, 1); + } + + /** + * Highlights the top-N passages from a single field. + * + * @param field field name to highlight. + * Must have a stored string value and also be indexed with offsets. + * @param query query to highlight. + * @param searcher searcher that was previously used to execute the query. + * @param topDocs TopDocs containing the summary result documents to highlight. + * @param maxPassages The maximum number of top-N ranked passages used to + * form the highlighted snippets. + * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>. + * If no highlights were found for a document, the + * first {@code maxPassages} sentences from the + * field will be returned. + * @throws IOException if an I/O error occurred during processing + * @throws IllegalArgumentException if <code>field</code> was indexed without + * {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} + */ + public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException { + Map<String,String[]> res = highlightFields(new String[] { field }, query, searcher, topDocs, new int[] { maxPassages }); + return res.get(field); + } + + /** + * Highlights the top passages from multiple fields. + * <p> + * Conceptually, this behaves as a more efficient form of: + * <pre class="prettyprint"> + * Map m = new HashMap(); + * for (String field : fields) { + * m.put(field, highlight(field, query, searcher, topDocs)); + * } + * return m; + * </pre> + * + * @param fields field names to highlight. + * Must have a stored string value and also be indexed with offsets. + * @param query query to highlight. + * @param searcher searcher that was previously used to execute the query. + * @param topDocs TopDocs containing the summary result documents to highlight. 
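+ * <p>
+ * Usage sketch only ({@code query}, {@code searcher} and {@code topDocs} come from
+ * a prior search, and the field names assume fields stored and indexed with offsets):
+ * <pre class="prettyprint">
+ * XPostingsHighlighter highlighter = new XPostingsHighlighter();
+ * Map&lt;String,String[]&gt; snippets = highlighter.highlightFields(
+ *     new String[] {"title", "body"}, query, searcher, topDocs);
+ * String[] bodySnippets = snippets.get("body");
+ * </pre>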
+ * @return Map keyed on field name, containing the array of formatted snippets + * corresponding to the documents in <code>topDocs</code>. + * If no highlights were found for a document, the + * first sentence from the field will be returned. + * @throws IOException if an I/O error occurred during processing + * @throws IllegalArgumentException if <code>field</code> was indexed without + * {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} + */ + public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException { + int maxPassages[] = new int[fields.length]; + Arrays.fill(maxPassages, 1); + return highlightFields(fields, query, searcher, topDocs, maxPassages); + } + + /** + * Highlights the top-N passages from multiple fields. + * <p> + * Conceptually, this behaves as a more efficient form of: + * <pre class="prettyprint"> + * Map m = new HashMap(); + * for (String field : fields) { + * m.put(field, highlight(field, query, searcher, topDocs, maxPassages)); + * } + * return m; + * </pre> + * + * @param fields field names to highlight. + * Must have a stored string value and also be indexed with offsets. + * @param query query to highlight. + * @param searcher searcher that was previously used to execute the query. + * @param topDocs TopDocs containing the summary result documents to highlight. + * @param maxPassages The maximum number of top-N ranked passages per-field used to + * form the highlighted snippets. + * @return Map keyed on field name, containing the array of formatted snippets + * corresponding to the documents in <code>topDocs</code>. + * If no highlights were found for a document, the + * first {@code maxPassages} sentences from the + * field will be returned. + * @throws IOException if an I/O error occurred during processing + * @throws IllegalArgumentException if <code>field</code> was indexed without + * {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} + */ + public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages[]) throws IOException { + final ScoreDoc scoreDocs[] = topDocs.scoreDocs; + int docids[] = new int[scoreDocs.length]; + for (int i = 0; i < docids.length; i++) { + docids[i] = scoreDocs[i].doc; + } + + return highlightFields(fields, query, searcher, docids, maxPassages); + } + + /** + * Highlights the top-N passages from multiple fields, + * for the provided int[] docids. + * + * @param fieldsIn field names to highlight. + * Must have a stored string value and also be indexed with offsets. + * @param query query to highlight. + * @param searcher searcher that was previously used to execute the query. + * @param docidsIn containing the document IDs to highlight. + * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to + * form the highlighted snippets. + * @return Map keyed on field name, containing the array of formatted snippets + * corresponding to the documents in <code>topDocs</code>. + * If no highlights were found for a document, the + * first {@code maxPassages} from the field will + * be returned. 
+ * @throws IOException if an I/O error occurred during processing + * @throws IllegalArgumentException if <code>field</code> was indexed without + * {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} + */ + public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException { + Map<String,String[]> snippets = new HashMap<String,String[]>(); + for(Map.Entry<String,Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn).entrySet()) { + Object[] snippetObjects = ent.getValue(); + String[] snippetStrings = new String[snippetObjects.length]; + snippets.put(ent.getKey(), snippetStrings); + for(int i=0;i<snippetObjects.length;i++) { + Object snippet = snippetObjects[i]; + if (snippet != null) { + snippetStrings[i] = snippet.toString(); + } + } + } + + return snippets; + } + + public Map<String,Object[]> highlightFieldsAsObjects(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException { + if (fieldsIn.length < 1) { + throw new IllegalArgumentException("fieldsIn must not be empty"); + } + if (fieldsIn.length != maxPassagesIn.length) { + throw new IllegalArgumentException("invalid number of maxPassagesIn"); + } + final IndexReader reader = searcher.getIndexReader(); + query = rewrite(query); + SortedSet<Term> queryTerms = new TreeSet<Term>(); + query.extractTerms(queryTerms); + + IndexReaderContext readerContext = reader.getContext(); + List<AtomicReaderContext> leaves = readerContext.leaves(); + + // Make our own copies because we sort in-place: + int[] docids = new int[docidsIn.length]; + System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length); + final String fields[] = new String[fieldsIn.length]; + System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length); + final int maxPassages[] = new int[maxPassagesIn.length]; + System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length); + + // sort for sequential io + Arrays.sort(docids); + new InPlaceMergeSorter() { + + @Override + protected void swap(int i, int j) { + String tmp = fields[i]; + fields[i] = fields[j]; + fields[j] = tmp; + int tmp2 = maxPassages[i]; + maxPassages[i] = maxPassages[j]; + maxPassages[j] = tmp2; + } + + @Override + protected int compare(int i, int j) { + return fields[i].compareTo(fields[j]); + } + + }.sort(0, fields.length); + + // pull stored data: + String[][] contents = loadFieldValues(searcher, fields, docids, maxLength); + + Map<String,Object[]> highlights = new HashMap<String,Object[]>(); + for (int i = 0; i < fields.length; i++) { + String field = fields[i]; + int numPassages = maxPassages[i]; + + Term floor = new Term(field, ""); + Term ceiling = new Term(field, UnicodeUtil.BIG_TERM); + SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling); + // TODO: should we have some reasonable defaults for term pruning? (e.g. 
stopwords) + + // Strip off the redundant field: + BytesRef terms[] = new BytesRef[fieldTerms.size()]; + int termUpto = 0; + for(Term term : fieldTerms) { + terms[termUpto++] = term.bytes(); + } + Map<Integer,Object> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages); + + Object[] result = new Object[docids.length]; + for (int j = 0; j < docidsIn.length; j++) { + result[j] = fieldHighlights.get(docidsIn[j]); + } + highlights.put(field, result); + } + return highlights; + } + + /** Loads the String values for each field X docID to be + * highlighted. By default this loads from stored + * fields, but a subclass can change the source. This + * method should allocate the String[fields.length][docids.length] + * and fill all values. The returned Strings must be + * identical to what was indexed. */ + protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException { + String contents[][] = new String[fields.length][docids.length]; + char valueSeparators[] = new char[fields.length]; + for (int i = 0; i < fields.length; i++) { + valueSeparators[i] = getMultiValuedSeparator(fields[i]); + } + LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, valueSeparators, maxLength); + for (int i = 0; i < docids.length; i++) { + searcher.doc(docids[i], visitor); + for (int j = 0; j < fields.length; j++) { + contents[j][i] = visitor.getValue(j); + } + visitor.reset(); + } + return contents; + } + + /** + * Returns the logical separator between values for multi-valued fields. + * The default value is a space character, which means passages can span across values, + * but a subclass can override, for example with {@code U+2029 PARAGRAPH SEPARATOR (PS)} + * if each value holds a discrete passage for highlighting. + */ + protected char getMultiValuedSeparator(String field) { + return ' '; + } + + //BEGIN EDIT: made protected so that we can call from our subclass and pass in the terms by ourselves + protected Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException { + //private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext > leaves, int maxPassages) throws IOException { + //END EDIT + + Map<Integer,Object> highlights = new HashMap<Integer,Object>(); + + // reuse in the real sense... 
for docs in same segment we just advance our old enum + DocsAndPositionsEnum postings[] = null; + TermsEnum termsEnum = null; + int lastLeaf = -1; + + PassageFormatter fieldFormatter = getFormatter(field); + if (fieldFormatter == null) { + throw new NullPointerException("PassageFormatter cannot be null"); + } + + for (int i = 0; i < docids.length; i++) { + String content = contents[i]; + if (content.length() == 0) { + continue; // nothing to do + } + bi.setText(content); + int doc = docids[i]; + int leaf = ReaderUtil.subIndex(doc, leaves); + AtomicReaderContext subContext = leaves.get(leaf); + AtomicReader r = subContext.reader(); + Terms t = r.terms(field); + if (t == null) { + continue; // nothing to do + } + if (leaf != lastLeaf) { + termsEnum = t.iterator(null); + postings = new DocsAndPositionsEnum[terms.length]; + } + Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages); + if (passages.length == 0) { + passages = getEmptyHighlight(field, bi, maxPassages); + } + if (passages.length > 0) { + // otherwise a null snippet (eg if field is missing + // entirely from the doc) + highlights.put(doc, fieldFormatter.format(passages, content)); + } + lastLeaf = leaf; + } + + return highlights; + } + + // algorithm: treat sentence snippets as miniature documents + // we can intersect these with the postings lists via BreakIterator.preceding(offset),s + // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq)) + private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc, + TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException { + + //BEGIN EDIT added call to method that returns the offset for the current value (discrete highlighting) + int valueOffset = getOffsetForCurrentValue(field, doc); + //END EDIT + + PassageScorer scorer = getScorer(field); + if (scorer == null) { + throw new NullPointerException("PassageScorer cannot be null"); + } + + + //BEGIN EDIT discrete highlighting + // the scoring needs to be based on the length of the whole field (all values rather than only the current one) + int totalContentLength = getContentLength(field, doc); + if (totalContentLength == -1) { + totalContentLength = contentLength; + } + //END EDIT + + + PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>(); + float weights[] = new float[terms.length]; + // initialize postings + for (int i = 0; i < terms.length; i++) { + DocsAndPositionsEnum de = postings[i]; + int pDoc; + if (de == EMPTY) { + continue; + } else if (de == null) { + postings[i] = EMPTY; // initially + if (!termsEnum.seekExact(terms[i])) { + continue; // term not found + } + de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS); + if (de == null) { + // no positions available + throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); + } + pDoc = de.advance(doc); + } else { + pDoc = de.docID(); + if (pDoc < doc) { + pDoc = de.advance(doc); + } + } + + if (doc == pDoc) { + //BEGIN EDIT we take into account the length of the whole field (all values) to properly score the snippets + weights[i] = scorer.weight(totalContentLength, de.freq()); + //weights[i] = scorer.weight(contentLength, de.freq()); + //END EDIT + de.nextPosition(); + pq.add(new OffsetsEnum(de, i)); + } + } + + pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination + + PriorityQueue<Passage> 
passageQueue = new PriorityQueue<Passage>(n, new Comparator<Passage>() { + @Override + public int compare(Passage left, Passage right) { + if (left.score < right.score) { + return -1; + } else if (left.score > right.score) { + return 1; + } else { + return left.startOffset - right.startOffset; + } + } + }); + Passage current = new Passage(); + + OffsetsEnum off; + while ((off = pq.poll()) != null) { + final DocsAndPositionsEnum dp = off.dp; + + int start = dp.startOffset(); + if (start == -1) { + throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); + } + int end = dp.endOffset(); + // LUCENE-5166: this hit would span the content limit... however more valid + // hits may exist (they are sorted by start). so we pretend like we never + // saw this term, it won't cause a passage to be added to passageQueue or anything. + assert EMPTY.startOffset() == Integer.MAX_VALUE; + if (start < contentLength && end > contentLength) { + continue; + } + + + //BEGIN EDIT support for discrete highlighting (added block code) + //switch to the first match in the current value if there is one + boolean seenEnough = false; + while (start < valueOffset) { + if (off.pos == dp.freq()) { + seenEnough = true; + break; + } else { + off.pos++; + dp.nextPosition(); + start = dp.startOffset(); + end = dp.endOffset(); + } + } + + //continue with next term if we've already seen the current one all the times it appears + //that means that the current value doesn't hold matches for the current term + if (seenEnough) { + continue; + } + + //we now subtract the offset of the current value to both start and end + start -= valueOffset; + end -= valueOffset; + //END EDIT + + + if (start >= current.endOffset) { + if (current.startOffset >= 0) { + // finalize current + //BEGIN EDIT we take into account the value offset when scoring the snippet based on its position + current.score *= scorer.norm(current.startOffset + valueOffset); + //current.score *= scorer.norm(current.startOffset); + //END EDIT + // new sentence: first add 'current' to queue + if (passageQueue.size() == n && current.score < passageQueue.peek().score) { + current.reset(); // can't compete, just reset it + } else { + passageQueue.offer(current); + if (passageQueue.size() > n) { + current = passageQueue.poll(); + current.reset(); + } else { + current = new Passage(); + } + } + } + // if we exceed limit, we are done + if (start >= contentLength) { + Passage passages[] = new Passage[passageQueue.size()]; + passageQueue.toArray(passages); + for (Passage p : passages) { + p.sort(); + } + // sort in ascending order + Arrays.sort(passages, new Comparator<Passage>() { + @Override + public int compare(Passage left, Passage right) { + return left.startOffset - right.startOffset; + } + }); + return passages; + } + // advance breakiterator + assert BreakIterator.DONE < 0; + current.startOffset = Math.max(bi.preceding(start+1), 0); + current.endOffset = Math.min(bi.next(), contentLength); + } + int tf = 0; + while (true) { + tf++; + current.addMatch(start, end, terms[off.id]); + if (off.pos == dp.freq()) { + break; // removed from pq + } else { + off.pos++; + dp.nextPosition(); + //BEGIN EDIT support for discrete highlighting + start = dp.startOffset() - valueOffset; + end = dp.endOffset() - valueOffset; + //start = dp.startOffset(); + //end = dp.endOffset(); + //END EDIT + } + if (start >= current.endOffset || end > contentLength) { + pq.offer(off); + break; + } + } + current.score += weights[off.id] * scorer.tf(tf, 
current.endOffset - current.startOffset); + } + + // Dead code but compiler disagrees: + assert false; + return null; + } + + /** Called to summarize a document when no hits were + * found. By default this just returns the first + * {@code maxPassages} sentences; subclasses can override + * to customize. */ + protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { + // BreakIterator should be un-next'd: + List<Passage> passages = new ArrayList<Passage>(); + int pos = bi.current(); + assert pos == 0; + while (passages.size() < maxPassages) { + int next = bi.next(); + if (next == BreakIterator.DONE) { + break; + } + Passage passage = new Passage(); + passage.score = Float.NaN; + passage.startOffset = pos; + passage.endOffset = next; + passages.add(passage); + pos = next; + } + + return passages.toArray(new Passage[passages.size()]); + } + + private static class OffsetsEnum implements Comparable<OffsetsEnum> { + DocsAndPositionsEnum dp; + int pos; + int id; + + OffsetsEnum(DocsAndPositionsEnum dp, int id) throws IOException { + this.dp = dp; + this.id = id; + this.pos = 1; + } + + @Override + public int compareTo(OffsetsEnum other) { + try { + int off = dp.startOffset(); + int otherOff = other.dp.startOffset(); + if (off == otherOff) { + return id - other.id; + } else { + return Long.signum(((long)off) - otherOff); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + private static final DocsAndPositionsEnum EMPTY = new DocsAndPositionsEnum() { + + @Override + public int nextPosition() throws IOException { return 0; } + + @Override + public int startOffset() throws IOException { return Integer.MAX_VALUE; } + + @Override + public int endOffset() throws IOException { return Integer.MAX_VALUE; } + + @Override + public BytesRef getPayload() throws IOException { return null; } + + @Override + public int freq() throws IOException { return 0; } + + @Override + public int docID() { return NO_MORE_DOCS; } + + @Override + public int nextDoc() throws IOException { return NO_MORE_DOCS; } + + @Override + public int advance(int target) throws IOException { return NO_MORE_DOCS; } + + @Override + public long cost() { return 0; } + }; + + /** + * we rewrite against an empty indexreader: as we don't want things like + * rangeQueries that don't summarize the document + */ + private static Query rewrite(Query original) throws IOException { + Query query = original; + for (Query rewrittenQuery = query.rewrite(EMPTY_INDEXREADER); rewrittenQuery != query; + rewrittenQuery = query.rewrite(EMPTY_INDEXREADER)) { + query = rewrittenQuery; + } + return query; + } + + private static class LimitedStoredFieldVisitor extends StoredFieldVisitor { + private final String fields[]; + private final char valueSeparators[]; + private final int maxLength; + private final StringBuilder builders[]; + private int currentField = -1; + + public LimitedStoredFieldVisitor(String fields[], char valueSeparators[], int maxLength) { + assert fields.length == valueSeparators.length; + this.fields = fields; + this.valueSeparators = valueSeparators; + this.maxLength = maxLength; + builders = new StringBuilder[fields.length]; + for (int i = 0; i < builders.length; i++) { + builders[i] = new StringBuilder(); + } + } + + @Override + public void stringField(FieldInfo fieldInfo, String value) throws IOException { + assert currentField >= 0; + StringBuilder builder = builders[currentField]; + if (builder.length() > 0 && builder.length() < maxLength) { + 
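+        // a previous value was already appended for this field and we are still
+        // under maxLength, so insert the multi-valued separator before this value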
builder.append(valueSeparators[currentField]); + } + if (builder.length() + value.length() > maxLength) { + builder.append(value, 0, maxLength - builder.length()); + } else { + builder.append(value); + } + } + + @Override + public Status needsField(FieldInfo fieldInfo) throws IOException { + currentField = Arrays.binarySearch(fields, fieldInfo.name); + if (currentField < 0) { + return Status.NO; + } else if (builders[currentField].length() > maxLength) { + return fields.length == 1 ? Status.STOP : Status.NO; + } + return Status.YES; + } + + String getValue(int i) { + return builders[i].toString(); + } + + void reset() { + currentField = -1; + for (int i = 0; i < fields.length; i++) { + builders[i].setLength(0); + } + } + } +} diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java new file mode 100644 index 0000000..6e8b0a1 --- /dev/null +++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java @@ -0,0 +1,1052 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lucene.search.suggest.analyzing; + +import com.carrotsearch.hppc.ObjectIntOpenHashMap; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.TokenStreamToAutomaton; +import org.apache.lucene.search.suggest.InputIterator; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.Sort; +import org.apache.lucene.store.*; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.*; +import org.apache.lucene.util.automaton.*; +import org.apache.lucene.util.fst.*; +import org.apache.lucene.util.fst.FST.BytesReader; +import org.apache.lucene.util.fst.PairOutputs.Pair; +import org.apache.lucene.util.fst.Util.MinResult; +import org.elasticsearch.common.collect.HppcMaps; + +import java.io.*; +import java.util.*; + +/** + * Suggester that first analyzes the surface form, adds the + * analyzed form to a weighted FST, and then does the same + * thing at lookup time. This means lookup is based on the + * analyzed form while suggestions are still the surface + * form(s). + * + * <p> + * This can result in powerful suggester functionality. For + * example, if you use an analyzer removing stop words, + * then the partial text "ghost chr..." could see the + * suggestion "The Ghost of Christmas Past". 
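+ * <p>
+ * As a usage sketch only ({@code analyzer} and {@code iterator} are placeholders;
+ * the lookup arguments are arbitrary):
+ * <pre class="prettyprint">
+ * XAnalyzingSuggester suggester = new XAnalyzingSuggester(analyzer); // simplest constructor; see the note on position increments below
+ * suggester.build(iterator);  // InputIterator over surface forms and weights
+ * List&lt;LookupResult&gt; results = suggester.lookup("ghost chr", false, 5);
+ * </pre>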
Note that + * position increments MUST NOT be preserved for this example + * to work, so you should call the constructor with + * <code>preservePositionIncrements</code> parameter set to + * false + * + * <p> + * If SynonymFilter is used to map wifi and wireless network to + * hotspot then the partial text "wirele..." could suggest + * "wifi router". Token normalization like stemmers, accent + * removal, etc., would allow suggestions to ignore such + * variations. + * + * <p> + * When two matching suggestions have the same weight, they + * are tie-broken by the analyzed form. If their analyzed + * form is the same then the order is undefined. + * + * <p> + * There are some limitations: + * <ul> + * + * <li> A lookup from a query like "net" in English won't + * be any different than "net " (ie, user added a + * trailing space) because analyzers don't reflect + * when they've seen a token separator and when they + * haven't. + * + * <li> If you're using {@code StopFilter}, and the user will + * type "fast apple", but so far all they've typed is + * "fast a", again because the analyzer doesn't convey whether + * it's seen a token separator after the "a", + * {@code StopFilter} will remove that "a" causing + * far more matches than you'd expect. + * + * <li> Lookups with the empty string return no results + * instead of all results. + * </ul> + * + * @lucene.experimental + */ +public class XAnalyzingSuggester extends Lookup { + + /** + * FST<Weight,Surface>: + * input is the analyzed form, with a null byte between terms + * weights are encoded as costs: (Integer.MAX_VALUE-weight) + * surface is the original, unanalyzed form. + */ + private FST<Pair<Long,BytesRef>> fst = null; + + /** + * Analyzer that will be used for analyzing suggestions at + * index time. + */ + private final Analyzer indexAnalyzer; + + /** + * Analyzer that will be used for analyzing suggestions at + * query time. + */ + private final Analyzer queryAnalyzer; + + /** + * True if exact match suggestions should always be returned first. + */ + private final boolean exactFirst; + + /** + * True if separator between tokens should be preserved. + */ + private final boolean preserveSep; + + /** Include this flag in the options parameter to {@link + * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to always + * return the exact match first, regardless of score. This + * has no performance impact but could result in + * low-quality suggestions. */ + public static final int EXACT_FIRST = 1; + + /** Include this flag in the options parameter to {@link + * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to preserve + * token separators when matching. */ + public static final int PRESERVE_SEP = 2; + + /** Represents the separation between tokens, if + * PRESERVE_SEP was specified */ + public static final int SEP_LABEL = '\u001F'; + + /** Marks end of the analyzed input and start of dedup + * byte. */ + public static final int END_BYTE = 0x0; + + /** Maximum number of dup surface forms (different surface + * forms for the same analyzed form). */ + private final int maxSurfaceFormsPerAnalyzedForm; + + /** Maximum graph paths to index for a single analyzed + * surface form. This only matters if your analyzer + * makes lots of alternate paths (e.g. contains + * SynonymFilter). */ + private final int maxGraphExpansions; + + /** Highest number of analyzed paths we saw for any single + * input surface form. 
For analyzers that never create + * graphs this will always be 1. */ + private int maxAnalyzedPathsForOneInput; + + private boolean hasPayloads; + + private final int sepLabel; + private final int payloadSep; + private final int endByte; + private final int holeCharacter; + + public static final int PAYLOAD_SEP = '\u001F'; + public static final int HOLE_CHARACTER = '\u001E'; + + /** Whether position holes should appear in the automaton. */ + private boolean preservePositionIncrements; + + /** + * Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int) + * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | + * PRESERVE_SEP, 256, -1)} + */ + public XAnalyzingSuggester(Analyzer analyzer) { + this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); + } + + /** + * Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int) + * AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST | + * PRESERVE_SEP, 256, -1)} + */ + public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { + this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); + } + + /** + * Creates a new suggester. + * + * @param indexAnalyzer Analyzer that will be used for + * analyzing suggestions while building the index. + * @param queryAnalyzer Analyzer that will be used for + * analyzing query text during lookup + * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP} + * @param maxSurfaceFormsPerAnalyzedForm Maximum number of + * surface forms to keep for a single analyzed form. + * When there are too many surface forms we discard the + * lowest weighted ones. + * @param maxGraphExpansions Maximum number of graph paths + * to expand from the analyzed form. Set this to -1 for + * no limit. + */ + public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, + boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, + int sepLabel, int payloadSep, int endByte, int holeCharacter) { + // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput + this.indexAnalyzer = indexAnalyzer; + this.queryAnalyzer = queryAnalyzer; + this.fst = fst; + this.hasPayloads = hasPayloads; + if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) { + throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options); + } + this.exactFirst = (options & EXACT_FIRST) != 0; + this.preserveSep = (options & PRESERVE_SEP) != 0; + + // NOTE: this is just an implementation limitation; if + // somehow this is a problem we could fix it by using + // more than one byte to disambiguate ... but 256 seems + // like it should be way more then enough. 
+ if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) { + throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")"); + } + this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; + + if (maxGraphExpansions < 1 && maxGraphExpansions != -1) { + throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")"); + } + this.maxGraphExpansions = maxGraphExpansions; + this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput; + this.preservePositionIncrements = preservePositionIncrements; + this.sepLabel = sepLabel; + this.payloadSep = payloadSep; + this.endByte = endByte; + this.holeCharacter = holeCharacter; + } + + /** Returns byte size of the underlying FST. */ + public long sizeInBytes() { + return fst == null ? 0 : fst.sizeInBytes(); + } + + private static void copyDestTransitions(State from, State to, List<Transition> transitions) { + if (to.isAccept()) { + from.setAccept(true); + } + for(Transition t : to.getTransitions()) { + transitions.add(t); + } + } + + // Replaces SEP with epsilon or remaps them if + // we were asked to preserve them: + private static void replaceSep(Automaton a, boolean preserveSep, int replaceSep) { + + State[] states = a.getNumberedStates(); + + // Go in reverse topo sort so we know we only have to + // make one pass: + for(int stateNumber=states.length-1;stateNumber >=0;stateNumber--) { + final State state = states[stateNumber]; + List<Transition> newTransitions = new ArrayList<Transition>(); + for(Transition t : state.getTransitions()) { + assert t.getMin() == t.getMax(); + if (t.getMin() == TokenStreamToAutomaton.POS_SEP) { + if (preserveSep) { + // Remap to SEP_LABEL: + newTransitions.add(new Transition(replaceSep, t.getDest())); + } else { + copyDestTransitions(state, t.getDest(), newTransitions); + a.setDeterministic(false); + } + } else if (t.getMin() == TokenStreamToAutomaton.HOLE) { + + // Just remove the hole: there will then be two + // SEP tokens next to each other, which will only + // match another hole at search time. Note that + // it will also match an empty-string token ... if + // that's somehow a problem we can always map HOLE + // to a dedicated byte (and escape it in the + // input). + copyDestTransitions(state, t.getDest(), newTransitions); + a.setDeterministic(false); + } else { + newTransitions.add(t); + } + } + state.setTransitions(newTransitions.toArray(new Transition[newTransitions.size()])); + } + } + + protected Automaton convertAutomaton(Automaton a) { + return a; + } + + /** Just escapes the 0xff byte (which we still for SEP). 
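+ * (Escaping works by doubling any input byte that equals the separator label, so
+ * such bytes cannot be confused with the single separator byte inserted between
+ * tokens.)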
*/ + private static final class EscapingTokenStreamToAutomaton extends TokenStreamToAutomaton { + + final BytesRef spare = new BytesRef(); + private char sepLabel; + + public EscapingTokenStreamToAutomaton(char sepLabel) { + this.sepLabel = sepLabel; + } + + @Override + protected BytesRef changeToken(BytesRef in) { + int upto = 0; + for(int i=0;i<in.length;i++) { + byte b = in.bytes[in.offset+i]; + if (b == (byte) sepLabel) { + if (spare.bytes.length == upto) { + spare.grow(upto+2); + } + spare.bytes[upto++] = (byte) sepLabel; + spare.bytes[upto++] = b; + } else { + if (spare.bytes.length == upto) { + spare.grow(upto+1); + } + spare.bytes[upto++] = b; + } + } + spare.offset = 0; + spare.length = upto; + return spare; + } + } + + public TokenStreamToAutomaton getTokenStreamToAutomaton() { + final TokenStreamToAutomaton tsta; + if (preserveSep) { + tsta = new EscapingTokenStreamToAutomaton((char) sepLabel); + } else { + // When we're not preserving sep, we don't steal 0xff + // byte, so we don't need to do any escaping: + tsta = new TokenStreamToAutomaton(); + } + tsta.setPreservePositionIncrements(preservePositionIncrements); + return tsta; + } + + private static class AnalyzingComparator implements Comparator<BytesRef> { + + private final boolean hasPayloads; + + public AnalyzingComparator(boolean hasPayloads) { + this.hasPayloads = hasPayloads; + } + + private final ByteArrayDataInput readerA = new ByteArrayDataInput(); + private final ByteArrayDataInput readerB = new ByteArrayDataInput(); + private final BytesRef scratchA = new BytesRef(); + private final BytesRef scratchB = new BytesRef(); + + @Override + public int compare(BytesRef a, BytesRef b) { + + // First by analyzed form: + readerA.reset(a.bytes, a.offset, a.length); + scratchA.length = readerA.readShort(); + scratchA.bytes = a.bytes; + scratchA.offset = readerA.getPosition(); + + readerB.reset(b.bytes, b.offset, b.length); + scratchB.bytes = b.bytes; + scratchB.length = readerB.readShort(); + scratchB.offset = readerB.getPosition(); + + int cmp = scratchA.compareTo(scratchB); + if (cmp != 0) { + return cmp; + } + readerA.skipBytes(scratchA.length); + readerB.skipBytes(scratchB.length); + // Next by cost: + long aCost = readerA.readInt(); + long bCost = readerB.readInt(); + if (aCost < bCost) { + return -1; + } else if (aCost > bCost) { + return 1; + } + + // Finally by surface form: + if (hasPayloads) { + scratchA.length = readerA.readShort(); + scratchA.offset = readerA.getPosition(); + scratchB.length = readerB.readShort(); + scratchB.offset = readerB.getPosition(); + } else { + scratchA.offset = readerA.getPosition(); + scratchA.length = a.length - scratchA.offset; + scratchB.offset = readerB.getPosition(); + scratchB.length = b.length - scratchB.offset; + } + return scratchA.compareTo(scratchB); + } + } + + @Override + public void build(InputIterator iterator) throws IOException { + String prefix = getClass().getSimpleName(); + File directory = Sort.defaultTempDir(); + File tempInput = File.createTempFile(prefix, ".input", directory); + File tempSorted = File.createTempFile(prefix, ".sorted", directory); + + hasPayloads = iterator.hasPayloads(); + + Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); + Sort.ByteSequencesReader reader = null; + BytesRef scratch = new BytesRef(); + + TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); + + boolean success = false; + byte buffer[] = new byte[8]; + try { + ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); + BytesRef surfaceForm; + + 
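+      // Each analyzed path of every surface form is written to the temp file as
+      // one record:
+      //   analyzedLength (short) | analyzed bytes | encoded weight (int)
+      //   then, with payloads:    surfaceLength (short) | surface bytes | payload bytes
+      //   without payloads:       surface bytes
+      // The records are later sorted (analyzed form, then cost, then surface form)
+      // so the FST builder receives its inputs in order.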
while ((surfaceForm = iterator.next()) != null) { + Set<IntsRef> paths = toFiniteStrings(surfaceForm, ts2a); + + maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, paths.size()); + + for (IntsRef path : paths) { + + Util.toBytesRef(path, scratch); + + // length of the analyzed text (FST input) + if (scratch.length > Short.MAX_VALUE-2) { + throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length + ")"); + } + short analyzedLength = (short) scratch.length; + + // compute the required length: + // analyzed sequence + weight (4) + surface + analyzedLength (short) + int requiredLength = analyzedLength + 4 + surfaceForm.length + 2; + + BytesRef payload; + + if (hasPayloads) { + if (surfaceForm.length > (Short.MAX_VALUE-2)) { + throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")"); + } + payload = iterator.payload(); + // payload + surfaceLength (short) + requiredLength += payload.length + 2; + } else { + payload = null; + } + + buffer = ArrayUtil.grow(buffer, requiredLength); + + output.reset(buffer); + + output.writeShort(analyzedLength); + + output.writeBytes(scratch.bytes, scratch.offset, scratch.length); + + output.writeInt(encodeWeight(iterator.weight())); + + if (hasPayloads) { + for(int i=0;i<surfaceForm.length;i++) { + if (surfaceForm.bytes[i] == payloadSep) { + throw new IllegalArgumentException("surface form cannot contain unit separator character U+001F; this character is reserved"); + } + } + output.writeShort((short) surfaceForm.length); + output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); + output.writeBytes(payload.bytes, payload.offset, payload.length); + } else { + output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); + } + + assert output.getPosition() == requiredLength: output.getPosition() + " vs " + requiredLength; + + writer.write(buffer, 0, output.getPosition()); + } + } + writer.close(); + + // Sort all input/output pairs (required by FST.Builder): + new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted); + + // Free disk space: + tempInput.delete(); + + reader = new Sort.ByteSequencesReader(tempSorted); + + PairOutputs<Long,BytesRef> outputs = new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); + Builder<Pair<Long,BytesRef>> builder = new Builder<Pair<Long,BytesRef>>(FST.INPUT_TYPE.BYTE1, outputs); + + // Build FST: + BytesRef previousAnalyzed = null; + BytesRef analyzed = new BytesRef(); + BytesRef surface = new BytesRef(); + IntsRef scratchInts = new IntsRef(); + ByteArrayDataInput input = new ByteArrayDataInput(); + + // Used to remove duplicate surface forms (but we + // still index the hightest-weight one). 
We clear + // this when we see a new analyzed form, so it cannot + // grow unbounded (at most 256 entries): + Set<BytesRef> seenSurfaceForms = new HashSet<BytesRef>(); + + int dedup = 0; + while (reader.read(scratch)) { + input.reset(scratch.bytes, scratch.offset, scratch.length); + short analyzedLength = input.readShort(); + analyzed.grow(analyzedLength+2); + input.readBytes(analyzed.bytes, 0, analyzedLength); + analyzed.length = analyzedLength; + + long cost = input.readInt(); + + surface.bytes = scratch.bytes; + if (hasPayloads) { + surface.length = input.readShort(); + surface.offset = input.getPosition(); + } else { + surface.offset = input.getPosition(); + surface.length = scratch.length - surface.offset; + } + + if (previousAnalyzed == null) { + previousAnalyzed = new BytesRef(); + previousAnalyzed.copyBytes(analyzed); + seenSurfaceForms.add(BytesRef.deepCopyOf(surface)); + } else if (analyzed.equals(previousAnalyzed)) { + dedup++; + if (dedup >= maxSurfaceFormsPerAnalyzedForm) { + // More than maxSurfaceFormsPerAnalyzedForm + // dups: skip the rest: + continue; + } + if (seenSurfaceForms.contains(surface)) { + continue; + } + seenSurfaceForms.add(BytesRef.deepCopyOf(surface)); + } else { + dedup = 0; + previousAnalyzed.copyBytes(analyzed); + seenSurfaceForms.clear(); + seenSurfaceForms.add(BytesRef.deepCopyOf(surface)); + } + + // TODO: I think we can avoid the extra 2 bytes when + // there is no dup (dedup==0), but we'd have to fix + // the exactFirst logic ... which would be sort of + // hairy because we'd need to special case the two + // (dup/not dup)... + + // NOTE: must be byte 0 so we sort before whatever + // is next + analyzed.bytes[analyzed.offset+analyzed.length] = 0; + analyzed.bytes[analyzed.offset+analyzed.length+1] = (byte) dedup; + analyzed.length += 2; + + Util.toIntsRef(analyzed, scratchInts); + //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString()); + if (!hasPayloads) { + builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface))); + } else { + int payloadOffset = input.getPosition() + surface.length; + int payloadLength = scratch.length - payloadOffset; + BytesRef br = new BytesRef(surface.length + 1 + payloadLength); + System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length); + br.bytes[surface.length] = (byte) payloadSep; + System.arraycopy(scratch.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength); + br.length = br.bytes.length; + builder.add(scratchInts, outputs.newPair(cost, br)); + } + } + fst = builder.finish(); + + //PrintWriter pw = new PrintWriter("/tmp/out.dot"); + //Util.toDot(fst, pw, true, true); + //pw.close(); + + success = true; + } finally { + if (success) { + IOUtils.close(reader, writer); + } else { + IOUtils.closeWhileHandlingException(reader, writer); + } + + tempInput.delete(); + tempSorted.delete(); + } + } + + @Override + public boolean store(OutputStream output) throws IOException { + DataOutput dataOut = new OutputStreamDataOutput(output); + try { + if (fst == null) { + return false; + } + + fst.save(dataOut); + dataOut.writeVInt(maxAnalyzedPathsForOneInput); + dataOut.writeByte((byte) (hasPayloads ? 
1 : 0)); + } finally { + IOUtils.close(output); + } + return true; + } + + @Override + public boolean load(InputStream input) throws IOException { + DataInput dataIn = new InputStreamDataInput(input); + try { + this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); + maxAnalyzedPathsForOneInput = dataIn.readVInt(); + hasPayloads = dataIn.readByte() == 1; + } finally { + IOUtils.close(input); + } + return true; + } + + private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRef spare) { + LookupResult result; + if (hasPayloads) { + int sepIndex = -1; + for(int i=0;i<output2.length;i++) { + if (output2.bytes[output2.offset+i] == payloadSep) { + sepIndex = i; + break; + } + } + assert sepIndex != -1; + spare.grow(sepIndex); + final int payloadLen = output2.length - sepIndex - 1; + UnicodeUtil.UTF8toUTF16(output2.bytes, output2.offset, sepIndex, spare); + BytesRef payload = new BytesRef(payloadLen); + System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen); + payload.length = payloadLen; + result = new LookupResult(spare.toString(), decodeWeight(output1), payload); + } else { + spare.grow(output2.length); + UnicodeUtil.UTF8toUTF16(output2, spare); + result = new LookupResult(spare.toString(), decodeWeight(output1)); + } + + return result; + } + + private boolean sameSurfaceForm(BytesRef key, BytesRef output2) { + if (hasPayloads) { + // output2 has at least PAYLOAD_SEP byte: + if (key.length >= output2.length) { + return false; + } + for(int i=0;i<key.length;i++) { + if (key.bytes[key.offset+i] != output2.bytes[output2.offset+i]) { + return false; + } + } + return output2.bytes[output2.offset + key.length] == payloadSep; + } else { + return key.bytesEquals(output2); + } + } + + @Override + public List<LookupResult> lookup(final CharSequence key, boolean onlyMorePopular, int num) { + assert num > 0; + + if (onlyMorePopular) { + throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false"); + } + if (fst == null) { + return Collections.emptyList(); + } + + //System.out.println("lookup key=" + key + " num=" + num); + for (int i = 0; i < key.length(); i++) { + if (key.charAt(i) == holeCharacter) { + throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved"); + } + if (key.charAt(i) == sepLabel) { + throw new IllegalArgumentException("lookup key cannot contain unit separator character U+001F; this character is reserved"); + } + } + final BytesRef utf8Key = new BytesRef(key); + try { + + Automaton lookupAutomaton = toLookupAutomaton(key); + + final CharsRef spare = new CharsRef(); + + //System.out.println(" now intersect exactFirst=" + exactFirst); + + // Intersect automaton w/ suggest wFST and get all + // prefix starting nodes & their outputs: + //final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst); + + //System.out.println(" prefixPaths: " + prefixPaths.size()); + + BytesReader bytesReader = fst.getBytesReader(); + + FST.Arc<Pair<Long,BytesRef>> scratchArc = new FST.Arc<Pair<Long,BytesRef>>(); + + final List<LookupResult> results = new ArrayList<LookupResult>(); + + List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst); + + if (exactFirst) { + + int count = 0; + for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { + if (fst.findTargetArc(endByte, path.fstNode, scratchArc, 
bytesReader) != null) { + // This node has END_BYTE arc leaving, meaning it's an + // "exact" match: + count++; + } + } + + // Searcher just to find the single exact only + // match, if present: + Util.TopNSearcher<Pair<Long,BytesRef>> searcher; + searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst, count * maxSurfaceFormsPerAnalyzedForm, count * maxSurfaceFormsPerAnalyzedForm, weightComparator); + + // NOTE: we could almost get away with only using + // the first start node. The only catch is if + // maxSurfaceFormsPerAnalyzedForm had kicked in and + // pruned our exact match from one of these nodes + // ...: + for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { + if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) { + // This node has END_BYTE arc leaving, meaning it's an + // "exact" match: + searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output), false, path.input); + } + } + + MinResult<Pair<Long,BytesRef>> completions[] = searcher.search(); + + // NOTE: this is rather inefficient: we enumerate + // every matching "exactly the same analyzed form" + // path, and then do linear scan to see if one of + // these exactly matches the input. It should be + // possible (though hairy) to do something similar + // to getByOutput, since the surface form is encoded + // into the FST output, so we more efficiently hone + // in on the exact surface-form match. Still, I + // suspect very little time is spent in this linear + // seach: it's bounded by how many prefix start + // nodes we have and the + // maxSurfaceFormsPerAnalyzedForm: + for(MinResult<Pair<Long,BytesRef>> completion : completions) { + BytesRef output2 = completion.output.output2; + if (sameSurfaceForm(utf8Key, output2)) { + results.add(getLookupResult(completion.output.output1, output2, spare)); + break; + } + } + + if (results.size() == num) { + // That was quick: + return results; + } + } + + Util.TopNSearcher<Pair<Long,BytesRef>> searcher; + searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst, + num - results.size(), + num * maxAnalyzedPathsForOneInput, + weightComparator) { + private final Set<BytesRef> seen = new HashSet<BytesRef>(); + + @Override + protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) { + + // Dedup: when the input analyzes to a graph we + // can get duplicate surface forms: + if (seen.contains(output.output2)) { + return false; + } + seen.add(output.output2); + + if (!exactFirst) { + return true; + } else { + // In exactFirst mode, don't accept any paths + // matching the surface form since that will + // create duplicate results: + if (sameSurfaceForm(utf8Key, output.output2)) { + // We found exact match, which means we should + // have already found it in the first search: + assert results.size() == 1; + return false; + } else { + return true; + } + } + } + }; + + prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst); + + for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { + searcher.addStartPaths(path.fstNode, path.output, true, path.input); + } + + MinResult<Pair<Long,BytesRef>> completions[] = searcher.search(); + + for(MinResult<Pair<Long,BytesRef>> completion : completions) { + + LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare); + + // TODO: for fuzzy case would be nice to return + // how many edits were required + + //System.out.println(" result=" + result); + results.add(result); + + if (results.size() == num) { + // In the 
exactFirst=true case the search may + // produce one extra path + break; + } + } + + return results; + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + } + + /** Returns all completion paths to initialize the search. */ + protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths, + Automaton lookupAutomaton, + FST<Pair<Long,BytesRef>> fst) + throws IOException { + return prefixPaths; + } + + public final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { + // Analyze surface form: + TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString()); + return toFiniteStrings(ts2a, ts); + } + public final Set<IntsRef> toFiniteStrings(final TokenStreamToAutomaton ts2a, TokenStream ts) throws IOException { + // Analyze surface form: + + // Create corresponding automaton: labels are bytes + // from each analyzed token, with byte 0 used as + // separator between tokens: + Automaton automaton = ts2a.toAutomaton(ts); + ts.close(); + + replaceSep(automaton, preserveSep, sepLabel); + + assert SpecialOperations.isFinite(automaton); + + // Get all paths from the automaton (there can be + // more than one path, eg if the analyzer created a + // graph using SynFilter or WDF): + + // TODO: we could walk & add simultaneously, so we + // don't have to alloc [possibly biggish] + // intermediate HashSet in RAM: + return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions); + } + + final Automaton toLookupAutomaton(final CharSequence key) throws IOException { + // Turn tokenstream into automaton: + TokenStream ts = queryAnalyzer.tokenStream("", key.toString()); + Automaton automaton = (getTokenStreamToAutomaton()).toAutomaton(ts); + ts.close(); + + // TODO: we could use the end offset to "guess" + // whether the final token was a partial token; this + // would only be a heuristic ... but maybe an OK one. + // This way we could eg differentiate "net" from "net ", + // which we can't today... + + replaceSep(automaton, preserveSep, sepLabel); + + // TODO: we can optimize this somewhat by determinizing + // while we convert + BasicOperations.determinize(automaton); + return automaton; + } + + + + /** + * Returns the weight associated with an input string, + * or null if it does not exist. 
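+     * This suggester does not support this operation and simply throws an
+     * {@link UnsupportedOperationException}.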
+ */ + public Object get(CharSequence key) { + throw new UnsupportedOperationException(); + } + + /** cost -> weight */ + public static int decodeWeight(long encoded) { + return (int)(Integer.MAX_VALUE - encoded); + } + + /** weight -> cost */ + public static int encodeWeight(long value) { + if (value < 0 || value > Integer.MAX_VALUE) { + throw new UnsupportedOperationException("cannot encode value: " + value); + } + return Integer.MAX_VALUE - (int)value; + } + + static final Comparator<Pair<Long,BytesRef>> weightComparator = new Comparator<Pair<Long,BytesRef>> () { + @Override + public int compare(Pair<Long,BytesRef> left, Pair<Long,BytesRef> right) { + return left.output1.compareTo(right.output1); + } + }; + + + public static class XBuilder { + private Builder<Pair<Long, BytesRef>> builder; + private int maxSurfaceFormsPerAnalyzedForm; + private IntsRef scratchInts = new IntsRef(); + private final PairOutputs<Long, BytesRef> outputs; + private boolean hasPayloads; + private BytesRef analyzed = new BytesRef(); + private final SurfaceFormAndPayload[] surfaceFormsAndPayload; + private int count; + private ObjectIntOpenHashMap<BytesRef> seenSurfaceForms = HppcMaps.Object.Integer.ensureNoNullKeys(256, 0.75f); + private int payloadSep; + + public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) { + this.payloadSep = payloadSep; + this.outputs = new PairOutputs<Long, BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); + this.builder = new Builder<Pair<Long, BytesRef>>(FST.INPUT_TYPE.BYTE1, outputs); + this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; + this.hasPayloads = hasPayloads; + surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm]; + + } + public void startTerm(BytesRef analyzed) { + this.analyzed.copyBytes(analyzed); + this.analyzed.grow(analyzed.length+2); + } + + private final static class SurfaceFormAndPayload implements Comparable<SurfaceFormAndPayload> { + BytesRef payload; + long weight; + + public SurfaceFormAndPayload(BytesRef payload, long cost) { + super(); + this.payload = payload; + this.weight = cost; + } + + @Override + public int compareTo(SurfaceFormAndPayload o) { + int res = compare(weight, o.weight); + if (res == 0 ){ + return payload.compareTo(o.payload); + } + return res; + } + public static int compare(long x, long y) { + return (x < y) ? -1 : ((x == y) ? 0 : 1); + } + } + + public void addSurface(BytesRef surface, BytesRef payload, long cost) throws IOException { + int surfaceIndex = -1; + long encodedWeight = cost == -1 ? cost : encodeWeight(cost); + /* + * we need to check if we have seen this surface form, if so only use the + * the surface form with the highest weight and drop the rest no matter if + * the payload differs. 
+ */ + if (count >= maxSurfaceFormsPerAnalyzedForm) { + // More than maxSurfaceFormsPerAnalyzedForm + // dups: skip the rest: + return; + } + BytesRef surfaceCopy; + if (count > 0 && seenSurfaceForms.containsKey(surface)) { + surfaceIndex = seenSurfaceForms.lget(); + SurfaceFormAndPayload surfaceFormAndPayload = surfaceFormsAndPayload[surfaceIndex]; + if (encodedWeight >= surfaceFormAndPayload.weight) { + return; + } + surfaceCopy = BytesRef.deepCopyOf(surface); + } else { + surfaceIndex = count++; + surfaceCopy = BytesRef.deepCopyOf(surface); + seenSurfaceForms.put(surfaceCopy, surfaceIndex); + } + + BytesRef payloadRef; + if (!hasPayloads) { + payloadRef = surfaceCopy; + } else { + int len = surface.length + 1 + payload.length; + final BytesRef br = new BytesRef(len); + System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length); + br.bytes[surface.length] = (byte) payloadSep; + System.arraycopy(payload.bytes, payload.offset, br.bytes, surface.length + 1, payload.length); + br.length = len; + payloadRef = br; + } + if (surfaceFormsAndPayload[surfaceIndex] == null) { + surfaceFormsAndPayload[surfaceIndex] = new SurfaceFormAndPayload(payloadRef, encodedWeight); + } else { + surfaceFormsAndPayload[surfaceIndex].payload = payloadRef; + surfaceFormsAndPayload[surfaceIndex].weight = encodedWeight; + } + } + + public void finishTerm(long defaultWeight) throws IOException { + ArrayUtil.timSort(surfaceFormsAndPayload, 0, count); + int deduplicator = 0; + analyzed.bytes[analyzed.offset + analyzed.length] = 0; + analyzed.length += 2; + for (int i = 0; i < count; i++) { + analyzed.bytes[analyzed.offset + analyzed.length - 1 ] = (byte) deduplicator++; + Util.toIntsRef(analyzed, scratchInts); + SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i]; + long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight; + builder.add(scratchInts, outputs.newPair(cost, candiate.payload)); + } + seenSurfaceForms.clear(); + count = 0; + } + + public FST<Pair<Long, BytesRef>> build() throws IOException { + return builder.finish(); + } + + public boolean hasPayloads() { + return hasPayloads; + } + + public int maxSurfaceFormsPerAnalyzedForm() { + return maxSurfaceFormsPerAnalyzedForm; + } + + } +} diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java new file mode 100644 index 0000000..4379cda --- /dev/null +++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java @@ -0,0 +1,263 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.lucene.search.suggest.analyzing;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStreamToAutomaton;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.automaton.*;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is
+ * based on the Damerau-Levenshtein (optimal string alignment) algorithm, though
+ * you can explicitly choose classic Levenshtein by passing <code>false</code>
+ * for the <code>transpositions</code> parameter.
+ * <p>
+ * At most, this query will match terms up to
+ * {@value org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
+ * edits. Higher distances are not supported. Note that the
+ * fuzzy distance is measured in "byte space" on the bytes
+ * returned by the {@link org.apache.lucene.analysis.TokenStream}'s {@link
+ * org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute}, usually UTF8. By default
+ * the analyzed bytes must be at least 3 {@link
+ * #DEFAULT_MIN_FUZZY_LENGTH} bytes before any edits are
+ * considered. Furthermore, the first 1 {@link
+ * #DEFAULT_NON_FUZZY_PREFIX} byte is not allowed to be
+ * edited. We allow up to 1 {@link
+ * #DEFAULT_MAX_EDITS} edit.
+ * If the {@link #unicodeAware} parameter in the constructor is set to true, maxEdits,
+ * minFuzzyLength, transpositions and nonFuzzyPrefix are measured in Unicode code
+ * points (actual letters) instead of bytes.
+ *
+ * <p>
+ * NOTE: This suggester does not boost suggestions that
+ * required no edits over suggestions that did require
+ * edits. This is a known limitation.
+ *
+ * <p>
+ * Note: complex query analyzers can have a significant impact on the lookup
+ * performance. It's recommended to not use analyzers that drop or inject terms
+ * like synonyms to keep the complexity of the prefix intersection low for good
+ * lookup performance. At index time, complex analyzers can safely be used.
+ * </p>
+ *
+ * @lucene.experimental
+ */
+public final class XFuzzySuggester extends XAnalyzingSuggester {
+    private final int maxEdits;
+    private final boolean transpositions;
+    private final int nonFuzzyPrefix;
+    private final int minFuzzyLength;
+    private final boolean unicodeAware;
+
+    /**
+     * Measure maxEdits, minFuzzyLength, transpositions and nonFuzzyPrefix
+     * parameters in Unicode code points (actual letters)
+     * instead of bytes.
+     */
+    public static final boolean DEFAULT_UNICODE_AWARE = false;
+
+    /**
+     * The default minimum length of the key passed to {@link
+     * #lookup} before any edits are allowed.
+     */
+    public static final int DEFAULT_MIN_FUZZY_LENGTH = 3;
+
+    /**
+     * The default prefix length where edits are not allowed.
+     */
+    public static final int DEFAULT_NON_FUZZY_PREFIX = 1;
+
+    /**
+     * The default maximum number of edits for fuzzy
+     * suggestions.
+     */
+    public static final int DEFAULT_MAX_EDITS = 1;
+
+    /**
+     * The default transposition value passed to {@link org.apache.lucene.util.automaton.LevenshteinAutomata}
+     */
+    public static final boolean DEFAULT_TRANSPOSITIONS = true;
+
+    /**
+     * Creates a {@link FuzzySuggester} instance initialized with default values.
+ * + * @param analyzer the analyzer used for this suggester + */ + public XFuzzySuggester(Analyzer analyzer) { + this(analyzer, analyzer); + } + + /** + * Creates a {@link FuzzySuggester} instance with an index & a query analyzer initialized with default values. + * + * @param indexAnalyzer + * Analyzer that will be used for analyzing suggestions while building the index. + * @param queryAnalyzer + * Analyzer that will be used for analyzing query text during lookup + */ + public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { + this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS, + DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); + + } + + /** + * Creates a {@link FuzzySuggester} instance. + * + * @param indexAnalyzer Analyzer that will be used for + * analyzing suggestions while building the index. + * @param queryAnalyzer Analyzer that will be used for + * analyzing query text during lookup + * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP} + * @param maxSurfaceFormsPerAnalyzedForm Maximum number of + * surface forms to keep for a single analyzed form. + * When there are too many surface forms we discard the + * lowest weighted ones. + * @param maxGraphExpansions Maximum number of graph paths + * to expand from the analyzed form. Set this to -1 for + * no limit. + * @param maxEdits must be >= 0 and <= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} . + * @param transpositions <code>true</code> if transpositions should be treated as a primitive + * edit operation. If this is false, comparisons will implement the classic + * Levenshtein algorithm. 
+     * @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
+     * @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
+     * @param sepLabel separation label
+     * @param payloadSep payload separator byte
+     * @param endByte end byte marker byte
+     */
+    public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
+                           int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware,
+                           FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
+                           int sepLabel, int payloadSep, int endByte, int holeCharacter) {
+        super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
+        if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
+            throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
+        }
+        if (nonFuzzyPrefix < 0) {
+            throw new IllegalArgumentException("nonFuzzyPrefix must not be < 0 (got " + nonFuzzyPrefix + ")");
+        }
+        if (minFuzzyLength < 0) {
+            throw new IllegalArgumentException("minFuzzyLength must not be < 0 (got " + minFuzzyLength + ")");
+        }
+
+        this.maxEdits = maxEdits;
+        this.transpositions = transpositions;
+        this.nonFuzzyPrefix = nonFuzzyPrefix;
+        this.minFuzzyLength = minFuzzyLength;
+        this.unicodeAware = unicodeAware;
+    }
+
+    @Override
+    protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
+                                                                                     Automaton lookupAutomaton,
+                                                                                     FST<PairOutputs.Pair<Long,BytesRef>> fst)
+            throws IOException {
+
+        // TODO: right now there's no penalty for fuzzy/edits,
+        // ie a completion whose prefix matched exactly what the
+        // user typed gets no boost over completions that
+        // required an edit, which get no boost over completions
+        // requiring two edits. I suspect a multiplicative
+        // factor is appropriate (eg, say a fuzzy match must be at
+        // least 2X better weight than the non-fuzzy match to
+        // "compete") ... in which case I think the wFST needs
+        // to be log weights or something ...
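+        // Expand the analyzed lookup key into a Levenshtein automaton and intersect
+        // it with the suggest FST to collect every fuzzily matching prefix path: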
+ + Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); + /* + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + w.write(levA.toDot()); + w.close(); + System.out.println("Wrote LevA to out.dot"); + */ + return FSTUtil.intersectPrefixPaths(levA, fst); + } + + @Override + protected Automaton convertAutomaton(Automaton a) { + if (unicodeAware) { + Automaton utf8automaton = new UTF32ToUTF8().convert(a); + BasicOperations.determinize(utf8automaton); + return utf8automaton; + } else { + return a; + } + } + + @Override + public TokenStreamToAutomaton getTokenStreamToAutomaton() { + final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton(); + tsta.setUnicodeArcs(unicodeAware); + return tsta; + } + + Automaton toLevenshteinAutomata(Automaton automaton) { + final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1); + Automaton subs[] = new Automaton[ref.size()]; + int upto = 0; + for (IntsRef path : ref) { + if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) { + subs[upto] = BasicAutomata.makeString(path.ints, path.offset, path.length); + upto++; + } else { + Automaton prefix = BasicAutomata.makeString(path.ints, path.offset, nonFuzzyPrefix); + int ints[] = new int[path.length-nonFuzzyPrefix]; + System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length); + // TODO: maybe add alphaMin to LevenshteinAutomata, + // and pass 1 instead of 0? We probably don't want + // to allow the trailing dedup bytes to be + // edited... but then 0 byte is "in general" allowed + // on input (but not in UTF8). + LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); + Automaton levAutomaton = lev.toAutomaton(maxEdits); + Automaton combined = BasicOperations.concatenate(Arrays.asList(prefix, levAutomaton)); + combined.setDeterministic(true); // its like the special case in concatenate itself, except we cloneExpanded already + subs[upto] = combined; + upto++; + } + } + + if (subs.length == 0) { + // automaton is empty, there is no accepted paths through it + return BasicAutomata.makeEmpty(); // matches nothing + } else if (subs.length == 1) { + // no synonyms or anything: just a single path through the tokenstream + return subs[0]; + } else { + // multiple paths: this is really scary! is it slow? + // maybe we should not do this and throw UOE? + Automaton a = BasicOperations.union(Arrays.asList(subs)); + // TODO: we could call toLevenshteinAutomata() before det? + // this only happens if you have multiple paths anyway (e.g. synonyms) + BasicOperations.determinize(a); + + return a; + } + } +} diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java new file mode 100644 index 0000000..8512e5c --- /dev/null +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java @@ -0,0 +1,161 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.search.vectorhighlight; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.FilterClause; +import org.apache.lucene.queries.TermFilter; +import org.apache.lucene.search.*; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.lucene.search.XBooleanFilter; +import org.elasticsearch.common.lucene.search.XFilteredQuery; +import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Collection; +import java.util.List; + +/** + * + */ +// LUCENE MONITOR +public class CustomFieldQuery extends FieldQuery { + + private static Field multiTermQueryWrapperFilterQueryField; + + static { + try { + multiTermQueryWrapperFilterQueryField = MultiTermQueryWrapperFilter.class.getDeclaredField("query"); + multiTermQueryWrapperFilterQueryField.setAccessible(true); + } catch (NoSuchFieldException e) { + // ignore + } + } + + public static final ThreadLocal<Boolean> highlightFilters = new ThreadLocal<Boolean>(); + + public CustomFieldQuery(Query query, IndexReader reader, FastVectorHighlighter highlighter) throws IOException { + this(query, reader, highlighter.isPhraseHighlight(), highlighter.isFieldMatch()); + } + + public CustomFieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch) throws IOException { + super(query, reader, phraseHighlight, fieldMatch); + highlightFilters.remove(); + } + + @Override + void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException { + if (sourceQuery instanceof SpanTermQuery) { + super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries); + } else if (sourceQuery instanceof ConstantScoreQuery) { + ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) sourceQuery; + if (constantScoreQuery.getFilter() != null) { + flatten(constantScoreQuery.getFilter(), reader, flatQueries); + } else { + flatten(constantScoreQuery.getQuery(), reader, flatQueries); + } + } else if (sourceQuery instanceof FunctionScoreQuery) { + flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries); + } else if (sourceQuery instanceof FilteredQuery) { + flatten(((FilteredQuery) sourceQuery).getQuery(), reader, flatQueries); + flatten(((FilteredQuery) sourceQuery).getFilter(), reader, flatQueries); + } else if (sourceQuery instanceof XFilteredQuery) { + flatten(((XFilteredQuery) sourceQuery).getQuery(), reader, flatQueries); + flatten(((XFilteredQuery) sourceQuery).getFilter(), reader, flatQueries); + } else if (sourceQuery instanceof MultiPhrasePrefixQuery) { + flatten(sourceQuery.rewrite(reader), reader, flatQueries); + } else if (sourceQuery instanceof FiltersFunctionScoreQuery) { + 
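+            // only the wrapped sub query contributes terms for highlighting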
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries); + } else if (sourceQuery instanceof MultiPhraseQuery) { + MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery); + convertMultiPhraseQuery(0, new int[q.getTermArrays().size()] , q, q.getTermArrays(), q.getPositions(), reader, flatQueries); + } else { + super.flatten(sourceQuery, reader, flatQueries); + } + } + + private void convertMultiPhraseQuery(int currentPos, int[] termsIdx, MultiPhraseQuery orig, List<Term[]> terms, int[] pos, IndexReader reader, Collection<Query> flatQueries) throws IOException { + if (currentPos == 0) { + // if we have more than 16 terms + int numTerms = 0; + for (Term[] currentPosTerm : terms) { + numTerms += currentPosTerm.length; + } + if (numTerms > 16) { + for (Term[] currentPosTerm : terms) { + for (Term term : currentPosTerm) { + super.flatten(new TermQuery(term), reader, flatQueries); + } + } + return; + } + } + /* + * we walk all possible ways and for each path down the MPQ we create a PhraseQuery this is what FieldQuery supports. + * It seems expensive but most queries will pretty small. + */ + if (currentPos == terms.size()) { + PhraseQuery query = new PhraseQuery(); + query.setBoost(orig.getBoost()); + query.setSlop(orig.getSlop()); + for (int i = 0; i < termsIdx.length; i++) { + query.add(terms.get(i)[termsIdx[i]], pos[i]); + } + this.flatten(query, reader, flatQueries); + } else { + Term[] t = terms.get(currentPos); + for (int i = 0; i < t.length; i++) { + termsIdx[currentPos] = i; + convertMultiPhraseQuery(currentPos+1, termsIdx, orig, terms, pos, reader, flatQueries); + } + } + } + + void flatten(Filter sourceFilter, IndexReader reader, Collection<Query> flatQueries) throws IOException { + Boolean highlight = highlightFilters.get(); + if (highlight == null || highlight.equals(Boolean.FALSE)) { + return; + } + if (sourceFilter instanceof TermFilter) { + flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries); + } else if (sourceFilter instanceof MultiTermQueryWrapperFilter) { + if (multiTermQueryWrapperFilterQueryField != null) { + try { + flatten((Query) multiTermQueryWrapperFilterQueryField.get(sourceFilter), reader, flatQueries); + } catch (IllegalAccessException e) { + // ignore + } + } + } else if (sourceFilter instanceof XBooleanFilter) { + XBooleanFilter booleanFilter = (XBooleanFilter) sourceFilter; + for (FilterClause clause : booleanFilter.clauses()) { + if (clause.getOccur() == BooleanClause.Occur.MUST || clause.getOccur() == BooleanClause.Occur.SHOULD) { + flatten(clause.getFilter(), reader, flatQueries); + } + } + } + } +} diff --git a/src/main/java/org/apache/lucene/store/BufferedChecksumIndexOutput.java b/src/main/java/org/apache/lucene/store/BufferedChecksumIndexOutput.java new file mode 100644 index 0000000..0c64a93 --- /dev/null +++ b/src/main/java/org/apache/lucene/store/BufferedChecksumIndexOutput.java @@ -0,0 +1,115 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.store; + +import java.io.IOException; +import java.util.zip.Checksum; + +/** + */ +public class BufferedChecksumIndexOutput extends BufferedIndexOutput { + + private final IndexOutput delegate; + private final BufferedIndexOutput bufferedDelegate; + private final Checksum digest; + + public BufferedChecksumIndexOutput(IndexOutput delegate, Checksum digest) { + super(delegate instanceof BufferedIndexOutput ? ((BufferedIndexOutput) delegate).getBufferSize() : BufferedIndexOutput.DEFAULT_BUFFER_SIZE); + if (delegate instanceof BufferedIndexOutput) { + bufferedDelegate = (BufferedIndexOutput) delegate; + this.delegate = delegate; + } else { + this.delegate = delegate; + bufferedDelegate = null; + } + this.digest = digest; + } + + public Checksum digest() { + return digest; + } + + public IndexOutput underlying() { + return this.delegate; + } + + // don't override it, base class method simple reads from input and writes to this output +// @Override public void copyBytes(IndexInput input, long numBytes) throws IOException { +// delegate.copyBytes(input, numBytes); +// } + + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + delegate.close(); + } + + } + + @Override + protected void flushBuffer(byte[] b, int offset, int len) throws IOException { + if (bufferedDelegate != null) { + bufferedDelegate.flushBuffer(b, offset, len); + } else { + delegate.writeBytes(b, offset, len); + } + digest.update(b, offset, len); + } + + // don't override it, base class method simple reads from input and writes to this output +// @Override public void copyBytes(IndexInput input, long numBytes) throws IOException { +// delegate.copyBytes(input, numBytes); +// } + + @Override + public void flush() throws IOException { + try { + super.flush(); + } finally { + delegate.flush(); + } + } + + @Override + public void seek(long pos) throws IOException { + // seek might be called on files, which means that the checksum is not file checksum + // but a checksum of the bytes written to this stream, which is the same for each + // type of file in lucene + super.seek(pos); + delegate.seek(pos); + } + + @Override + public long length() throws IOException { + return delegate.length(); + } + + @Override + public void setLength(long length) throws IOException { + delegate.setLength(length); + } + + @Override + public String toString() { + return delegate.toString(); + } +} diff --git a/src/main/java/org/apache/lucene/store/RateLimitedFSDirectory.java b/src/main/java/org/apache/lucene/store/RateLimitedFSDirectory.java new file mode 100644 index 0000000..814a75f --- /dev/null +++ b/src/main/java/org/apache/lucene/store/RateLimitedFSDirectory.java @@ -0,0 +1,143 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lucene.store; + +import org.apache.lucene.store.IOContext.Context; + +import java.io.IOException; + +public final class RateLimitedFSDirectory extends FilterDirectory{ + + private final StoreRateLimiting.Provider rateLimitingProvider; + + private final StoreRateLimiting.Listener rateListener; + + public RateLimitedFSDirectory(FSDirectory wrapped, StoreRateLimiting.Provider rateLimitingProvider, + StoreRateLimiting.Listener rateListener) { + super(wrapped); + this.rateLimitingProvider = rateLimitingProvider; + this.rateListener = rateListener; + } + + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + final IndexOutput output = in.createOutput(name, context); + + StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting(); + StoreRateLimiting.Type type = rateLimiting.getType(); + RateLimiter limiter = rateLimiting.getRateLimiter(); + if (type == StoreRateLimiting.Type.NONE || limiter == null) { + return output; + } + if (context.context == Context.MERGE) { + // we are mering, and type is either MERGE or ALL, rate limit... + return new RateLimitedIndexOutput(limiter, rateListener, output); + } + if (type == StoreRateLimiting.Type.ALL) { + return new RateLimitedIndexOutput(limiter, rateListener, output); + } + // we shouldn't really get here... + return output; + } + + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public String toString() { + StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting(); + StoreRateLimiting.Type type = rateLimiting.getType(); + RateLimiter limiter = rateLimiting.getRateLimiter(); + if (type == StoreRateLimiting.Type.NONE || limiter == null) { + return StoreUtils.toString(in); + } else { + return "rate_limited(" + StoreUtils.toString(in) + ", type=" + type.name() + ", rate=" + limiter.getMbPerSec() + ")"; + } + } + + + static final class RateLimitedIndexOutput extends BufferedIndexOutput { + + private final IndexOutput delegate; + private final BufferedIndexOutput bufferedDelegate; + private final RateLimiter rateLimiter; + private final StoreRateLimiting.Listener rateListener; + + RateLimitedIndexOutput(final RateLimiter rateLimiter, final StoreRateLimiting.Listener rateListener, final IndexOutput delegate) { + super(delegate instanceof BufferedIndexOutput ? 
((BufferedIndexOutput) delegate).getBufferSize() : BufferedIndexOutput.DEFAULT_BUFFER_SIZE); + if (delegate instanceof BufferedIndexOutput) { + bufferedDelegate = (BufferedIndexOutput) delegate; + this.delegate = delegate; + } else { + this.delegate = delegate; + bufferedDelegate = null; + } + this.rateLimiter = rateLimiter; + this.rateListener = rateListener; + } + + @Override + protected void flushBuffer(byte[] b, int offset, int len) throws IOException { + rateListener.onPause(rateLimiter.pause(len)); + if (bufferedDelegate != null) { + bufferedDelegate.flushBuffer(b, offset, len); + } else { + delegate.writeBytes(b, offset, len); + } + + } + + @Override + public long length() throws IOException { + return delegate.length(); + } + + @Override + public void seek(long pos) throws IOException { + flush(); + delegate.seek(pos); + } + + @Override + public void flush() throws IOException { + try { + super.flush(); + } finally { + delegate.flush(); + } + } + + @Override + public void setLength(long length) throws IOException { + delegate.setLength(length); + } + + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + delegate.close(); + } + } + } +} diff --git a/src/main/java/org/apache/lucene/store/StoreRateLimiting.java b/src/main/java/org/apache/lucene/store/StoreRateLimiting.java new file mode 100644 index 0000000..8b745ef --- /dev/null +++ b/src/main/java/org/apache/lucene/store/StoreRateLimiting.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lucene.store; + +import org.apache.lucene.store.RateLimiter.SimpleRateLimiter; +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.unit.ByteSizeValue; + +/** + */ +public class StoreRateLimiting { + + public static interface Provider { + + StoreRateLimiting rateLimiting(); + } + + public interface Listener { + + void onPause(long nanos); + } + + public static enum Type { + NONE, + MERGE, + ALL; + + public static Type fromString(String type) throws ElasticsearchIllegalArgumentException { + if ("none".equalsIgnoreCase(type)) { + return NONE; + } else if ("merge".equalsIgnoreCase(type)) { + return MERGE; + } else if ("all".equalsIgnoreCase(type)) { + return ALL; + } + throw new ElasticsearchIllegalArgumentException("rate limiting type [" + type + "] not valid, can be one of [all|merge|none]"); + } + } + + private final SimpleRateLimiter rateLimiter = new SimpleRateLimiter(0); + private volatile SimpleRateLimiter actualRateLimiter; + + private volatile Type type; + + public StoreRateLimiting() { + + } + + @Nullable + public RateLimiter getRateLimiter() { + return actualRateLimiter; + } + + public void setMaxRate(ByteSizeValue rate) { + if (rate.bytes() <= 0) { + actualRateLimiter = null; + } else if (actualRateLimiter == null) { + actualRateLimiter = rateLimiter; + actualRateLimiter.setMbPerSec(rate.mbFrac()); + } else { + assert rateLimiter == actualRateLimiter; + rateLimiter.setMbPerSec(rate.mbFrac()); + } + } + + public Type getType() { + return type; + } + + public void setType(Type type) { + this.type = type; + } + + public void setType(String type) throws ElasticsearchIllegalArgumentException { + this.type = Type.fromString(type); + } +} diff --git a/src/main/java/org/apache/lucene/store/StoreUtils.java b/src/main/java/org/apache/lucene/store/StoreUtils.java new file mode 100644 index 0000000..5364970 --- /dev/null +++ b/src/main/java/org/apache/lucene/store/StoreUtils.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.lucene.store;
+
+/**
+ */
+public final class StoreUtils {
+
+    private StoreUtils() {
+
+    }
+
+    public static String toString(Directory directory) {
+        if (directory instanceof NIOFSDirectory) {
+            NIOFSDirectory niofsDirectory = (NIOFSDirectory) directory;
+            return "niofs(" + niofsDirectory.getDirectory() + ")";
+        }
+        if (directory instanceof MMapDirectory) {
+            MMapDirectory mMapDirectory = (MMapDirectory) directory;
+            return "mmapfs(" + mMapDirectory.getDirectory() + ")";
+        }
+        if (directory instanceof SimpleFSDirectory) {
+            SimpleFSDirectory simpleFSDirectory = (SimpleFSDirectory) directory;
+            return "simplefs(" + simpleFSDirectory.getDirectory() + ")";
+        }
+        return directory.toString();
+    }
+}
diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferAllocator.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferAllocator.java
new file mode 100644
index 0000000..27b4a1a
--- /dev/null
+++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferAllocator.java
@@ -0,0 +1,97 @@
+package org.apache.lucene.store.bytebuffer;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.nio.ByteBuffer;
+
+/**
+ * A byte buffer allocator simply allocates byte buffers and handles releasing
+ * them. Implementations can include special direct buffer cleaning when releasing
+ * a buffer, as well as caching of byte buffers.
+ * <p/>
+ * <p>There are two types of buffers that can be allocated, small and big. This
+ * comes in handy when knowing in advance (more or less) the size of the buffers
+ * needed (large files or small), as well as in caching implementations.
+ */
+public interface ByteBufferAllocator {
+
+    /**
+     * Helper class for allocator implementations that allows cleaning direct buffers.
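+     * Cleaning relies on reflective access to sun.misc.Cleaner; when that is not
+     * available, {@link #CLEAN_SUPPORTED} is false and {@link #clean(ByteBuffer)}
+     * becomes a no-op.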
+ */ + public static class Cleaner { + public static final boolean CLEAN_SUPPORTED; + private static final Method directBufferCleaner; + private static final Method directBufferCleanerClean; + + static { + Method directBufferCleanerX = null; + Method directBufferCleanerCleanX = null; + boolean v; + try { + directBufferCleanerX = Class.forName("java.nio.DirectByteBuffer").getMethod("cleaner"); + directBufferCleanerX.setAccessible(true); + directBufferCleanerCleanX = Class.forName("sun.misc.Cleaner").getMethod("clean"); + directBufferCleanerCleanX.setAccessible(true); + v = true; + } catch (Exception e) { + v = false; + } + CLEAN_SUPPORTED = v; + directBufferCleaner = directBufferCleanerX; + directBufferCleanerClean = directBufferCleanerCleanX; + } + + public static void clean(ByteBuffer buffer) { + if (CLEAN_SUPPORTED && buffer.isDirect()) { + try { + Object cleaner = directBufferCleaner.invoke(buffer); + directBufferCleanerClean.invoke(cleaner); + } catch (Exception e) { + // silently ignore exception + } + } + } + } + + public static enum Type { + SMALL, + LARGE + } + + /** + * The size (in bytes) that is allocated for the provided type. + */ + int sizeInBytes(Type type); + + /** + * Allocate a byte buffer for the specific type. + */ + ByteBuffer allocate(Type type) throws IOException; + + /** + * Release the buffer. + */ + void release(ByteBuffer buffer); + + /** + * Close the allocator, releasing any cached buffers for example. + */ + void close(); +} diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java new file mode 100644 index 0000000..9403085 --- /dev/null +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java @@ -0,0 +1,182 @@ +package org.apache.lucene.store.bytebuffer; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.google.common.collect.ImmutableSet; +import org.apache.lucene.store.*; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +/** + * A memory based directory that uses {@link java.nio.ByteBuffer} in order to store the directory content. + * <p/> + * <p>The benefit of using {@link java.nio.ByteBuffer} is the fact that it can be stored in "native" memory + * outside of the JVM heap, thus not incurring the GC overhead of large in memory index. + * <p/> + * <p>Each "file" is segmented into one or more byte buffers. + * <p/> + * <p>If constructed with {@link ByteBufferAllocator}, it allows to control the allocation and release of + * byte buffer. 
For example, custom implementations can include caching of byte buffers. + */ +public class ByteBufferDirectory extends BaseDirectory { + + protected final Map<String, ByteBufferFile> files = new ConcurrentHashMap<String, ByteBufferFile>(); + + private final ByteBufferAllocator allocator; + + private final boolean internalAllocator; + + final AtomicLong sizeInBytes = new AtomicLong(); + + + /** + * Constructs a new directory using {@link PlainByteBufferAllocator}. + */ + public ByteBufferDirectory() { + this.allocator = new PlainByteBufferAllocator(false, 1024, 1024 * 10); + this.internalAllocator = true; + try { + setLockFactory(new SingleInstanceLockFactory()); + } catch (IOException e) { + // will not happen + } + } + + /** + * Constructs a new byte buffer directory with a custom allocator. + */ + public ByteBufferDirectory(ByteBufferAllocator allocator) { + this.allocator = allocator; + this.internalAllocator = false; + try { + setLockFactory(new SingleInstanceLockFactory()); + } catch (IOException e) { + // will not happen + } + } + + /** + * Returns the size in bytes of the directory, chunk by buffer size. + */ + public long sizeInBytes() { + return sizeInBytes.get(); + } + + public void sync(Collection<String> names) throws IOException { + // nothing to do here + } + + @Override + public String[] listAll() throws IOException { + return files.keySet().toArray(new String[0]); + } + + @Override + public boolean fileExists(String name) throws IOException { + return files.containsKey(name); + } + + @Override + public void deleteFile(String name) throws IOException { + ByteBufferFile file = files.remove(name); + if (file == null) + throw new FileNotFoundException(name); + sizeInBytes.addAndGet(-file.sizeInBytes()); + file.delete(); + } + + @Override + public long fileLength(String name) throws IOException { + ByteBufferFile file = files.get(name); + if (file == null) + throw new FileNotFoundException(name); + return file.getLength(); + } + + private final static ImmutableSet<String> SMALL_FILES_SUFFIXES = ImmutableSet.of( + "del", // 1 bit per doc + "cfe", // compound file metadata + "si", // segment info + "fnm" // field info (metadata like omit norms etc) + ); + + private static boolean isSmallFile(String fileName) { + if (fileName.startsWith("segments")) { + return true; + } + if (fileName.lastIndexOf('.') > 0) { + String suffix = fileName.substring(fileName.lastIndexOf('.') + 1); + return SMALL_FILES_SUFFIXES.contains(suffix); + } + return false; + } + + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + ByteBufferAllocator.Type allocatorType = ByteBufferAllocator.Type.LARGE; + if (isSmallFile(name)) { + allocatorType = ByteBufferAllocator.Type.SMALL; + } + ByteBufferFileOutput file = new ByteBufferFileOutput(this, allocator.sizeInBytes(allocatorType)); + ByteBufferFile existing = files.put(name, file); + if (existing != null) { + sizeInBytes.addAndGet(-existing.sizeInBytes()); + existing.delete(); + } + return new ByteBufferIndexOutput(this, name, allocator, allocatorType, file); + } + + void closeOutput(String name, ByteBufferFileOutput file) { + // we replace the output file with a read only file, with no sync + files.put(name, new ByteBufferFile(file)); + } + + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + ByteBufferFile file = files.get(name); + if (file == null) + throw new FileNotFoundException(name); + return new ByteBufferIndexInput(name, file); + } + + @Override + public void 
close() throws IOException { + String[] files = listAll(); + for (String file : files) { + deleteFile(file); + } + if (internalAllocator) { + allocator.close(); + } + } + + @Override + public String toString() { + return "byte_buffer"; + } + + void releaseBuffer(ByteBuffer byteBuffer) { + allocator.release(byteBuffer); + } +} diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferFile.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferFile.java new file mode 100644 index 0000000..b726d77 --- /dev/null +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferFile.java @@ -0,0 +1,102 @@ +package org.apache.lucene.store.bytebuffer; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +/** + */ +public class ByteBufferFile { + + final ByteBufferDirectory dir; + + final int bufferSize; + + final List<ByteBuffer> buffers; + + long length; + + volatile long lastModified = System.currentTimeMillis(); + + final AtomicInteger refCount; + + long sizeInBytes; + + public ByteBufferFile(ByteBufferDirectory dir, int bufferSize) { + this.dir = dir; + this.bufferSize = bufferSize; + this.buffers = new ArrayList<ByteBuffer>(); + this.refCount = new AtomicInteger(1); + } + + ByteBufferFile(ByteBufferFile file) { + this.dir = file.dir; + this.bufferSize = file.bufferSize; + this.buffers = file.buffers; + this.length = file.length; + this.lastModified = file.lastModified; + this.refCount = file.refCount; + this.sizeInBytes = file.sizeInBytes; + } + + public long getLength() { + return length; + } + + public long getLastModified() { + return lastModified; + } + + void setLastModified(long lastModified) { + this.lastModified = lastModified; + } + + long sizeInBytes() { + return sizeInBytes; + } + + ByteBuffer getBuffer(int index) { + return buffers.get(index); + } + + int numBuffers() { + return buffers.size(); + } + + void delete() { + decRef(); + } + + void incRef() { + refCount.incrementAndGet(); + } + + void decRef() { + if (refCount.decrementAndGet() == 0) { + length = 0; + for (ByteBuffer buffer : buffers) { + dir.releaseBuffer(buffer); + } + buffers.clear(); + sizeInBytes = 0; + } + } +} diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferFileOutput.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferFileOutput.java new file mode 100644 index 0000000..7de55bf --- /dev/null +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferFileOutput.java @@ -0,0 +1,65 @@ +package org.apache.lucene.store.bytebuffer; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.nio.ByteBuffer; + +/** + */ +public class ByteBufferFileOutput extends ByteBufferFile { + + public ByteBufferFileOutput(ByteBufferDirectory dir, int bufferSize) { + super(dir, bufferSize); + } + + @Override + public synchronized long getLength() { + return super.getLength(); + } + + @Override + public synchronized long getLastModified() { + return super.getLastModified(); + } + + synchronized void setLength(long length) { + this.length = length; + } + + synchronized final void addBuffer(ByteBuffer buffer) { + buffers.add(buffer); + sizeInBytes += buffer.remaining(); + dir.sizeInBytes.addAndGet(buffer.remaining()); + } + + @Override + synchronized ByteBuffer getBuffer(int index) { + return super.getBuffer(index); + } + + @Override + synchronized int numBuffers() { + return super.numBuffers(); + } + + @Override + synchronized long sizeInBytes() { + return super.sizeInBytes(); + } +} diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java new file mode 100644 index 0000000..aeba553 --- /dev/null +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java @@ -0,0 +1,198 @@ +package org.apache.lucene.store.bytebuffer; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.IndexInput; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; + +/** + */ +public class ByteBufferIndexInput extends IndexInput { + + private final static ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0).asReadOnlyBuffer(); + + private final ByteBufferFile file; + private final long length; + + private ByteBuffer currentBuffer; + private int currentBufferIndex; + + private long bufferStart; + private final int BUFFER_SIZE; + + private volatile boolean closed = false; + + public ByteBufferIndexInput(String name, ByteBufferFile file) throws IOException { + super("BBIndexInput(name=" + name + ")"); + this.file = file; + this.file.incRef(); + this.length = file.getLength(); + this.BUFFER_SIZE = file.bufferSize; + + // make sure that we switch to the + // first needed buffer lazily + currentBufferIndex = -1; + currentBuffer = EMPTY_BUFFER; + } + + @Override + public void close() { + // we protected from double closing the index input since + // some tests do that... + if (closed) { + return; + } + closed = true; + file.decRef(); + } + + @Override + public long length() { + return length; + } + + @Override + public short readShort() throws IOException { + try { + currentBuffer.mark(); + return currentBuffer.getShort(); + } catch (BufferUnderflowException e) { + currentBuffer.reset(); + return super.readShort(); + } + } + + @Override + public int readInt() throws IOException { + try { + currentBuffer.mark(); + return currentBuffer.getInt(); + } catch (BufferUnderflowException e) { + currentBuffer.reset(); + return super.readInt(); + } + } + + @Override + public long readLong() throws IOException { + try { + currentBuffer.mark(); + return currentBuffer.getLong(); + } catch (BufferUnderflowException e) { + currentBuffer.reset(); + return super.readLong(); + } + } + + @Override + public byte readByte() throws IOException { + if (!currentBuffer.hasRemaining()) { + currentBufferIndex++; + switchCurrentBuffer(true); + } + return currentBuffer.get(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + while (len > 0) { + if (!currentBuffer.hasRemaining()) { + currentBufferIndex++; + switchCurrentBuffer(true); + } + + int remainInBuffer = currentBuffer.remaining(); + int bytesToCopy = len < remainInBuffer ? len : remainInBuffer; + currentBuffer.get(b, offset, bytesToCopy); + offset += bytesToCopy; + len -= bytesToCopy; + } + } + + @Override + public long getFilePointer() { + return currentBufferIndex < 0 ? 0 : bufferStart + currentBuffer.position(); + } + + @Override + public void seek(long pos) throws IOException { + if (currentBuffer == EMPTY_BUFFER || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) { + currentBufferIndex = (int) (pos / BUFFER_SIZE); + switchCurrentBuffer(false); + } + try { + currentBuffer.position((int) (pos % BUFFER_SIZE)); + // Grrr, need to wrap in IllegalArgumentException since tests (if not other places) + // expect an IOException... 
+ } catch (IllegalArgumentException e) { + IOException ioException = new IOException("seeking past position"); + ioException.initCause(e); + throw ioException; + } + } + + private void switchCurrentBuffer(boolean enforceEOF) throws IOException { + if (currentBufferIndex >= file.numBuffers()) { + // end of file reached, no more buffers left + if (enforceEOF) { + throw new EOFException("Read past EOF (resource: " + this + ")"); + } else { + // Force EOF if a read takes place at this position + currentBufferIndex--; + currentBuffer.position(currentBuffer.limit()); + } + } else { + ByteBuffer buffer = file.getBuffer(currentBufferIndex); + // we must duplicate (and make it read only while we are at it) since we need position and such to be independent + currentBuffer = buffer.asReadOnlyBuffer(); + currentBuffer.position(0); + bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; + // if we are at the tip, limit the current buffer to only whats available to read + long buflen = length - bufferStart; + if (buflen < BUFFER_SIZE) { + currentBuffer.limit((int) buflen); + } + + // we need to enforce EOF here as well... + if (!currentBuffer.hasRemaining()) { + if (enforceEOF) { + throw new EOFException("Read past EOF (resource: " + this + ")"); + } else { + // Force EOF if a read takes place at this position + currentBufferIndex--; + currentBuffer.position(currentBuffer.limit()); + } + } + } + } + + @Override + public IndexInput clone() { + ByteBufferIndexInput cloned = (ByteBufferIndexInput) super.clone(); + cloned.file.incRef(); // inc ref on cloned one + if (currentBuffer != EMPTY_BUFFER) { + cloned.currentBuffer = currentBuffer.asReadOnlyBuffer(); + cloned.currentBuffer.position(currentBuffer.position()); + } + return cloned; + } +} diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexOutput.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexOutput.java new file mode 100644 index 0000000..a667bab --- /dev/null +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexOutput.java @@ -0,0 +1,132 @@ +package org.apache.lucene.store.bytebuffer; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + */ +public class ByteBufferIndexOutput extends IndexOutput { + + private final static ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0).asReadOnlyBuffer(); + + private final ByteBufferDirectory dir; + private final String name; + private final ByteBufferAllocator allocator; + private final ByteBufferAllocator.Type allocatorType; + private final int BUFFER_SIZE; + private final ByteBufferFileOutput file; + + private ByteBuffer currentBuffer; + private int currentBufferIndex; + + private long bufferStart; + + public ByteBufferIndexOutput(ByteBufferDirectory dir, String name, ByteBufferAllocator allocator, ByteBufferAllocator.Type allocatorType, ByteBufferFileOutput file) throws IOException { + this.dir = dir; + this.name = name; + this.allocator = allocator; + this.allocatorType = allocatorType; + this.BUFFER_SIZE = file.bufferSize; + this.file = file; + + currentBufferIndex = -1; + currentBuffer = EMPTY_BUFFER; + } + + @Override + public void close() throws IOException { + flush(); + dir.closeOutput(name, file); + } + + @Override + public void seek(long pos) throws IOException { + // set the file length in case we seek back + // and flush() has not been called yet + setFileLength(); + if (pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) { + currentBufferIndex = (int) (pos / BUFFER_SIZE); + switchCurrentBuffer(); + } + currentBuffer.position((int) (pos % BUFFER_SIZE)); + } + + @Override + public long length() { + return file.getLength(); + } + + @Override + public void writeByte(byte b) throws IOException { + if (!currentBuffer.hasRemaining()) { + currentBufferIndex++; + switchCurrentBuffer(); + } + currentBuffer.put(b); + } + + @Override + public void writeBytes(byte[] b, int offset, int len) throws IOException { + while (len > 0) { + if (!currentBuffer.hasRemaining()) { + currentBufferIndex++; + switchCurrentBuffer(); + } + + int remainInBuffer = currentBuffer.remaining(); + int bytesToCopy = len < remainInBuffer ? len : remainInBuffer; + currentBuffer.put(b, offset, bytesToCopy); + offset += bytesToCopy; + len -= bytesToCopy; + } + } + + private void switchCurrentBuffer() throws IOException { + if (currentBufferIndex == file.numBuffers()) { + currentBuffer = allocator.allocate(allocatorType); + file.addBuffer(currentBuffer); + } else { + currentBuffer = file.getBuffer(currentBufferIndex); + } + currentBuffer.position(0); + bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; + } + + private void setFileLength() { + long pointer = bufferStart + currentBuffer.position(); + if (pointer > file.getLength()) { + file.setLength(pointer); + } + } + + @Override + public void flush() throws IOException { + file.setLastModified(System.currentTimeMillis()); + setFileLength(); + } + + @Override + public long getFilePointer() { + return currentBufferIndex < 0 ? 0 : bufferStart + currentBuffer.position(); + } +} diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/CachingByteBufferAllocator.java b/src/main/java/org/apache/lucene/store/bytebuffer/CachingByteBufferAllocator.java new file mode 100644 index 0000000..925a76c --- /dev/null +++ b/src/main/java/org/apache/lucene/store/bytebuffer/CachingByteBufferAllocator.java @@ -0,0 +1,82 @@ +package org.apache.lucene.store.bytebuffer; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+/**
+ * The caching byte buffer allocator allows defining a global cache size for both the
+ * small and large buffers it allocates. Released buffers are reused when possible.
+ */
+public class CachingByteBufferAllocator extends PlainByteBufferAllocator {
+
+    private final BlockingQueue<ByteBuffer> smallCache;
+    private final BlockingQueue<ByteBuffer> largeCache;
+
+    /**
+     * @param direct                 If set to true, will allocate direct buffers (off heap).
+     * @param smallBufferSizeInBytes The size (in bytes) of each small buffer allocation.
+     * @param largeBufferSizeInBytes The size (in bytes) of each large buffer allocation.
+     * @param smallCacheSizeInBytes  The total size (in bytes) of the small buffer cache.
+     * @param largeCacheSizeInBytes  The total size (in bytes) of the large buffer cache.
+     */
+    public CachingByteBufferAllocator(boolean direct, int smallBufferSizeInBytes, int largeBufferSizeInBytes,
+                                      int smallCacheSizeInBytes, int largeCacheSizeInBytes) {
+        super(direct, smallBufferSizeInBytes, largeBufferSizeInBytes);
+        this.smallCache = new LinkedBlockingQueue<ByteBuffer>(smallCacheSizeInBytes / smallBufferSizeInBytes);
+        this.largeCache = new LinkedBlockingQueue<ByteBuffer>(largeCacheSizeInBytes / largeBufferSizeInBytes);
+    }
+
+    public ByteBuffer allocate(Type type) throws IOException {
+        ByteBuffer buffer = type == Type.SMALL ? smallCache.poll() : largeCache.poll();
+        if (buffer == null) {
+            buffer = super.allocate(type);
+        }
+        return buffer;
+    }
+
+    public void release(ByteBuffer buffer) {
+        if (buffer.capacity() == smallBufferSizeInBytes) {
+            boolean success = smallCache.offer(buffer);
+            if (!success) {
+                super.release(buffer);
+            }
+        } else if (buffer.capacity() == largeBufferSizeInBytes) {
+            boolean success = largeCache.offer(buffer);
+            if (!success) {
+                super.release(buffer);
+            }
+        }
+        // otherwise the buffer was not allocated by this allocator, so just ignore it
+    }
+
+    public void close() {
+        for (ByteBuffer buffer : smallCache) {
+            super.release(buffer);
+        }
+        smallCache.clear();
+        for (ByteBuffer buffer : largeCache) {
+            super.release(buffer);
+        }
+        largeCache.clear();
+    }
+}
diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/PlainByteBufferAllocator.java b/src/main/java/org/apache/lucene/store/bytebuffer/PlainByteBufferAllocator.java
new file mode 100644
index 0000000..e800b61
--- /dev/null
+++ b/src/main/java/org/apache/lucene/store/bytebuffer/PlainByteBufferAllocator.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.store.bytebuffer;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * A simple byte buffer allocator that does no caching. The direct flag controls
+ * whether the byte buffers are allocated off heap (direct) or on the heap.
+ */
+public class PlainByteBufferAllocator implements ByteBufferAllocator {
+
+    protected final boolean direct;
+
+    protected final int smallBufferSizeInBytes;
+
+    protected final int largeBufferSizeInBytes;
+
+    /**
+     * Constructs a new plain byte buffer allocator that does no caching.
+     *
+     * @param direct                 If set to true, will allocate direct buffers (off heap).
+     * @param smallBufferSizeInBytes The size (in bytes) of the small buffer allocation.
+     * @param largeBufferSizeInBytes The size (in bytes) of the large buffer allocation.
+     */
+    public PlainByteBufferAllocator(boolean direct, int smallBufferSizeInBytes, int largeBufferSizeInBytes) {
+        this.direct = direct;
+        this.smallBufferSizeInBytes = smallBufferSizeInBytes;
+        this.largeBufferSizeInBytes = largeBufferSizeInBytes;
+    }
+
+    public int sizeInBytes(Type type) {
+        return type == Type.SMALL ? smallBufferSizeInBytes : largeBufferSizeInBytes;
+    }
+
+    public ByteBuffer allocate(Type type) throws IOException {
+        int sizeToAllocate = type == Type.SMALL ? smallBufferSizeInBytes : largeBufferSizeInBytes;
+        if (direct) {
+            return ByteBuffer.allocateDirect(sizeToAllocate);
+        }
+        return ByteBuffer.allocate(sizeToAllocate);
+    }
+
+    public void release(ByteBuffer buffer) {
+        Cleaner.clean(buffer);
+    }
+
+    public void close() {
+        // nothing to do here...
+    }
+}
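The classes above compose into a complete in-memory, optionally off-heap Lucene Directory: an allocator hands out fixed-size ByteBuffers, ByteBufferFile chains them into a logical file, and the IndexInput/IndexOutput pair stream bytes across buffer boundaries (buffer index = position / bufferSize, offset = position % bufferSize). The standalone sketch below shows how these pieces might be wired together; it is illustrative only and not part of the diff, the buffer and cache sizes are arbitrary, and it assumes the Lucene 4.x-era IOContext/DataOutput APIs this code compiles against.

import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.bytebuffer.ByteBufferDirectory;
import org.apache.lucene.store.bytebuffer.CachingByteBufferAllocator;

public class ByteBufferDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Direct (off-heap) buffers: 1 KB small buffers, 64 KB large buffers,
        // with 1 MB of each kept in the reuse caches. All sizes are illustrative.
        CachingByteBufferAllocator allocator =
                new CachingByteBufferAllocator(true, 1024, 64 * 1024, 1024 * 1024, 1024 * 1024);
        ByteBufferDirectory dir = new ByteBufferDirectory(allocator);
        try {
            // "example.txt" has no suffix listed in SMALL_FILES_SUFFIXES, so the
            // directory backs it with LARGE buffers from the allocator.
            IndexOutput out = dir.createOutput("example.txt", IOContext.DEFAULT);
            out.writeString("hello byte buffers");
            out.close(); // flushes and swaps in a read-only ByteBufferFile

            // Reads walk the buffer chain transparently, falling back to byte-by-byte
            // reads only when a value straddles two buffers.
            IndexInput in = dir.openInput("example.txt", IOContext.DEFAULT);
            System.out.println(in.readString() + " (" + in.length() + " bytes)");
            in.close();
        } finally {
            dir.close();       // deletes all files, returning their buffers to the allocator
            allocator.close(); // the directory only closes allocators it created itself
        }
    }
}

Note that ByteBufferDirectory.close() closes the allocator only when the no-argument constructor created it internally; a caller-supplied allocator, as in this sketch, must be closed by the caller once the directory is no longer needed.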