diff options
Diffstat (limited to 'docs/reference/query-dsl')
69 files changed, 5338 insertions, 0 deletions
diff --git a/docs/reference/query-dsl/filters.asciidoc b/docs/reference/query-dsl/filters.asciidoc new file mode 100644 index 0000000..7e4149f --- /dev/null +++ b/docs/reference/query-dsl/filters.asciidoc @@ -0,0 +1,104 @@ +[[query-dsl-filters]] +== Filters + +As a general rule, filters should be used instead of queries: + +* for binary yes/no searches +* for queries on exact values + +[float] +[[caching]] +=== Filters and Caching + +Filters can be a great candidate for caching. Caching the result of a +filter does not require a lot of memory, and will cause other queries +executing against the same filter (same parameters) to be blazingly +fast. + +Some filters already produce a result that is easily cacheable, and the +difference between caching and not caching them is the act of placing +the result in the cache or not. These filters, which include the +<<query-dsl-term-filter,term>>, +<<query-dsl-terms-filter,terms>>, +<<query-dsl-prefix-filter,prefix>>, and +<<query-dsl-range-filter,range>> filters, are by +default cached and are recommended to use (compared to the equivalent +query version) when the same filter (same parameters) will be used +across multiple different queries (for example, a range filter with age +higher than 10). + +Other filters, usually already working with the field data loaded into +memory, are not cached by default. Those filters are already very fast, +and the process of caching them requires extra processing in order to +allow the filter result to be used with different queries than the one +executed. These filters, including the geo, +and <<query-dsl-script-filter,script>> filters +are not cached by default. + +The last type of filters are those working with other filters. The +<<query-dsl-and-filter,and>>, +<<query-dsl-not-filter,not>> and +<<query-dsl-or-filter,or>> filters are not +cached as they basically just manipulate the internal filters. + +All filters allow to set `_cache` element on them to explicitly control +caching. 
They also allow to set `_cache_key` which will be used as the +caching key for that filter. This can be handy when using very large +filters (like a terms filter with many elements in it). + +include::filters/and-filter.asciidoc[] + +include::filters/bool-filter.asciidoc[] + +include::filters/exists-filter.asciidoc[] + +include::filters/geo-bounding-box-filter.asciidoc[] + +include::filters/geo-distance-filter.asciidoc[] + +include::filters/geo-distance-range-filter.asciidoc[] + +include::filters/geo-polygon-filter.asciidoc[] + +include::filters/geo-shape-filter.asciidoc[] + +include::filters/geohash-cell-filter.asciidoc[] + +include::filters/has-child-filter.asciidoc[] + +include::filters/has-parent-filter.asciidoc[] + +include::filters/ids-filter.asciidoc[] + +include::filters/indices-filter.asciidoc[] + +include::filters/limit-filter.asciidoc[] + +include::filters/match-all-filter.asciidoc[] + +include::filters/missing-filter.asciidoc[] + +include::filters/nested-filter.asciidoc[] + +include::filters/not-filter.asciidoc[] + +include::filters/or-filter.asciidoc[] + +include::filters/prefix-filter.asciidoc[] + +include::filters/query-filter.asciidoc[] + +include::filters/range-filter.asciidoc[] + +include::filters/regexp-filter.asciidoc[] + +include::filters/script-filter.asciidoc[] + +include::filters/term-filter.asciidoc[] + +include::filters/terms-filter.asciidoc[] + +include::filters/type-filter.asciidoc[] + + + diff --git a/docs/reference/query-dsl/filters/and-filter.asciidoc b/docs/reference/query-dsl/filters/and-filter.asciidoc new file mode 100644 index 0000000..6f171cf --- /dev/null +++ b/docs/reference/query-dsl/filters/and-filter.asciidoc @@ -0,0 +1,69 @@ +[[query-dsl-and-filter]] +=== And Filter + +A filter that matches documents using `AND` boolean operator on other +filters. Can be placed within queries that accept a filter. 
+ +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "and" : [ + { + "range" : { + "postDate" : { + "from" : "2010-03-01", + "to" : "2010-04-01" + } + } + }, + { + "prefix" : { "name.second" : "ba" } + } + ] + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` in order to cache it (though usually not needed). Since +the `_cache` element requires to be set on the `and` filter itself, the +structure then changes a bit to have the filters provided within a +`filters` element: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "and" : { + "filters": [ + { + "range" : { + "postDate" : { + "from" : "2010-03-01", + "to" : "2010-04-01" + } + } + }, + { + "prefix" : { "name.second" : "ba" } + } + ], + "_cache" : true + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/bool-filter.asciidoc b/docs/reference/query-dsl/filters/bool-filter.asciidoc new file mode 100644 index 0000000..bf36d26 --- /dev/null +++ b/docs/reference/query-dsl/filters/bool-filter.asciidoc @@ -0,0 +1,49 @@ +[[query-dsl-bool-filter]] +=== Bool Filter + +A filter that matches documents matching boolean combinations of other +queries. Similar in concept to +<<query-dsl-bool-query,Boolean query>>, except +that the clauses are other filters. Can be placed within queries that +accept a filter. 
+ +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "queryString" : { + "default_field" : "message", + "query" : "elasticsearch" + } + }, + "filter" : { + "bool" : { + "must" : { + "term" : { "tag" : "wow" } + }, + "must_not" : { + "range" : { + "age" : { "from" : 10, "to" : 20 } + } + }, + "should" : [ + { + "term" : { "tag" : "sometag" } + }, + { + "term" : { "tag" : "sometagtag" } + } + ] + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the `bool` filter is not cached by default (though +internal filters might be). The `_cache` can be set to `true` in order +to enable caching. diff --git a/docs/reference/query-dsl/filters/exists-filter.asciidoc b/docs/reference/query-dsl/filters/exists-filter.asciidoc new file mode 100644 index 0000000..80e9495 --- /dev/null +++ b/docs/reference/query-dsl/filters/exists-filter.asciidoc @@ -0,0 +1,20 @@ +[[query-dsl-exists-filter]] +=== Exists Filter + +Filters documents where a specific field has a value in them. + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "exists" : { "field" : "user" } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is always cached. diff --git a/docs/reference/query-dsl/filters/geo-bounding-box-filter.asciidoc b/docs/reference/query-dsl/filters/geo-bounding-box-filter.asciidoc new file mode 100644 index 0000000..7f16ec5 --- /dev/null +++ b/docs/reference/query-dsl/filters/geo-bounding-box-filter.asciidoc @@ -0,0 +1,240 @@ +[[query-dsl-geo-bounding-box-filter]] +=== Geo Bounding Box Filter + +A filter allowing to filter hits based on a point location using a +bounding box. 
Assuming the following indexed document: + +[source,js] +-------------------------------------------------- +{ + "pin" : { + "location" : { + "lat" : 40.12, + "lon" : -71.34 + } + } +} +-------------------------------------------------- + +Then the following simple query can be executed with a +`geo_bounding_box` filter: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top_left" : { + "lat" : 40.73, + "lon" : -74.1 + }, + "bottom_right" : { + "lat" : 40.01, + "lon" : -71.12 + } + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Accepted Formats + +In much the same way the geo_point type can accept different +representation of the geo point, the filter can accept it as well: + +[float] +===== Lat Lon As Properties + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top_left" : { + "lat" : 40.73, + "lon" : -74.1 + }, + "bottom_right" : { + "lat" : 40.01, + "lon" : -71.12 + } + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Lat Lon As Array + +Format in `[lon, lat]`, note, the order of lon/lat here in order to +conform with http://geojson.org/[GeoJSON]. + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top_left" : [-74.1, 40.73], + "bottom_right" : [-71.12, 40.01] + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Lat Lon As String + +Format in `lat,lon`. 
+ +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top_left" : "40.73, -74.1", + "bottom_right" : "40.01, -71.12" + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Geohash + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top_left" : "dr5r9ydj2y73", + "bottom_right" : "drj7teegpus6" + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Vertices + +The vertices of the bounding box can either be set by `top_left` and +`bottom_right` or by `top_right` and `bottom_left` parameters. More +over the names `topLeft`, `bottomRight`, `topRight` and `bottomLeft` +are supported. Instead of setting the values pairwise, one can use +the simple names `top`, `left`, `bottom` and `right` to set the +values separately. + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top" : -74.1, + "left" : 40.73, + "bottom" : -71.12, + "right" : 40.01 + } + } + } + } +} +-------------------------------------------------- + + +[float] +==== geo_point Type + +The filter *requires* the `geo_point` type to be set on the relevant +field. + +[float] +==== Multi Location Per Document + +The filter can work with multiple locations / points per document. Once +a single location / point matches the filter, the document will be +included in the filter + +[float] +==== Type + +The type of the bounding box execution by default is set to `memory`, +which means in memory checks if the doc falls within the bounding box +range. 
In some cases, an `indexed` option will perform faster (but note +that the `geo_point` type must have lat and lon indexed in this case). +Note, when using the indexed option, multi locations per document field +are not supported. Here is an example: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_bounding_box" : { + "pin.location" : { + "top_left" : { + "lat" : 40.73, + "lon" : -74.1 + }, + "bottom_right" : { + "lat" : 40.10, + "lon" : -71.12 + } + }, + "type" : "indexed" + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` to cache the *result* of the filter. This is handy when +the same bounding box parameters are used on several (many) other +queries. Note, the process of caching the first execution is higher when +caching (since it needs to satisfy different queries). diff --git a/docs/reference/query-dsl/filters/geo-distance-filter.asciidoc b/docs/reference/query-dsl/filters/geo-distance-filter.asciidoc new file mode 100644 index 0000000..f501398 --- /dev/null +++ b/docs/reference/query-dsl/filters/geo-distance-filter.asciidoc @@ -0,0 +1,184 @@ +[[query-dsl-geo-distance-filter]] +=== Geo Distance Filter + +Filters documents that include only hits that exists within a specific +distance from a geo point. 
Assuming the following indexed json: + +[source,js] +-------------------------------------------------- +{ + "pin" : { + "location" : { + "lat" : 40.12, + "lon" : -71.34 + } + } +} +-------------------------------------------------- + +Then the following simple query can be executed with a `geo_distance` +filter: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_distance" : { + "distance" : "200km", + "pin.location" : { + "lat" : 40, + "lon" : -70 + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Accepted Formats + +In much the same way the `geo_point` type can accept different +representation of the geo point, the filter can accept it as well: + +[float] +===== Lat Lon As Properties + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_distance" : { + "distance" : "12km", + "pin.location" : { + "lat" : 40, + "lon" : -70 + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Lat Lon As Array + +Format in `[lon, lat]`, note, the order of lon/lat here in order to +conform with http://geojson.org/[GeoJSON]. + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_distance" : { + "distance" : "12km", + "pin.location" : [40, -70] + } + } + } +} +-------------------------------------------------- + +[float] +===== Lat Lon As String + +Format in `lat,lon`. 
+ +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_distance" : { + "distance" : "12km", + "pin.location" : "40,-70" + } + } + } +} +-------------------------------------------------- + +[float] +===== Geohash + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_distance" : { + "distance" : "12km", + "pin.location" : "drm3btev3e86" + } + } + } +} +-------------------------------------------------- + +[float] +==== Options + +The following are options allowed on the filter: + +[horizontal] + +`distance`:: + + The radius of the circle centred on the specified location. Points which + fall into this circle are considered to be matches. The `distance` can be + specified in various units. See <<distance-units>>. + +`distance_type`:: + + How to compute the distance. Can either be `arc` (better precision), + `sloppy_arc` (faster but less precise) or `plane` (fastest). Defaults + to `sloppy_arc`. + +`optimize_bbox`:: + + Whether to use the optimization of first running a bounding box check + before the distance check. Defaults to `memory` which will do in memory + checks. Can also have values of `indexed` to use indexed value check (make + sure the `geo_point` type index lat lon in this case), or `none` which + disables bounding box optimization. + + +[float] +==== geo_point Type + +The filter *requires* the `geo_point` type to be set on the relevant +field. + +[float] +==== Multi Location Per Document + +The `geo_distance` filter can work with multiple locations / points per +document. Once a single location / point matches the filter, the +document will be included in the filter. + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` to cache the *result* of the filter. 
This is handy when
+the same point and distance parameters are used on several (many) other
+queries. Note, the process of caching the first execution is higher when
+caching (since it needs to satisfy different queries). 
diff --git a/docs/reference/query-dsl/filters/geo-distance-range-filter.asciidoc b/docs/reference/query-dsl/filters/geo-distance-range-filter.asciidoc new file mode 100644 index 0000000..1bc4197 --- /dev/null +++ b/docs/reference/query-dsl/filters/geo-distance-range-filter.asciidoc @@ -0,0 +1,30 @@ +[[query-dsl-geo-distance-range-filter]]
+=== Geo Distance Range Filter
+
+Filters documents that exist within a range from a specific point:
+
+[source,js]
+--------------------------------------------------
+{
+    "filtered" : {
+        "query" : {
+            "match_all" : {}
+        },
+        "filter" : {
+            "geo_distance_range" : {
+                "from" : "200km",
+                "to" : "400km",
+                "pin.location" : {
+                    "lat" : 40,
+                    "lon" : -70
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+Supports the same point location parameter as the
+<<query-dsl-geo-distance-filter,geo_distance>>
+filter. It also supports the common parameters for range (lt, lte, gt,
+gte, from, to, include_upper and include_lower). diff --git a/docs/reference/query-dsl/filters/geo-polygon-filter.asciidoc b/docs/reference/query-dsl/filters/geo-polygon-filter.asciidoc new file mode 100644 index 0000000..a421234 --- /dev/null +++ b/docs/reference/query-dsl/filters/geo-polygon-filter.asciidoc @@ -0,0 +1,126 @@ +[[query-dsl-geo-polygon-filter]]
+=== Geo Polygon Filter
+
+A filter allowing to include hits that only fall within a polygon of
+points. 
Here is an example: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_polygon" : { + "person.location" : { + "points" : [ + {"lat" : 40, "lon" : -70}, + {"lat" : 30, "lon" : -80}, + {"lat" : 20, "lon" : -90} + ] + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Allowed Formats + +[float] +===== Lat Long as Array + +Format in `[lon, lat]`, note, the order of lon/lat here in order to +conform with http://geojson.org/[GeoJSON]. + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_polygon" : { + "person.location" : { + "points" : [ + [-70, 40], + [-80, 30], + [-90, 20] + ] + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Lat Lon as String + +Format in `lat,lon`. + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_polygon" : { + "person.location" : { + "points" : [ + "40, -70", + "30, -80", + "20, -90" + ] + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Geohash + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "match_all" : {} + }, + "filter" : { + "geo_polygon" : { + "person.location" : { + "points" : [ + "drn5x1g8cu2y", + "30, -80", + "20, -90" + ] + } + } + } + } +} +-------------------------------------------------- + +[float] +==== geo_point Type + +The filter *requires* the +<<mapping-geo-point-type,geo_point>> type to be +set on the relevant field. + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` to cache the *result* of the filter. This is handy when +the same points parameters are used on several (many) other queries. 
+Note, the process of caching the first execution is higher when caching +(since it needs to satisfy different queries). diff --git a/docs/reference/query-dsl/filters/geo-shape-filter.asciidoc b/docs/reference/query-dsl/filters/geo-shape-filter.asciidoc new file mode 100644 index 0000000..f97e798 --- /dev/null +++ b/docs/reference/query-dsl/filters/geo-shape-filter.asciidoc @@ -0,0 +1,121 @@ +[[query-dsl-geo-shape-filter]] +=== GeoShape Filter + +Filter documents indexed using the `geo_shape` type. + +Requires the <<mapping-geo-shape-type,geo_shape +Mapping>>. + +You may also use the +<<query-dsl-geo-shape-query,geo_shape Query>>. + +The `geo_shape` Filter uses the same grid square representation as the +geo_shape mapping to find documents that have a shape that intersects +with the query shape. It will also use the same PrefixTree configuration +as defined for the field mapping. + +[float] +==== Filter Format + +The Filter supports two ways of defining the Filter shape, either by +providing a whole shape defintion, or by referencing the name of a shape +pre-indexed in another index. Both formats are defined below with +examples. + +[float] +===== Provided Shape Definition + +Similar to the `geo_shape` type, the `geo_shape` Filter uses +http://www.geojson.org[GeoJSON] to represent shapes. 
+ +Given a document that looks like this: + +[source,js] +-------------------------------------------------- +{ + "name": "Wind & Wetter, Berlin, Germany", + "location": { + "type": "Point", + "coordinates": [13.400544, 52.530286] + } +} +-------------------------------------------------- + +The following query will find the point using the Elasticsearch's +`envelope` GeoJSON extension: + +[source,js] +-------------------------------------------------- +{ + "query":{ + "filtered": { + "query": { + "match_all": {} + }, + "filter": { + "geo_shape": { + "location": { + "shape": { + "type": "envelope", + "coordinates" : [[13.0, 53.0], [14.0, 52.0]] + } + } + } + } + } + } +} +-------------------------------------------------- + +[float] +===== Pre-Indexed Shape + +The Filter also supports using a shape which has already been indexed in +another index and/or index type. This is particularly useful for when +you have a pre-defined list of shapes which are useful to your +application and you want to reference this using a logical name (for +example 'New Zealand') rather than having to provide their coordinates +each time. In this situation it is only necessary to provide: + +* `id` - The ID of the document that containing the pre-indexed shape. +* `index` - Name of the index where the pre-indexed shape is. Defaults +to 'shapes'. +* `type` - Index type where the pre-indexed shape is. +* `path` - The field specified as path containing the pre-indexed shape. +Defaults to 'shape'. + +The following is an example of using the Filter with a pre-indexed +shape: + +[source,js] +-------------------------------------------------- +{ + "filtered": { + "query": { + "match_all": {} + }, + "filter": { + "geo_shape": { + "location": { + "indexed_shape": { + "id": "DEU", + "type": "countries", + "index": "shapes", + "path": "location" + } + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the Filter is not cached by default. 
Setting `_cache` to
+`true` will mean the results of the Filter will be cached. Since shapes
+can contain 10s-100s of coordinates and any one differing means a new
+shape, it may make sense to only use caching when you are sure that
+the shapes will remain reasonably static.
+
 diff --git a/docs/reference/query-dsl/filters/geohash-cell-filter.asciidoc b/docs/reference/query-dsl/filters/geohash-cell-filter.asciidoc new file mode 100644 index 0000000..3440902 --- /dev/null +++ b/docs/reference/query-dsl/filters/geohash-cell-filter.asciidoc @@ -0,0 +1,62 @@ +[[query-dsl-geohash-cell-filter]]
+=== Geohash Cell Filter
+
+The `geohash_cell` filter provides access to a hierarchy of geohashes.
+By defining a geohash cell, only <<mapping-geo-point-type,geopoints>>
+within this cell will match this filter.
+
+To make this filter work, all prefixes of a geohash need to be indexed. For
+example, a geohash `u30` needs to be decomposed into three terms: `u30`,
+`u3` and `u`. This decomposition must be enabled in the mapping of the
+<<mapping-geo-point-type,geopoint>> field that's going to be filtered by
+setting the `geohash_prefix` option:
+
+[source,js]
+--------------------------------------------------
+{
+    "mappings" : {
+        "location": {
+            "properties": {
+                "pin": {
+                    "type": "geo_point",
+                    "geohash": true,
+                    "geohash_prefix": true,
+                    "geohash_precision": 10
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+The geohash cell can be defined by all formats of `geo_points`. If such a cell is
+defined by a latitude and longitude pair, the size of the cell needs to be
+set up. This can be done by the `precision` parameter of the filter. This
+parameter can be set to an integer value which sets the length of the geohash
+prefix. Instead of setting a geohash length directly it is also possible to
+define the precision as a distance, for example `"precision": "50m"`. (See
+<<distance-units>>.) 
The `neighbors` option of the filter offers the possibility to filter cells
+next to the given cell.
+
+[source,js]
+--------------------------------------------------
+{
+    "filtered" : {
+        "query" : {
+            "match_all" : {}
+        },
+        "filter" : {
+            "geohash_cell": {
+                "pin": {
+                    "lat": 13.4080,
+                    "lon": 52.5186
+                },
+                "precision": 3,
+                "neighbors": true
+            }
+        }
+    }
+}
+-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/has-child-filter.asciidoc b/docs/reference/query-dsl/filters/has-child-filter.asciidoc new file mode 100644 index 0000000..1a8e116 --- /dev/null +++ b/docs/reference/query-dsl/filters/has-child-filter.asciidoc @@ -0,0 +1,57 @@ +[[query-dsl-has-child-filter]]
+=== Has Child Filter
+
+The `has_child` filter accepts a query and the child type to run
+against, and results in parent documents that have child docs matching
+the query. Here is an example:
+
+[source,js]
+--------------------------------------------------
+{
+    "has_child" : {
+        "type" : "blog_tag",
+        "query" : {
+            "term" : {
+                "tag" : "something"
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+The `type` is the child type to query against. The parent type to return
+is automatically detected based on the mappings.
+
+The way that the filter is implemented is by first running the child
+query, doing the matching up to the parent doc for each document
+matched.
+
+The `has_child` filter also accepts a filter instead of a query:
+
+[source,js]
+--------------------------------------------------
+{
+    "has_child" : {
+        "type" : "comment",
+        "filter" : {
+            "term" : {
+                "user" : "john"
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+[float]
+==== Memory Considerations
+
+With the current implementation, all `_id` values are loaded to memory
+(heap) in order to support fast lookups, so make sure there is enough
+memory for it.
+
+[float]
+==== Caching
+
+The `has_child` filter cannot be cached in the filter cache. 
The `_cache` +and `_cache_key` options are a no-op in this filter. Also any filter that +wraps the `has_child` filter either directly or indirectly will not be cached.
\ No newline at end of file diff --git a/docs/reference/query-dsl/filters/has-parent-filter.asciidoc b/docs/reference/query-dsl/filters/has-parent-filter.asciidoc new file mode 100644 index 0000000..fe4f66d --- /dev/null +++ b/docs/reference/query-dsl/filters/has-parent-filter.asciidoc @@ -0,0 +1,60 @@ +[[query-dsl-has-parent-filter]] +=== Has Parent Filter + +The `has_parent` filter accepts a query and a parent type. The query is +executed in the parent document space, which is specified by the parent +type. This filter return child documents which associated parents have +matched. For the rest `has_parent` filter has the same options and works +in the same manner as the `has_child` filter. + +[float] +==== Filter example + +[source,js] +-------------------------------------------------- +{ + "has_parent" : { + "parent_type" : "blog", + "query" : { + "term" : { + "tag" : "something" + } + } + } +} +-------------------------------------------------- + +The `parent_type` field name can also be abbreviated to `type`. + +The way that the filter is implemented is by first running the parent +query, doing the matching up to the child doc for each document matched. + +The `has_parent` filter also accepts a filter instead of a query: + +[source,js] +-------------------------------------------------- +{ + "has_parent" : { + "type" : "blog", + "filter" : { + "term" : { + "text" : "bonsai three" + } + } + } +} +-------------------------------------------------- + +[float] +==== Memory considerations + +With the current implementation, all `_id` values are loaded to memory +(heap) in order to support fast lookups, so make sure there is enough +memory for it. + +[float] +==== Caching + +The `has_parent` filter cannot be cached in the filter cache. The `_cache` +and `_cache_key` options are a no-op in this filter. Also any filter that +wraps the `has_parent` filter either directly or indirectly will not be cached.
\ No newline at end of file diff --git a/docs/reference/query-dsl/filters/ids-filter.asciidoc b/docs/reference/query-dsl/filters/ids-filter.asciidoc new file mode 100644 index 0000000..303fffb --- /dev/null +++ b/docs/reference/query-dsl/filters/ids-filter.asciidoc @@ -0,0 +1,20 @@ +[[query-dsl-ids-filter]] +=== Ids Filter + +Filters documents that only have the provided ids. Note, this filter +does not require the <<mapping-id-field,_id>> +field to be indexed since it works using the +<<mapping-uid-field,_uid>> field. + +[source,js] +-------------------------------------------------- +{ + "ids" : { + "type" : "my_type", + "values" : ["1", "4", "100"] + } +} +-------------------------------------------------- + +The `type` is optional and can be omitted, and can also accept an array +of values. diff --git a/docs/reference/query-dsl/filters/indices-filter.asciidoc b/docs/reference/query-dsl/filters/indices-filter.asciidoc new file mode 100644 index 0000000..05ad232 --- /dev/null +++ b/docs/reference/query-dsl/filters/indices-filter.asciidoc @@ -0,0 +1,37 @@ +[[query-dsl-indices-filter]] +=== Indices Filter + +The `indices` filter can be used when executed across multiple indices, +allowing to have a filter that executes only when executed on an index +that matches a specific list of indices, and another filter that executes +when it is executed on an index that does not match the listed indices. + +[source,js] +-------------------------------------------------- +{ + "indices" : { + "indices" : ["index1", "index2"], + "filter" : { + "term" : { "tag" : "wow" } + }, + "no_match_filter" : { + "term" : { "tag" : "kow" } + } + } +} +-------------------------------------------------- + +You can use the `index` field to provide a single index. + +`no_match_filter` can also have "string" value of `none` (to match no +documents), and `all` (to match all). Defaults to `all`. + +`filter` is mandatory, as well as `indices` (or `index`). 
+ +[TIP] +=================================================================== +The fields order is important: if the `indices` are provided before `filter` +or `no_match_filter`, the related filters get parsed only against the indices +that they are going to be executed on. This is useful to avoid parsing filters +when it is not necessary and prevent potential mapping errors. +=================================================================== diff --git a/docs/reference/query-dsl/filters/limit-filter.asciidoc b/docs/reference/query-dsl/filters/limit-filter.asciidoc new file mode 100644 index 0000000..a590c25 --- /dev/null +++ b/docs/reference/query-dsl/filters/limit-filter.asciidoc @@ -0,0 +1,19 @@ +[[query-dsl-limit-filter]] +=== Limit Filter + +A limit filter limits the number of documents (per shard) to execute on. +For example: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "filter" : { + "limit" : {"value" : 100} + }, + "query" : { + "term" : { "name.first" : "shay" } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/match-all-filter.asciidoc b/docs/reference/query-dsl/filters/match-all-filter.asciidoc new file mode 100644 index 0000000..97adbd1 --- /dev/null +++ b/docs/reference/query-dsl/filters/match-all-filter.asciidoc @@ -0,0 +1,15 @@ +[[query-dsl-match-all-filter]] +=== Match All Filter + +A filter that matches on all documents: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "match_all" : { } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/missing-filter.asciidoc b/docs/reference/query-dsl/filters/missing-filter.asciidoc new file mode 100644 index 0000000..70685bd --- /dev/null +++ b/docs/reference/query-dsl/filters/missing-filter.asciidoc @@ -0,0 +1,41 @@ +[[query-dsl-missing-filter]] +=== Missing Filter + +Filters documents where 
a specific field has no value in them. + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "missing" : { "field" : "user" } + } + } +} +-------------------------------------------------- + +By default, the filter will only find "missing" fields, i.e., fields +that have no values. It can be configured also to find fields with an +explicit `null_value` mapped for them. Here is an example that will both +find missing field that don't exists (`existence` set to `true`), or +have null values (`null_value` set to `true`). + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "missing" : { + "field" : "user", + "existence" : true, + "null_value" : true + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is always cached. diff --git a/docs/reference/query-dsl/filters/nested-filter.asciidoc b/docs/reference/query-dsl/filters/nested-filter.asciidoc new file mode 100644 index 0000000..6235be2 --- /dev/null +++ b/docs/reference/query-dsl/filters/nested-filter.asciidoc @@ -0,0 +1,78 @@ +[[query-dsl-nested-filter]] +=== Nested Filter + +A `nested` filter works in a similar fashion to the +<<query-dsl-nested-query,nested>> query, except it's +used as a filter. It follows exactly the same structure, but also allows +to cache the results (set `_cache` to `true`), and have it named (set +the `_name` value). 
For example: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { "match_all" : {} }, + "filter" : { + "nested" : { + "path" : "obj1", + "filter" : { + "bool" : { + "must" : [ + { + "term" : {"obj1.name" : "blue"} + }, + { + "range" : {"obj1.count" : {"gt" : 5}} + } + ] + } + }, + "_cache" : true + } + } + } +} +-------------------------------------------------- + +[float] +==== Join option + +The nested filter also supports a `join` option which controls whether to perform the block join or not. +By default, it's enabled. But when it's disabled, it emits the hidden nested documents as hits instead of the joined root document. + +This is useful when a `nested` filter is used in a facet where nested is enabled, like you can see in the example below: + +[source,js] +-------------------------------------------------- +{ + "query" : { + "nested" : { + "path" : "offers", + "query" : { + "match" : { + "offers.color" : "blue" + } + } + } + }, + "facets" : { + "size" : { + "terms" : { + "field" : "offers.size" + }, + "facet_filter" : { + "nested" : { + "path" : "offers", + "query" : { + "match" : { + "offers.color" : "blue" + } + }, + "join" : false + } + }, + "nested" : "offers" + } + } +}' +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/not-filter.asciidoc b/docs/reference/query-dsl/filters/not-filter.asciidoc new file mode 100644 index 0000000..629cb17 --- /dev/null +++ b/docs/reference/query-dsl/filters/not-filter.asciidoc @@ -0,0 +1,82 @@ +[[query-dsl-not-filter]] +=== Not Filter + +A filter that filters out matched documents using a query. Can be placed +within queries that accept a filter. 
+ +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "not" : { + "range" : { + "postDate" : { + "from" : "2010-03-01", + "to" : "2010-04-01" + } + } + } + } + } +} +-------------------------------------------------- + +Or, in a longer form with a `filter` element: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "not" : { + "filter" : { + "range" : { + "postDate" : { + "from" : "2010-03-01", + "to" : "2010-04-01" + } + } + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` in order to cache it (though usually not needed). Here is +an example: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "not" : { + "filter" : { + "range" : { + "postDate" : { + "from" : "2010-03-01", + "to" : "2010-04-01" + } + } + }, + "_cache" : true + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/or-filter.asciidoc b/docs/reference/query-dsl/filters/or-filter.asciidoc new file mode 100644 index 0000000..9c68cb9 --- /dev/null +++ b/docs/reference/query-dsl/filters/or-filter.asciidoc @@ -0,0 +1,59 @@ +[[query-dsl-or-filter]] +=== Or Filter + +A filter that matches documents using `OR` boolean operator on other +queries. Can be placed within queries that accept a filter. 
+ +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "or" : [ + { + "term" : { "name.second" : "banon" } + }, + { + "term" : { "name.nick" : "kimchy" } + } + ] + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` in order to cache it (though usually not needed). Since +the `_cache` element requires to be set on the `or` filter itself, the +structure then changes a bit to have the filters provided within a +`filters` element: + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "name.first" : "shay" } + }, + "filter" : { + "or" : { + "filters" : [ + { + "term" : { "name.second" : "banon" } + }, + { + "term" : { "name.nick" : "kimchy" } + } + ], + "_cache" : true + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/prefix-filter.asciidoc b/docs/reference/query-dsl/filters/prefix-filter.asciidoc new file mode 100644 index 0000000..d29f570 --- /dev/null +++ b/docs/reference/query-dsl/filters/prefix-filter.asciidoc @@ -0,0 +1,37 @@ +[[query-dsl-prefix-filter]] +=== Prefix Filter + +Filters documents that have fields containing terms with a specified +prefix (*not analyzed*). Similar to phrase query, except that it acts as +a filter. Can be placed within queries that accept a filter. + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "prefix" : { "user" : "ki" } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is cached by default. The `_cache` can be set +to `false` in order not to cache it. 
Here is an example: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "prefix" : { + "user" : "ki", + "_cache" : false + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/query-filter.asciidoc b/docs/reference/query-dsl/filters/query-filter.asciidoc new file mode 100644 index 0000000..8cd3858 --- /dev/null +++ b/docs/reference/query-dsl/filters/query-filter.asciidoc @@ -0,0 +1,50 @@ +[[query-dsl-query-filter]] +=== Query Filter + +Wraps any query to be used as a filter. Can be placed within queries +that accept a filter. + +[source,js] +-------------------------------------------------- +{ + "constantScore" : { + "filter" : { + "query" : { + "query_string" : { + "query" : "this AND that OR thus" + } + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` to cache the *result* of the filter. This is handy when +the same query is used on several (many) other queries. Note, the +process of caching the first execution is higher when not caching (since +it needs to satisfy different queries). + +Setting the `_cache` element requires a different format for the +`query`: + +[source,js] +-------------------------------------------------- +{ + "constantScore" : { + "filter" : { + "fquery" : { + "query" : { + "query_string" : { + "query" : "this AND that OR thus" + } + }, + "_cache" : true + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/range-filter.asciidoc b/docs/reference/query-dsl/filters/range-filter.asciidoc new file mode 100644 index 0000000..7399d00 --- /dev/null +++ b/docs/reference/query-dsl/filters/range-filter.asciidoc @@ -0,0 +1,55 @@ +[[query-dsl-range-filter]] +=== Range Filter + +Filters documents with fields that have terms within a certain range. 
+Similar to <<query-dsl-range-query,range +query>>, except that it acts as a filter. Can be placed within queries +that accept a filter. + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "range" : { + "age" : { + "gte": 10, + "lte": 20 + } + } + } + } +} +-------------------------------------------------- + +The `range` filter accepts the following parameters: + +[horizontal] +`gte`:: Greater-than or equal to +`gt`:: Greater-than +`lte`:: Less-than or equal to +`lt`:: Less-than + +[float] +==== Execution + +The `execution` option controls how the range filter internally executes. The `execution` option accepts the following values: + +[horizontal] +`index`:: Uses field's inverted in order to determine of documents fall with in the range filter's from and to range +`fielddata`:: Uses field data in order to determine of documents fall with in the range filter's from and to range. + +In general for small ranges the `index` execution is faster and for longer ranges the `fielddata` execution is faster. + +The `fielddata` execution as the same suggests uses field data and therefor requires more memory, so make you have +sufficient memory on your nodes in order to use this execution mode. It usually makes sense to use it on fields you're +already faceting or sorting by. + +[float] +==== Caching + +The result of the filter is only automatically cached by default if the `execution` is set to `index`. The +`_cache` can be set to `false` to turn it off. + +If the `now` date math expression is used without rounding then a range filter will never be cached even if `_cache` is +set to `true`. Also any filter that wraps this filter will never be cached. 
diff --git a/docs/reference/query-dsl/filters/regexp-filter.asciidoc b/docs/reference/query-dsl/filters/regexp-filter.asciidoc new file mode 100644 index 0000000..72cc169 --- /dev/null +++ b/docs/reference/query-dsl/filters/regexp-filter.asciidoc @@ -0,0 +1,53 @@ +[[query-dsl-regexp-filter]] +=== Regexp Filter + +The `regexp` filter is similar to the +<<query-dsl-regexp-query,regexp>> query, except +that it is cacheable and can speedup performance in case you are reusing +this filter in your queries. + +See <<regexp-syntax>> for details of the supported regular expression language. + +[source,js] +-------------------------------------------------- +{ + "filtered": { + "query": { + "match_all": {} + }, + "filter": { + "regexp":{ + "name.first" : "s.*y" + } + } + } +} +-------------------------------------------------- + +You can also select the cache name and use the same regexp flags in the +filter as in the query. + +*Note*: You have to enable caching explicitly in order to have the +`regexp` filter cached. + +[source,js] +-------------------------------------------------- +{ + "filtered": { + "query": { + "match_all": {} + }, + "filter": { + "regexp":{ + "name.first" : { + "value" : "s.*y", + "flags" : "INTERSECTION|COMPLEMENT|EMPTY" + }, + "_name":"test", + "_cache" : true, + "_cache_key" : "key" + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/script-filter.asciidoc b/docs/reference/query-dsl/filters/script-filter.asciidoc new file mode 100644 index 0000000..f9e0cd1 --- /dev/null +++ b/docs/reference/query-dsl/filters/script-filter.asciidoc @@ -0,0 +1,53 @@ +[[query-dsl-script-filter]] +=== Script Filter + +A filter allowing to define +<<modules-scripting,scripts>> as filters. For +example: + +[source,js] +---------------------------------------------- +"filtered" : { + "query" : { + ... 
+ }, + "filter" : { + "script" : { + "script" : "doc['num1'].value > 1" + } + } +} +---------------------------------------------- + +[float] +==== Custom Parameters + +Scripts are compiled and cached for faster execution. If the same script +can be used, just with different parameters provider, it is preferable +to use the ability to pass parameters to the script itself, for example: + +[source,js] +---------------------------------------------- +"filtered" : { + "query" : { + ... + }, + "filter" : { + "script" : { + "script" : "doc['num1'].value > param1" + "params" : { + "param1" : 5 + } + } + } +} +---------------------------------------------- + +[float] +==== Caching + +The result of the filter is not cached by default. The `_cache` can be +set to `true` to cache the *result* of the filter. This is handy when +the same script and parameters are used on several (many) other queries. +Note, the process of caching the first execution is higher when caching +(since it needs to satisfy different queries). diff --git a/docs/reference/query-dsl/filters/term-filter.asciidoc b/docs/reference/query-dsl/filters/term-filter.asciidoc new file mode 100644 index 0000000..09cd32d --- /dev/null +++ b/docs/reference/query-dsl/filters/term-filter.asciidoc @@ -0,0 +1,38 @@ +[[query-dsl-term-filter]] +=== Term Filter + +Filters documents that have fields that contain a term (*not analyzed*). +Similar to <<query-dsl-term-query,term query>>, +except that it acts as a filter. Can be placed within queries that +accept a filter, for example: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "term" : { "user" : "kimchy"} + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is automatically cached by default. The +`_cache` can be set to `false` to turn it off. 
Here is an example: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "term" : { + "user" : "kimchy", + "_cache" : false + } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/filters/terms-filter.asciidoc b/docs/reference/query-dsl/filters/terms-filter.asciidoc new file mode 100644 index 0000000..4de6f12 --- /dev/null +++ b/docs/reference/query-dsl/filters/terms-filter.asciidoc @@ -0,0 +1,227 @@ +[[query-dsl-terms-filter]] +=== Terms Filter + +Filters documents that have fields that match any of the provided terms +(*not analyzed*). For example: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "terms" : { "user" : ["kimchy", "elasticsearch"]} + } + } +} +-------------------------------------------------- + +The `terms` filter is also aliased with `in` as the filter name for +simpler usage. + +[float] +==== Execution Mode + +The way terms filter executes is by iterating over the terms provided +and finding matches docs (loading into a bitset) and caching it. +Sometimes, we want a different execution model that can still be +achieved by building more complex queries in the DSL, but we can support +them in the more compact model that terms filter provides. + +The `execution` option now has the following options : + +[horizontal] +`plain`:: + The default. Works as today. Iterates over all the terms, + building a bit set matching it, and filtering. The total filter is + cached. + +`fielddata`:: + Generates a terms filters that uses the fielddata cache to + compare terms. This execution mode is great to use when filtering + on a field that is already loaded into the fielddata cache from + faceting, sorting, or index warmers. When filtering on + a large number of terms, this execution can be considerably faster + than the other modes. 
The total filter is not cached unless + explicitly configured to do so. + +`bool`:: + Generates a term filter (which is cached) for each term, and + wraps those in a bool filter. The bool filter itself is not cached as it + can operate very quickly on the cached term filters. + +`and`:: + Generates a term filter (which is cached) for each term, and + wraps those in an and filter. The and filter itself is not cached. + +`or`:: + Generates a term filter (which is cached) for each term, and + wraps those in an or filter. The or filter itself is not cached. + Generally, the `bool` execution mode should be preferred. + +If you don't want the generated individual term queries to be cached, +you can use: `bool_nocache`, `and_nocache` or `or_nocache` instead, but +be aware that this will affect performance. + +The "total" terms filter caching can still be explicitly controlled +using the `_cache` option. Note the default value for it depends on the +execution value. + +For example: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "terms" : { + "user" : ["kimchy", "elasticsearch"], + "execution" : "bool", + "_cache": true + } + } + } +} +-------------------------------------------------- + +[float] +==== Caching + +The result of the filter is automatically cached by default. The +`_cache` can be set to `false` to turn it off. + +[float] +==== Terms lookup mechanism + +When it's needed to specify a `terms` filter with a lot of terms it can +be beneficial to fetch those term values from a document in an index. A +concrete example would be to filter tweets tweeted by your followers. +Potentially the amount of user ids specified in the terms filter can be +a lot. In this scenario it makes sense to use the terms filter's terms +lookup mechanism. + +The terms lookup mechanism supports the following options: + +[horizontal] +`index`:: + The index to fetch the term values from. Defaults to the + current index. 
+ +`type`:: + The type to fetch the term values from. + +`id`:: + The id of the document to fetch the term values from. + +`path`:: + The field specified as path to fetch the actual values for the + `terms` filter. + +`routing`:: + A custom routing value to be used when retrieving the + external terms doc. + +`cache`:: + Whether to cache the filter built from the retrieved document + (`true` - default) or whether to fetch and rebuild the filter on every + request (`false`). See "<<query-dsl-terms-filter-lookup-caching,Terms lookup caching>>" below + +The values for the `terms` filter will be fetched from a field in a +document with the specified id in the specified type and index. +Internally a get request is executed to fetch the values from the +specified path. At the moment for this feature to work the `_source` +needs to be stored. + +Also, consider using an index with a single shard and fully replicated +across all nodes if the "reference" terms data is not large. The lookup +terms filter will prefer to execute the get request on a local node if +possible, reducing the need for networking. + +["float",id="query-dsl-terms-filter-lookup-caching"] +==== Terms lookup caching + +There is an additional cache involved, which caches the lookup of the +lookup document to the actual terms. This lookup cache is a LRU cache. +This cache has the following options: + +`indices.cache.filter.terms.size`:: + The size of the lookup cache. The default is `10mb`. + +`indices.cache.filter.terms.expire_after_access`:: + The time after the last read an entry should expire. Disabled by default. + +`indices.cache.filter.terms.expire_after_write`: + The time after the last write an entry should expire. Disabled by default. + +All options for the lookup of the documents cache can only be configured +via the `elasticsearch.yml` file. + +When using the terms lookup the `execution` option isn't taken into +account and behaves as if the execution mode was set to `plain`. 
+ +[float] +==== Terms lookup twitter example + +[source,js] +-------------------------------------------------- +# index the information for user with id 2, specifically, its followers +curl -XPUT localhost:9200/users/user/2 -d '{ + "followers" : ["1", "3"] +}' + +# index a tweet, from user with id 2 +curl -XPUT localhost:9200/tweets/tweet/1 -d '{ + "user" : "2" +}' + +# search on all the tweets that match the followers of user 2 +curl -XGET localhost:9200/tweets/_search -d '{ + "query" : { + "filtered" : { + "filter" : { + "terms" : { + "user" : { + "index" : "users", + "type" : "user", + "id" : "2", + "path" : "followers" + }, + "_cache_key" : "user_2_friends" + } + } + } + } +}' +-------------------------------------------------- + +The above is highly optimized, both in a sense that the list of +followers will not be fetched if the filter is already cached in the +filter cache, and with internal LRU cache for fetching external values +for the terms filter. Also, the entry in the filter cache will not hold +`all` the terms reducing the memory required for it. + +`_cache_key` is recommended to be set, so its simple to clear the cache +associated with it using the clear cache API. For example: + +[source,js] +-------------------------------------------------- +curl -XPOST 'localhost:9200/tweets/_cache/clear?filter_keys=user_2_friends' +-------------------------------------------------- + +The structure of the external terms document can also include array of +inner objects, for example: + +[source,js] +-------------------------------------------------- +curl -XPUT localhost:9200/users/user/2 -d '{ + "followers" : [ + { + "id" : "1" + }, + { + "id" : "2" + } + ] +}' +-------------------------------------------------- + +In which case, the lookup path will be `followers.id`. 
diff --git a/docs/reference/query-dsl/filters/type-filter.asciidoc b/docs/reference/query-dsl/filters/type-filter.asciidoc new file mode 100644 index 0000000..07bde38 --- /dev/null +++ b/docs/reference/query-dsl/filters/type-filter.asciidoc @@ -0,0 +1,15 @@ +[[query-dsl-type-filter]] +=== Type Filter + +Filters documents matching the provided document / mapping type. Note, +this filter can work even when the `_type` field is not indexed (using +the <<mapping-uid-field,_uid>> field). + +[source,js] +-------------------------------------------------- +{ + "type" : { + "value" : "my_type" + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries.asciidoc b/docs/reference/query-dsl/queries.asciidoc new file mode 100644 index 0000000..e82600a --- /dev/null +++ b/docs/reference/query-dsl/queries.asciidoc @@ -0,0 +1,83 @@ +[[query-dsl-queries]] +== Queries + +As a general rule, queries should be used instead of filters: + +* for full text search +* where the result depends on a relevance score + +include::queries/match-query.asciidoc[] + +include::queries/multi-match-query.asciidoc[] + +include::queries/bool-query.asciidoc[] + +include::queries/boosting-query.asciidoc[] + +include::queries/common-terms-query.asciidoc[] + +include::queries/constant-score-query.asciidoc[] + +include::queries/dis-max-query.asciidoc[] + +include::queries/filtered-query.asciidoc[] + +include::queries/flt-query.asciidoc[] + +include::queries/flt-field-query.asciidoc[] + +include::queries/function-score-query.asciidoc[] + +include::queries/fuzzy-query.asciidoc[] + +include::queries/geo-shape-query.asciidoc[] + +include::queries/has-child-query.asciidoc[] + +include::queries/has-parent-query.asciidoc[] + +include::queries/ids-query.asciidoc[] + +include::queries/indices-query.asciidoc[] + +include::queries/match-all-query.asciidoc[] + +include::queries/mlt-query.asciidoc[] + +include::queries/mlt-field-query.asciidoc[] + 
+include::queries/nested-query.asciidoc[] + +include::queries/prefix-query.asciidoc[] + +include::queries/query-string-query.asciidoc[] + +include::queries/simple-query-string-query.asciidoc[] + +include::queries/range-query.asciidoc[] + +include::queries/regexp-query.asciidoc[] + +include::queries/span-first-query.asciidoc[] + +include::queries/span-multi-term-query.asciidoc[] + +include::queries/span-near-query.asciidoc[] + +include::queries/span-not-query.asciidoc[] + +include::queries/span-or-query.asciidoc[] + +include::queries/span-term-query.asciidoc[] + +include::queries/term-query.asciidoc[] + +include::queries/terms-query.asciidoc[] + +include::queries/top-children-query.asciidoc[] + +include::queries/wildcard-query.asciidoc[] + +include::queries/minimum-should-match.asciidoc[] + +include::queries/multi-term-rewrite.asciidoc[] diff --git a/docs/reference/query-dsl/queries/bool-query.asciidoc b/docs/reference/query-dsl/queries/bool-query.asciidoc new file mode 100644 index 0000000..a9b565c --- /dev/null +++ b/docs/reference/query-dsl/queries/bool-query.asciidoc @@ -0,0 +1,54 @@ +[[query-dsl-bool-query]] +=== Bool Query + +A query that matches documents matching boolean combinations of other +queries. The bool query maps to Lucene `BooleanQuery`. It is built using +one or more boolean clauses, each clause with a typed occurrence. The +occurrence types are: + +[cols="<,<",options="header",] +|======================================================================= +|Occur |Description +|`must` |The clause (query) must appear in matching documents. + +|`should` |The clause (query) should appear in the matching document. In +a boolean query with no `must` clauses, one or more `should` clauses +must match a document. The minimum number of should clauses to match can +be set using the +<<query-dsl-minimum-should-match,`minimum_should_match`>> +parameter. + +|`must_not` |The clause (query) must not appear in the matching +documents. 
+|======================================================================= + +The bool query also supports `disable_coord` parameter (defaults to +`false`). Basically the coord similarity computes a score factor based +on the fraction of all query terms that a document contains. See Lucene +`BooleanQuery` for more details. + +[source,js] +-------------------------------------------------- +{ + "bool" : { + "must" : { + "term" : { "user" : "kimchy" } + }, + "must_not" : { + "range" : { + "age" : { "from" : 10, "to" : 20 } + } + }, + "should" : [ + { + "term" : { "tag" : "wow" } + }, + { + "term" : { "tag" : "elasticsearch" } + } + ], + "minimum_should_match" : 1, + "boost" : 1.0 + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/boosting-query.asciidoc b/docs/reference/query-dsl/queries/boosting-query.asciidoc new file mode 100644 index 0000000..969b3bb --- /dev/null +++ b/docs/reference/query-dsl/queries/boosting-query.asciidoc @@ -0,0 +1,26 @@ +[[query-dsl-boosting-query]] +=== Boosting Query + +The `boosting` query can be used to effectively demote results that +match a given query. Unlike the "NOT" clause in bool query, this still +selects documents that contain undesirable terms, but reduces their +overall score. 
+ +[source,js] +-------------------------------------------------- +{ + "boosting" : { + "positive" : { + "term" : { + "field1" : "value1" + } + }, + "negative" : { + "term" : { + "field2" : "value2" + } + }, + "negative_boost" : 0.2 + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/common-terms-query.asciidoc b/docs/reference/query-dsl/queries/common-terms-query.asciidoc new file mode 100644 index 0000000..256d9bb --- /dev/null +++ b/docs/reference/query-dsl/queries/common-terms-query.asciidoc @@ -0,0 +1,263 @@ +[[query-dsl-common-terms-query]] +=== Common Terms Query + +The `common` terms query is a modern alternative to stopwords which +improves the precision and recall of search results (by taking stopwords +into account), without sacrificing performance. + +[float] +==== The problem + +Every term in a query has a cost. A search for `"The brown fox"` +requires three term queries, one for each of `"the"`, `"brown"` and +`"fox"`, all of which are executed against all documents in the index. +The query for `"the"` is likely to match many documents and thus has a +much smaller impact on relevance than the other two terms. + +Previously, the solution to this problem was to ignore terms with high +frequency. By treating `"the"` as a _stopword_, we reduce the index size +and reduce the number of term queries that need to be executed. + +The problem with this approach is that, while stopwords have a small +impact on relevance, they are still important. If we remove stopwords, +we lose precision, (eg we are unable to distinguish between `"happy"` +and `"not happy"`) and we lose recall (eg text like `"The The"` or +`"To be or not to be"` would simply not exist in the index). + +[float] +==== The solution + +The `common` terms query divides the query terms into two groups: more +important (ie _low frequency_ terms) and less important (ie _high +frequency_ terms which would previously have been stopwords). 
+ +First it searches for documents which match the more important terms. +These are the terms which appear in fewer documents and have a greater +impact on relevance. + +Then, it executes a second query for the less important terms -- terms +which appear frequently and have a low impact on relevance. But instead +of calculating the relevance score for *all* matching documents, it only +calculates the `_score` for documents already matched by the first +query. In this way the high frequency terms can improve the relevance +calculation without paying the cost of poor performance. + +If a query consists only of high frequency terms, then a single query is +executed as an `AND` (conjunction) query, in other words all terms are +required. Even though each individual term will match many documents, +the combination of terms narrows down the resultset to only the most +relevant. The single query can also be executed as an `OR` with a +specific +<<query-dsl-minimum-should-match,`minimum_should_match`>>, +in this case a high enough value should probably be used. + +Terms are allocated to the high or low frequency groups based on the +`cutoff_frequency`, which can be specified as an absolute frequency +(`>=1`) or as a relative frequency (`0.0 .. 1.0`). + +Perhaps the most interesting property of this query is that it adapts to +domain specific stopwords automatically. For example, on a video hosting +site, common terms like `"clip"` or `"video"` will automatically behave +as stopwords without the need to maintain a manual list. + +[float] +==== Examples + +In this example, words that have a document frequency greater than 0.1% +(eg `"this"` and `"is"`) will be treated as _common terms_. 
+ +[source,js] +-------------------------------------------------- +{ + "common": { + "body": { + "query": "this is bonsai cool", + "cutoff_frequency": 0.001 + } + } +} +-------------------------------------------------- + +The number of terms which should match can be controlled with the +<<query-dsl-minimum-should-match,`minimum_should_match`>> +(`high_freq`, `low_freq`), `low_freq_operator` (default `"or"`) and +`high_freq_operator` (default `"or"`) parameters. + +For low frequency terms, set the `low_freq_operator` to `"and"` to make +all terms required: + +[source,js] +-------------------------------------------------- +{ + "common": { + "body": { + "query": "nelly the elephant as a cartoon", + "cutoff_frequency": 0.001, + "low_freq_operator" "and" + } + } +} +-------------------------------------------------- + +which is roughly equivalent to: + +[source,js] +-------------------------------------------------- +{ + "bool": { + "must": [ + { "term": { "body": "nelly"}}, + { "term": { "body": "elephant"}}, + { "term": { "body": "cartoon"}} + ], + "should": [ + { "term": { "body": "the"}} + { "term": { "body": "as"}} + { "term": { "body": "a"}} + ] + } +} +-------------------------------------------------- + +Alternatively use +<<query-dsl-minimum-should-match,`minimum_should_match`>> +to specify a minimum number or percentage of low frequency terms which +must be present, for instance: + +[source,js] +-------------------------------------------------- +{ + "common": { + "body": { + "query": "nelly the elephant as a cartoon", + "cutoff_frequency": 0.001, + "minimum_should_match": 2 + } + } +} +-------------------------------------------------- + +which is roughly equivalent to: + +[source,js] +-------------------------------------------------- +{ + "bool": { + "must": { + "bool": { + "should": [ + { "term": { "body": "nelly"}}, + { "term": { "body": "elephant"}}, + { "term": { "body": "cartoon"}} + ], + "minimum_should_match": 2 + } + }, + "should": [ + { "term": 
{ "body": "the"}},
        { "term": { "body": "as"}},
        { "term": { "body": "a"}}
But the most +interesting use of the +<<query-dsl-minimum-should-match,`minimum_should_match`>> +for high frequency terms is when there are only high frequency terms: + +[source,js] +-------------------------------------------------- +{ + "common": { + "body": { + "query": "how not to be", + "cutoff_frequency": 0.001, + "minimum_should_match": { + "low_freq" : 2, + "high_freq" : 3 + } + } + } +} +-------------------------------------------------- + +which is roughly equivalent to: + +[source,js] +-------------------------------------------------- +{ + "bool": { + "should": [ + { "term": { "body": "how"}}, + { "term": { "body": "not"}}, + { "term": { "body": "to"}}, + { "term": { "body": "be"}} + ], + "minimum_should_match": "3<50%" + } +} +-------------------------------------------------- + +The high frequency generated query is then slightly less restrictive +than with an `AND`. + +The `common` terms query also supports `boost`, `analyzer` and +`disable_coord` as parameters. diff --git a/docs/reference/query-dsl/queries/constant-score-query.asciidoc b/docs/reference/query-dsl/queries/constant-score-query.asciidoc new file mode 100644 index 0000000..06ed6f7 --- /dev/null +++ b/docs/reference/query-dsl/queries/constant-score-query.asciidoc @@ -0,0 +1,36 @@ +[[query-dsl-constant-score-query]] +=== Constant Score Query + +A query that wraps a filter or another query and simply returns a +constant score equal to the query boost for every document in the +filter. Maps to Lucene `ConstantScoreQuery`. + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "filter" : { + "term" : { "user" : "kimchy"} + }, + "boost" : 1.2 + } +} +-------------------------------------------------- + +The filter object can hold only filter elements, not queries. Filters +can be much faster compared to queries since they don't perform any +scoring, especially when they are cached. 
+ +A query can also be wrapped in a `constant_score` query: + +[source,js] +-------------------------------------------------- +{ + "constant_score" : { + "query" : { + "term" : { "user" : "kimchy"} + }, + "boost" : 1.2 + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/dis-max-query.asciidoc b/docs/reference/query-dsl/queries/dis-max-query.asciidoc new file mode 100644 index 0000000..2938c8d --- /dev/null +++ b/docs/reference/query-dsl/queries/dis-max-query.asciidoc @@ -0,0 +1,44 @@ +[[query-dsl-dis-max-query]] +=== Dis Max Query + +A query that generates the union of documents produced by its +subqueries, and that scores each document with the maximum score for +that document as produced by any subquery, plus a tie breaking increment +for any additional matching subqueries. + +This is useful when searching for a word in multiple fields with +different boost factors (so that the fields cannot be combined +equivalently into a single search field). We want the primary score to +be the one associated with the highest boost, not the sum of the field +scores (as Boolean Query would give). If the query is "albino elephant" +this ensures that "albino" matching one field and "elephant" matching +another gets a higher score than "albino" matching both fields. To get +this result, use both Boolean Query and DisjunctionMax Query: for each +term a DisjunctionMaxQuery searches for it in each field, while the set +of these DisjunctionMaxQuery's is combined into a BooleanQuery. + +The tie breaker capability allows results that include the same term in +multiple fields to be judged better than results that include this term +in only the best of those multiple fields, without confusing this with +the better case of two different terms in the multiple fields.The +default `tie_breaker` is `0.0`. + +This query maps to Lucene `DisjunctionMaxQuery`. 
+ +[source,js] +-------------------------------------------------- +{ + "dis_max" : { + "tie_breaker" : 0.7, + "boost" : 1.2, + "queries" : [ + { + "term" : { "age" : 34 } + }, + { + "term" : { "age" : 35 } + } + ] + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/filtered-query.asciidoc b/docs/reference/query-dsl/queries/filtered-query.asciidoc new file mode 100644 index 0000000..bf51e9c --- /dev/null +++ b/docs/reference/query-dsl/queries/filtered-query.asciidoc @@ -0,0 +1,25 @@ +[[query-dsl-filtered-query]] +=== Filtered Query + +A query that applies a filter to the results of another query. This +query maps to Lucene `FilteredQuery`. + +[source,js] +-------------------------------------------------- +{ + "filtered" : { + "query" : { + "term" : { "tag" : "wow" } + }, + "filter" : { + "range" : { + "age" : { "from" : 10, "to" : 20 } + } + } + } +} +-------------------------------------------------- + +The filter object can hold only filter elements, not queries. Filters +can be much faster compared to queries since they don't perform any +scoring, especially when they are cached. diff --git a/docs/reference/query-dsl/queries/flt-field-query.asciidoc b/docs/reference/query-dsl/queries/flt-field-query.asciidoc new file mode 100644 index 0000000..205dc61 --- /dev/null +++ b/docs/reference/query-dsl/queries/flt-field-query.asciidoc @@ -0,0 +1,47 @@ +[[query-dsl-flt-field-query]] +=== Fuzzy Like This Field Query + +The `fuzzy_like_this_field` query is the same as the `fuzzy_like_this` +query, except that it runs against a single field. It provides nicer +query DSL over the generic `fuzzy_like_this` query, and support typed +fields query (automatically wraps typed fields with type filter to match +only on the specific type). 
+ +[source,js] +-------------------------------------------------- +{ + "fuzzy_like_this_field" : { + "name.first" : { + "like_text" : "text like this one", + "max_query_terms" : 12 + } + } +} +-------------------------------------------------- + +`fuzzy_like_this_field` can be shortened to `flt_field`. + +The `fuzzy_like_this_field` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`like_text` |The text to find documents like it, *required*. + +|`ignore_tf` |Should term frequency be ignored. Defaults to `false`. + +|`max_query_terms` |The maximum number of query terms that will be +included in any generated query. Defaults to `25`. + +|`fuzziness` |The fuzziness of the term variants. Defaults +to `0.5`. See <<fuzziness>>. + +|`prefix_length` |Length of required common prefix on variant terms. +Defaults to `0`. + +|`boost` |Sets the boost value of the query. Defaults to `1.0`. + +|`analyzer` |The analyzer that will be used to analyze the text. +Defaults to the analyzer associated with the field. +|======================================================================= + diff --git a/docs/reference/query-dsl/queries/flt-query.asciidoc b/docs/reference/query-dsl/queries/flt-query.asciidoc new file mode 100644 index 0000000..231de6b --- /dev/null +++ b/docs/reference/query-dsl/queries/flt-query.asciidoc @@ -0,0 +1,65 @@ +[[query-dsl-flt-query]] +=== Fuzzy Like This Query + +Fuzzy like this query find documents that are "like" provided text by +running it against one or more fields. + +[source,js] +-------------------------------------------------- +{ + "fuzzy_like_this" : { + "fields" : ["name.first", "name.last"], + "like_text" : "text like this one", + "max_query_terms" : 12 + } +} +-------------------------------------------------- + +`fuzzy_like_this` can be shortened to `flt`. 
+ +The `fuzzy_like_this` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`fields` |A list of the fields to run the more like this query against. +Defaults to the `_all` field. + +|`like_text` |The text to find documents like it, *required*. + +|`ignore_tf` |Should term frequency be ignored. Defaults to `false`. + +|`max_query_terms` |The maximum number of query terms that will be +included in any generated query. Defaults to `25`. + +|`fuzziness` |The minimum similarity of the term variants. Defaults +to `0.5`. See <<fuzziness>>. + +|`prefix_length` |Length of required common prefix on variant terms. +Defaults to `0`. + +|`boost` |Sets the boost value of the query. Defaults to `1.0`. + +|`analyzer` |The analyzer that will be used to analyze the text. +Defaults to the analyzer associated with the field. +|======================================================================= + +[float] +==== How it Works + +Fuzzifies ALL terms provided as strings and then picks the best n +differentiating terms. In effect this mixes the behaviour of FuzzyQuery +and MoreLikeThis but with special consideration of fuzzy scoring +factors. This generally produces good results for queries where users +may provide details in a number of fields and have no knowledge of +boolean query syntax and also want a degree of fuzzy matching and a fast +query. + +For each source term the fuzzy variants are held in a BooleanQuery with +no coord factor (because we are not looking for matches on multiple +variants in any one doc). Additionally, a specialized TermQuery is used +for variants and does not use that variant term's IDF because this would +favor rarer terms, such as misspellings. Instead, all variants use the +same IDF ranking (the one for the source query term) and this is +factored into the variant's boost. 
If the source query term does not +exist in the index the average IDF of the variants is used. diff --git a/docs/reference/query-dsl/queries/function-score-query.asciidoc b/docs/reference/query-dsl/queries/function-score-query.asciidoc new file mode 100644 index 0000000..fa5b2bd --- /dev/null +++ b/docs/reference/query-dsl/queries/function-score-query.asciidoc @@ -0,0 +1,491 @@ +[[query-dsl-function-score-query]] +=== Function Score Query + +The `function_score` allows you to modify the score of documents that are +retrieved by a query. This can be useful if, for example, a score +function is computationally expensive and it is sufficient to compute +the score on a filtered set of documents. + +`function_score` provides the same functionality that +`custom_boost_factor`, `custom_score` and +`custom_filters_score` provided +but furthermore adds futher scoring functionality such as +distance and recency scoring (see description below). + +==== Using function score + +To use `function_score`, the user has to define a query and one or +several functions, that compute a new score for each document returned +by the query. + +`function_score` can be used with only one function like this: + +[source,js] +-------------------------------------------------- +"function_score": { + "(query|filter)": {}, + "boost": "boost for the whole query", + "FUNCTION": {}, + "boost_mode":"(multiply|replace|...)" +} +-------------------------------------------------- + +Furthermore, several functions can be combined. 
In this case one can +optionally choose to apply the function only if a document matches a +given filter: + +[source,js] +-------------------------------------------------- +"function_score": { + "(query|filter)": {}, + "boost": "boost for the whole query", + "functions": [ + { + "filter": {}, + "FUNCTION": {} + }, + { + "FUNCTION": {} + } + ], + "max_boost": number, + "score_mode": "(multiply|max|...)", + "boost_mode": "(multiply|replace|...)" +} +-------------------------------------------------- + +If no filter is given with a function this is equivalent to specifying +`"match_all": {}` + +First, each document is scored by the defined functons. The parameter +`score_mode` specifies how the computed scores are combined: + +[horizontal] +`multiply`:: scores are multiplied (default) +`sum`:: scores are summed +`avg`:: scores are averaged +`first`:: the first function that has a matching filter + is applied +`max`:: maximum score is used +`min`:: minimum score is used + +The new score can be restricted to not exceed a certain limit by setting +the `max_boost` parameter. The default for `max_boost` is FLT_MAX. + +Finally, the newly computed score is combined with the score of the +query. The parameter `boost_mode` defines how: + +[horizontal] +`multiply`:: query score and function score is multiplied (default) +`replace`:: only function score is used, the query score is ignored +`sum`:: query score and function score are added +`avg`:: average +`max`:: max of query score and function score +`min`:: min of query score and function score + + +==== Score functions + +The `function_score` query provides several types of score functions. + +===== Script score + +The `script_score` function allows you to wrap another query and customize +the scoring of it optionally with a computation derived from other numeric +field values in the doc using a script expression. 
Here is a +simple sample: + +[source,js] +-------------------------------------------------- +"script_score" : { + "script" : "_score * doc['my_numeric_field'].value" +} +-------------------------------------------------- + +On top of the different scripting field values and expression, the +`_score` script parameter can be used to retrieve the score based on the +wrapped query. + +Scripts are cached for faster execution. If the script has parameters +that it needs to take into account, it is preferable to reuse the same +script, and provide parameters to it: + +[source,js] +-------------------------------------------------- +"script_score": { + "lang": "lang", + "params": { + "param1": value1, + "param2": value2 + }, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" +} +-------------------------------------------------- + +Note that unlike the `custom_score` query, the +score of the query is multiplied with the result of the script scoring. If +you wish to inhibit this, set `"boost_mode": "replace"` + +===== Boost factor + +The `boost_factor` score allows you to multiply the score by the provided +`boost_factor`. This can sometimes be desired since boost value set on +specific queries gets normalized, while for this score function it does +not. + +[source,js] +-------------------------------------------------- +"boost_factor" : number +-------------------------------------------------- + +===== Random + +The `random_score` generates scores via a pseudo random number algorithm +that is initialized with a `seed`. + +[source,js] +-------------------------------------------------- +"random_score": { + "seed" : number +} +-------------------------------------------------- + +===== Decay functions + +Decay functions score a document with a function that decays depending +on the distance of a numeric field value of the document from a user +given origin. This is similar to a range query, but with smooth edges +instead of boxes. 
+ +To use distance scoring on a query that has numerical fields, the user +has to define an `origin` and a `scale` for each field. The `origin` +is needed to define the ``central point'' from which the distance +is calculated, and the `scale` to define the rate of decay. The +decay function is specified as + +[source,js] +-------------------------------------------------- +"DECAY_FUNCTION": { + "FIELD_NAME": { + "origin": "11, 12", + "scale": "2km", + "offset": "0km", + "decay": 0.33 + } +} +-------------------------------------------------- + +where `DECAY_FUNCTION` can be "linear", "exp" and "gauss" (see below). The specified field must be a numeric field. In the above example, the field is a <<mapping-geo-point-type>> and origin can be provided in geo format. `scale` and `offset` must be given with a unit in this case. If your field is a date field, you can set `scale` and `offset` as days, weeks, and so on. Example: + +[source,js] +-------------------------------------------------- + "DECAY_FUNCTION": { + "FIELD_NAME": { + "origin": "2013-09-17", + "scale": "10d", + "offset": "5d", + "decay" : 0.5 + } + } +-------------------------------------------------- + + +The format of the origin depends on the <<mapping-date-format>> defined in your mapping. If you do not define the origin, the current time is used. + + +The `offset` and `decay` parameters are optional. + +[horizontal] +`offset`:: + If an `offset` is defined, the decay function will only compute a the + decay function for documents with a distance greater that the defined + `offset`. The default is 0. + +`decay`:: + The `decay` parameter defines how documents are scored at the distance + given at `scale`. If no `decay` is defined, documents at the distance + `scale` will be scored 0.5. + +In the first example, your documents might represents hotels and contain a geo +location field. You want to compute a decay function depending on how +far the hotel is from a given location. 
You might not immediately see +what scale to choose for the gauss function, but you can say something +like: "At a distance of 2km from the desired location, the score should +be reduced by one third." +The parameter "scale" will then be adjusted automatically to assure that +the score function computes a score of 0.5 for hotels that are 2km away +from the desired location. + + +In the second example, documents with a field value between 2013-09-12 and 2013-09-22 would get a weight of 1.0 and documents which are 15 days from that date a weight of 0.5. + + + +The `DECAY_FUNCTION` determines the shape of the decay: + +[horizontal] +`gauss`:: + +Normal decay, computed as: ++ +image:images/Gaussian.png[] + +`exp`:: + +Exponential decay, computed as: ++ +image:images/Exponential.png[] + + +`linear`:: +Linear decay, computed as: ++ +image:images/Linear.png[]. ++ +In contrast to the normal and exponential decay, this function actually +sets the score to 0 if the field value exceeds twice the user given +scale value. + +==== Detailed example + +Suppose you are searching for a hotel in a certain town. Your budget is +limited. Also, you would like the hotel to be close to the town center, +so the farther the hotel is from the desired location the less likely +you are to check in. + +You would like the query results that match your criterion (for +example, "hotel, Nancy, non-smoker") to be scored with respect to +distance to the town center and also the price. + +Intuitively, you would like to define the town center as the origin and +maybe you are willing to walk 2km to the town center from the hotel. + +In this case your *origin* for the location field is the town center +and the *scale* is ~2km. + +If your budget is low, you would probably prefer something cheap above +something expensive. For the price field, the *origin* would be 0 Euros +and the *scale* depends on how much you are willing to pay, for example 20 Euros. 
+ +In this example, the fields might be called "price" for the price of the +hotel and "location" for the coordinates of this hotel. + +The function for `price` in this case would be + +[source,js] +-------------------------------------------------- +"DECAY_FUNCTION": { + "price": { + "origin": "0", + "scale": "20" + } +} +-------------------------------------------------- + +and for `location`: + +[source,js] +-------------------------------------------------- + +"DECAY_FUNCTION": { + "location": { + "origin": "11, 12", + "scale": "2km" + } +} +-------------------------------------------------- + +where `DECAY_FUNCTION` can be "linear", "exp" and "gauss". + +Suppose you want to multiply these two functions on the original score, +the request would look like this: + +[source,js] +-------------------------------------------------- +curl 'localhost:9200/hotels/_search/' -d '{ +"query": { + "function_score": { + "functions": [ + { + "DECAY_FUNCTION": { + "price": { + "origin": "0", + "scale": "20" + } + } + }, + { + "DECAY_FUNCTION": { + "location": { + "origin": "11, 12", + "scale": "2km" + } + } + } + ], + "query": { + "match": { + "properties": "balcony" + } + }, + "score_mode": "multiply" + } +} +}' +-------------------------------------------------- + +Next, we show how the computed score looks like for each of the three +possible decay functions. + +===== Normal decay, keyword `gauss` + +When choosing `gauss` as the decay function in the above example, the +contour and surface plot of the multiplier looks like this: + +image::https://f.cloud.github.com/assets/4320215/768157/cd0e18a6-e898-11e2-9b3c-f0145078bd6f.png[width="700px"] + +image::https://f.cloud.github.com/assets/4320215/768160/ec43c928-e898-11e2-8e0d-f3c4519dbd89.png[width="700px"] + +Suppose your original search results matches three hotels : + +* "Backback Nap" +* "Drink n Drive" +* "BnB Bellevue". 
+ +"Drink n Drive" is pretty far from your defined location (nearly 2 km) +and is not too cheap (about 13 Euros) so it gets a low factor a factor +of 0.56. "BnB Bellevue" and "Backback Nap" are both pretty close to the +defined location but "BnB Bellevue" is cheaper, so it gets a multiplier +of 0.86 whereas "Backpack Nap" gets a value of 0.66. + +===== Exponential decay, keyword `exp` + +When choosing `exp` as the decay function in the above example, the +contour and surface plot of the multiplier looks like this: + +image::https://f.cloud.github.com/assets/4320215/768161/082975c0-e899-11e2-86f7-174c3a729d64.png[width="700px"] + +image::https://f.cloud.github.com/assets/4320215/768162/0b606884-e899-11e2-907b-aefc77eefef6.png[width="700px"] + +===== Linear' decay, keyword `linear` + +When choosing `linear` as the decay function in the above example, the +contour and surface plot of the multiplier looks like this: + +image::https://f.cloud.github.com/assets/4320215/768164/1775b0ca-e899-11e2-9f4a-776b406305c6.png[width="700px"] + +image::https://f.cloud.github.com/assets/4320215/768165/19d8b1aa-e899-11e2-91bc-6b0553e8d722.png[width="700px"] + +==== Supported fields for decay functions + +Only single valued numeric fields, including time and geo locations, +are supported. + +==== What is a field is missing? + +If the numeric field is missing in the document, the function will +return 1. 
+ +==== Relation to `custom_boost`, `custom_score` and `custom_filters_score` + +The `custom_boost_factor` query + +[source,js] +-------------------------------------------------- +"custom_boost_factor": { + "boost_factor": 5.2, + "query": {...} +} +-------------------------------------------------- + +becomes + +[source,js] +-------------------------------------------------- +"function_score": { + "boost_factor": 5.2, + "query": {...} +} +-------------------------------------------------- + +The `custom_score` query + +[source,js] +-------------------------------------------------- +"custom_score": { + "params": { + "param1": 2, + "param2": 3.1 + }, + "query": {...}, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" +} +-------------------------------------------------- + +becomes + +[source,js] +-------------------------------------------------- +"function_score": { + "boost_mode": "replace", + "query": {...}, + "script_score": { + "params": { + "param1": 2, + "param2": 3.1 + }, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" + } +} +-------------------------------------------------- + +and the `custom_filters_score` + +[source,js] +-------------------------------------------------- +"custom_filters_score": { + "filters": [ + { + "boost_factor": "3", + "filter": {...} + }, + { + "filter": {…}, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" + } + ], + "params": { + "param1": 2, + "param2": 3.1 + }, + "query": {...}, + "score_mode": "first" +} +-------------------------------------------------- + +becomes: + +[source,js] +-------------------------------------------------- +"function_score": { + "functions": [ + { + "boost_factor": "3", + "filter": {...} + }, + { + "filter": {...}, + "script_score": { + "params": { + "param1": 2, + "param2": 3.1 + }, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" + } + } + ], + "query": {...}, + "score_mode": "first" +} 
+-------------------------------------------------- + + diff --git a/docs/reference/query-dsl/queries/fuzzy-query.asciidoc b/docs/reference/query-dsl/queries/fuzzy-query.asciidoc new file mode 100644 index 0000000..082f3f1 --- /dev/null +++ b/docs/reference/query-dsl/queries/fuzzy-query.asciidoc @@ -0,0 +1,102 @@ +[[query-dsl-fuzzy-query]] +=== Fuzzy Query + +The fuzzy query uses similarity based on Levenshtein edit distance for +`string` fields, and a `+/-` margin on numeric and date fields. + +==== String fields + +The `fuzzy` query generates all possible matching terms that are within the +maximum edit distance specified in `fuzziness` and then checks the term +dictionary to find out which of those generated terms actually exist in the +index. + +Here is a simple example: + +[source,js] +-------------------------------------------------- +{ + "fuzzy" : { "user" : "ki" } +} +-------------------------------------------------- + +Or with more advanced settings: + +[source,js] +-------------------------------------------------- +{ + "fuzzy" : { + "user" : { + "value" : "ki", + "boost" : 1.0, + "fuzziness" : 2, + "prefix_length" : 0, + "max_expansions": 100 + } + } +} +-------------------------------------------------- + +[float] +===== Parameters + +[horizontal] +`fuzziness`:: + + The maximum edit distance. Defaults to `AUTO`. See <<fuzziness>>. + +`prefix_length`:: + + The number of initial characters which will not be ``fuzzified''. This + helps to reduce the number of terms which must be examined. Defaults + to `0`. + +`max_expansions`:: + + The maximum number of terms that the `fuzzy` query will expand to. + Defaults to `0`. + + +WARNING: this query can be very heavy if `prefix_length` and `max_expansions` +are both set to their defaults of `0`. This could cause every term in the +index to be examined! 
+ + +[float] +==== Numeric and date fields + +Performs a <<query-dsl-range-query>> ``around'' the value using the +`fuzziness` value as a `+/-` range, where: + + -fuzziness <= field value <= +fuzziness + +For example: + +[source,js] +-------------------------------------------------- +{ + "fuzzy" : { + "price" : { + "value" : 12, + "fuzziness" : 2 + } + } +} +-------------------------------------------------- + +Will result in a range query between 10 and 14. Date fields support +<<time-units,time values>>, eg: + +[source,js] +-------------------------------------------------- +{ + "fuzzy" : { + "created" : { + "value" : "2010-02-05T12:05:07", + "fuzziness" : "1d" + } + } +} +-------------------------------------------------- + +See <<fuzziness>> for more details about accepted values. diff --git a/docs/reference/query-dsl/queries/geo-shape-query.asciidoc b/docs/reference/query-dsl/queries/geo-shape-query.asciidoc new file mode 100644 index 0000000..94cd039 --- /dev/null +++ b/docs/reference/query-dsl/queries/geo-shape-query.asciidoc @@ -0,0 +1,49 @@ +[[query-dsl-geo-shape-query]] +=== GeoShape Query + +Query version of the +<<query-dsl-geo-shape-filter,geo_shape Filter>>. + +Requires the <<mapping-geo-shape-type,geo_shape +Mapping>>. + +Given a document that looks like this: + +[source,js] +-------------------------------------------------- +{ + "name": "Wind & Wetter, Berlin, Germany", + "location": { + "type": "Point", + "coordinates": [13.400544, 52.530286] + } +} +-------------------------------------------------- + +The following query will find the point: + +[source,js] +-------------------------------------------------- +{ + "query": { + "geo_shape": { + "location": { + "shape": { + "type": "envelope", + "coordinates": [[13, 53],[14, 52]] + } + } + } + } +} +-------------------------------------------------- + +See the Filter's documentation for more information. 
+ +[float] +==== Relevancy and Score + +Currently Elasticsearch does not have any notion of geo shape relevancy, +consequently the Query internally uses a `constant_score` Query which +wraps a <<query-dsl-geo-shape-filter,geo_shape +filter>>. diff --git a/docs/reference/query-dsl/queries/has-child-query.asciidoc b/docs/reference/query-dsl/queries/has-child-query.asciidoc new file mode 100644 index 0000000..c562c2b --- /dev/null +++ b/docs/reference/query-dsl/queries/has-child-query.asciidoc @@ -0,0 +1,61 @@ +[[query-dsl-has-child-query]] +=== Has Child Query + +The `has_child` query works the same as the +<<query-dsl-has-child-filter,has_child>> filter, +by automatically wrapping the filter with a +<<query-dsl-constant-score-query,constant_score>> +(when using the default score type). It has the same syntax as the +<<query-dsl-has-child-filter,has_child>> filter: + +[source,js] +-------------------------------------------------- +{ + "has_child" : { + "type" : "blog_tag", + "query" : { + "term" : { + "tag" : "something" + } + } + } +} +-------------------------------------------------- + +An important difference with the `top_children` query is that this query +is always executed in two iterations whereas the `top_children` query +can be executed in one or more iteration. When using the `has_child` +query the `total_hits` is always correct. + +[float] +==== Scoring capabilities + +The `has_child` also has scoring support. The +supported score types are `max`, `sum`, `avg` or `none`. The default is +`none` and yields the same behaviour as in previous versions. If the +score type is set to another value than `none`, the scores of all the +matching child documents are aggregated into the associated parent +documents. 
The score type can be specified with the `score_type` field +inside the `has_child` query: + +[source,js] +-------------------------------------------------- +{ + "has_child" : { + "type" : "blog_tag", + "score_type" : "sum", + "query" : { + "term" : { + "tag" : "something" + } + } + } +} +-------------------------------------------------- + +[float] +==== Memory Considerations + +With the current implementation, all `_id` values are loaded to memory +(heap) in order to support fast lookups, so make sure there is enough +memory for it. diff --git a/docs/reference/query-dsl/queries/has-parent-query.asciidoc b/docs/reference/query-dsl/queries/has-parent-query.asciidoc new file mode 100644 index 0000000..8ef1f4e --- /dev/null +++ b/docs/reference/query-dsl/queries/has-parent-query.asciidoc @@ -0,0 +1,57 @@ +[[query-dsl-has-parent-query]] +=== Has Parent Query + +The `has_parent` query works the same as the +<<query-dsl-has-parent-filter,has_parent>> +filter, by automatically wrapping the filter with a constant_score (when +using the default score type). It has the same syntax as the +<<query-dsl-has-parent-filter,has_parent>> +filter. + +[source,js] +-------------------------------------------------- +{ + "has_parent" : { + "parent_type" : "blog", + "query" : { + "term" : { + "tag" : "something" + } + } + } +} +-------------------------------------------------- + +[float] +==== Scoring capabilities + +The `has_parent` also has scoring support. The +supported score types are `score` or `none`. The default is `none` and +this ignores the score from the parent document. The score is in this +case equal to the boost on the `has_parent` query (Defaults to 1). If +the score type is set to `score`, then the score of the matching parent +document is aggregated into the child documents belonging to the +matching parent document. 
The score type can be specified with the +`score_type` field inside the `has_parent` query: + +[source,js] +-------------------------------------------------- +{ + "has_parent" : { + "parent_type" : "blog", + "score_type" : "score", + "query" : { + "term" : { + "tag" : "something" + } + } + } +} +-------------------------------------------------- + +[float] +==== Memory Considerations + +With the current implementation, all `_id` values are loaded to memory +(heap) in order to support fast lookups, so make sure there is enough +memory for it. diff --git a/docs/reference/query-dsl/queries/ids-query.asciidoc b/docs/reference/query-dsl/queries/ids-query.asciidoc new file mode 100644 index 0000000..8de62d7 --- /dev/null +++ b/docs/reference/query-dsl/queries/ids-query.asciidoc @@ -0,0 +1,20 @@ +[[query-dsl-ids-query]] +=== Ids Query + +Filters documents that only have the provided ids. Note, this filter +does not require the <<mapping-id-field,_id>> +field to be indexed since it works using the +<<mapping-uid-field,_uid>> field. + +[source,js] +-------------------------------------------------- +{ + "ids" : { + "type" : "my_type", + "values" : ["1", "4", "100"] + } +} +-------------------------------------------------- + +The `type` is optional and can be omitted, and can also accept an array +of values. diff --git a/docs/reference/query-dsl/queries/indices-query.asciidoc b/docs/reference/query-dsl/queries/indices-query.asciidoc new file mode 100644 index 0000000..c597833 --- /dev/null +++ b/docs/reference/query-dsl/queries/indices-query.asciidoc @@ -0,0 +1,37 @@ +[[query-dsl-indices-query]] +=== Indices Query + +The `indices` query can be used when executed across multiple indices, +allowing to have a query that executes only when executed on an index +that matches a specific list of indices, and another query that executes +when it is executed on an index that does not match the listed indices. 
+ +[source,js] +-------------------------------------------------- +{ + "indices" : { + "indices" : ["index1", "index2"], + "query" : { + "term" : { "tag" : "wow" } + }, + "no_match_query" : { + "term" : { "tag" : "kow" } + } + } +} +-------------------------------------------------- + +You can use the `index` field to provide a single index. + +`no_match_query` can also have "string" value of `none` (to match no +documents), and `all` (to match all). Defaults to `all`. + +`query` is mandatory, as well as `indices` (or `index`). + +[TIP] +=================================================================== +The fields order is important: if the `indices` are provided before `query` +or `no_match_query`, the related queries get parsed only against the indices +that they are going to be executed on. This is useful to avoid parsing queries +when it is not necessary and prevent potential mapping errors. +=================================================================== diff --git a/docs/reference/query-dsl/queries/match-all-query.asciidoc b/docs/reference/query-dsl/queries/match-all-query.asciidoc new file mode 100644 index 0000000..2ea3d41 --- /dev/null +++ b/docs/reference/query-dsl/queries/match-all-query.asciidoc @@ -0,0 +1,20 @@ +[[query-dsl-match-all-query]] +=== Match All Query + +A query that matches all documents. Maps to Lucene `MatchAllDocsQuery`. 
+ +[source,js] +-------------------------------------------------- +{ + "match_all" : { } +} +-------------------------------------------------- + +Which can also have boost associated with it: + +[source,js] +-------------------------------------------------- +{ + "match_all" : { "boost" : 1.2 } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/match-query.asciidoc b/docs/reference/query-dsl/queries/match-query.asciidoc new file mode 100644 index 0000000..d514768 --- /dev/null +++ b/docs/reference/query-dsl/queries/match-query.asciidoc @@ -0,0 +1,234 @@ +[[query-dsl-match-query]] +=== Match Query + +A family of `match` queries that accept text/numerics/dates, analyzes +it, and constructs a query out of it. For example: + +[source,js] +-------------------------------------------------- +{ + "match" : { + "message" : "this is a test" + } +} +-------------------------------------------------- + +Note, `message` is the name of a field, you can substitute the name of +any field (including `_all`) instead. + +[float] +==== Types of Match Queries + +[float] +===== boolean + +The default `match` query is of type `boolean`. It means that the text +provided is analyzed and the analysis process constructs a boolean query +from the provided text. The `operator` flag can be set to `or` or `and` +to control the boolean clauses (defaults to `or`). The minimum number of +should clauses to match can be set using the +<<query-dsl-minimum-should-match,`minimum_should_match`>> +parameter. + +The `analyzer` can be set to control which analyzer will perform the +analysis process on the text. It default to the field explicit mapping +definition, or the default search analyzer. + +`fuzziness` allows _fuzzy matching_ based on the type of field being queried. +See <<fuzziness>> for allowed settings. + +The `prefix_length` and +`max_expansions` can be set in this case to control the fuzzy process. 
+
+If the fuzzy option is set the query will use `constant_score_rewrite`
+as its <<query-dsl-multi-term-rewrite,rewrite
+method>>; the `rewrite` parameter allows to control how the query will get
+rewritten.
+
+Here is an example when providing additional parameters (note the slight
+change in structure, `message` is the field name):
+
+[source,js]
+--------------------------------------------------
+{
+    "match" : {
+        "message" : {
+            "query" : "this is a test",
+            "operator" : "and"
+        }
+    }
+}
+--------------------------------------------------
+
+.zero_terms_query
+If the analyzer used removes all tokens in a query like a `stop` filter
+does, the default behavior is to match no documents at all. In order to
+change that the `zero_terms_query` option can be used, which accepts
+`none` (default) and `all` which corresponds to a `match_all` query.
+
+[source,js]
+--------------------------------------------------
+{
+    "match" : {
+        "message" : {
+            "query" : "to be or not to be",
+            "operator" : "and",
+            "zero_terms_query": "all"
+        }
+    }
+}
+--------------------------------------------------
+
+.cutoff_frequency
+The match query supports a `cutoff_frequency` that allows
+specifying an absolute or relative document frequency where high
+frequent terms are moved into an optional subquery and are only scored
+if one of the low frequent (below the cutoff) terms in the case of an
+`or` operator or all of the low frequent terms in the case of an `and`
+operator match.
+
+This query allows handling `stopwords` dynamically at runtime, is domain
+independent and doesn't require a stopword file. It prevents scoring /
+iterating high frequent terms and only takes the terms into account if
+more significant / less frequent terms match a document. Yet, if all of
+the query terms are above the given `cutoff_frequency` the query is
+automatically transformed into a pure conjunction (`and`) query to
+ensure fast execution. 
+
+The `cutoff_frequency` can either be relative to the number of documents
+in the index if in the range `[0..1)` or absolute if greater than or
+equal to `1.0`.
+
+Note: If the `cutoff_frequency` is used and the operator is `and`,
+_stacked tokens_ (tokens that are on the same position, as a `synonym` filter emits)
+are not handled gracefully as they are in a pure `and` query. For instance the query
+`fast fox` is analyzed into 3 terms `[fast, quick, fox]`, where `quick` is a synonym
+for `fast` on the same token positions. In that case the query might require both
+`fast` and `quick` to match if the operator is `and`.
+
+Here is an example showing a query composed of stopwords exclusively:
+
+[source,js]
+--------------------------------------------------
+{
+    "match" : {
+        "message" : {
+            "query" : "to be or not to be",
+            "cutoff_frequency" : 0.001
+        }
+    }
+}
+--------------------------------------------------
+
+[float]
+===== phrase
+
+The `match_phrase` query analyzes the text and creates a `phrase` query
+out of the analyzed text. For example:
+
+[source,js]
+--------------------------------------------------
+{
+    "match_phrase" : {
+        "message" : "this is a test"
+    }
+}
+--------------------------------------------------
+
+Since `match_phrase` is only a `type` of a `match` query, it can also be
+used in the following manner:
+
+[source,js]
+--------------------------------------------------
+{
+    "match" : {
+        "message" : {
+            "query" : "this is a test",
+            "type" : "phrase"
+        }
+    }
+}
+--------------------------------------------------
+
+A phrase query matches terms up to a configurable `slop`
+(which defaults to 0) in any order. Transposed terms have a slop of 2.
+
+The `analyzer` can be set to control which analyzer will perform the
+analysis process on the text. 
It defaults to the explicit mapping
+definition of the field, or the default search analyzer, for example:
+
+[source,js]
+--------------------------------------------------
+{
+    "match_phrase" : {
+        "message" : {
+            "query" : "this is a test",
+            "analyzer" : "my_analyzer"
+        }
+    }
+}
+--------------------------------------------------
+
+[float]
+===== match_phrase_prefix
+
+The `match_phrase_prefix` is the same as `match_phrase`, except that it
+allows for prefix matches on the last term in the text. For example:
+
+[source,js]
+--------------------------------------------------
+{
+    "match_phrase_prefix" : {
+        "message" : "this is a test"
+    }
+}
+--------------------------------------------------
+
+Or:
+
+[source,js]
+--------------------------------------------------
+{
+    "match" : {
+        "message" : {
+            "query" : "this is a test",
+            "type" : "phrase_prefix"
+        }
+    }
+}
+--------------------------------------------------
+
+It accepts the same parameters as the phrase type. In addition, it also
+accepts a `max_expansions` parameter that can control how many
+prefixes the last term will be expanded to. It is highly recommended to
+set it to an acceptable value to control the execution time of the query.
+For example:
+
+[source,js]
+--------------------------------------------------
+{
+    "match_phrase_prefix" : {
+        "message" : {
+            "query" : "this is a test",
+            "max_expansions" : 10
+        }
+    }
+}
+--------------------------------------------------
+
+[float]
+==== Comparison to query_string / field
+
+The match family of queries does not go through a "query parsing"
+process. It does not support field name prefixes, wildcard characters,
+or other "advanced" features. For this reason, chances of it failing are
+very small / non-existent, and it provides an excellent behavior when it
+comes to just analyze and run that text as a query behavior (which is
+usually what a text search box does). 
Also, the `phrase_prefix` type can +provide a great "as you type" behavior to automatically load search +results. + +[float] +==== Other options + +* `lenient` - If set to true will cause format based failures (like +providing text to a numeric field) to be ignored. Defaults to false. diff --git a/docs/reference/query-dsl/queries/minimum-should-match.asciidoc b/docs/reference/query-dsl/queries/minimum-should-match.asciidoc new file mode 100644 index 0000000..bedd496 --- /dev/null +++ b/docs/reference/query-dsl/queries/minimum-should-match.asciidoc @@ -0,0 +1,56 @@ +[[query-dsl-minimum-should-match]] +=== Minimum Should Match + +The `minimum_should_match` parameter possible values: + +[cols="<,<,<",options="header",] +|======================================================================= +|Type |Example |Description +|Integer |`3` |Indicates a fixed value regardless of the number of +optional clauses. + +|Negative integer |`-2` |Indicates that the total number of optional +clauses, minus this number should be mandatory. + +|Percentage |`75%` |Indicates that this percent of the total number of +optional clauses are necessary. The number computed from the percentage +is rounded down and used as the minimum. + +|Negative percentage |`-25%` |Indicates that this percent of the total +number of optional clauses can be missing. The number computed from the +percentage is rounded down, before being subtracted from the total to +determine the minimum. + +|Combination |`3<90%` |A positive integer, followed by the less-than +symbol, followed by any of the previously mentioned specifiers is a +conditional specification. It indicates that if the number of optional +clauses is equal to (or less than) the integer, they are all required, +but if it's greater than the integer, the specification applies. In this +example: if there are 1 to 3 clauses they are all required, but for 4 or +more clauses only 90% are required. 
+
+|Multiple combinations |`2<-25 9<-3` |Multiple conditional
+specifications can be separated by spaces, each one only being valid for
+numbers greater than the one before it. In this example: if there are 1
+or 2 clauses both are required, if there are 3-9 clauses all but 25% are
+required, and if there are more than 9 clauses, all but three are
+required.
+|=======================================================================
+
+*NOTE:*
+
+When dealing with percentages, negative values can be used to get
+different behavior in edge cases. 75% and -25% mean the same thing when
+dealing with 4 clauses, but when dealing with 5 clauses 75% means 3 are
+required, but -25% means 4 are required.
+
+If the calculations based on the specification determine that no
+optional clauses are needed, the usual rules about BooleanQueries still
+apply at search time (a BooleanQuery containing no required clauses must
+still match at least one optional clause).
+
+No matter what number the calculation arrives at, a value greater than
+the number of optional clauses, or a value less than 1 will never be
+used. (i.e. no matter how low or how high the result of the calculation
+is, the minimum number of required matches will never be lower
+than 1 or greater than the number of clauses.)
diff --git a/docs/reference/query-dsl/queries/mlt-field-query.asciidoc b/docs/reference/query-dsl/queries/mlt-field-query.asciidoc
new file mode 100644
index 0000000..e4f28bb
--- /dev/null
+++ b/docs/reference/query-dsl/queries/mlt-field-query.asciidoc
@@ -0,0 +1,68 @@
+[[query-dsl-mlt-field-query]]
+=== More Like This Field Query
+
+The `more_like_this_field` query is the same as the `more_like_this`
+query, except it runs against a single field. It provides nicer query
+DSL over the generic `more_like_this` query, and supports typed field
+queries (automatically wraps typed fields with a type filter to match
+only on the specific type). 
+ +[source,js] +-------------------------------------------------- +{ + "more_like_this_field" : { + "name.first" : { + "like_text" : "text like this one", + "min_term_freq" : 1, + "max_query_terms" : 12 + } + } +} +-------------------------------------------------- + +`more_like_this_field` can be shortened to `mlt_field`. + +The `more_like_this_field` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`like_text` |The text to find documents like it, *required*. + +|`percent_terms_to_match` |The percentage of terms to match on (float +value). Defaults to `0.3` (30 percent). + +|`min_term_freq` |The frequency below which terms will be ignored in the +source doc. The default frequency is `2`. + +|`max_query_terms` |The maximum number of query terms that will be +included in any generated query. Defaults to `25`. + +|`stop_words` |An array of stop words. Any word in this set is +considered "uninteresting" and ignored. Even if your Analyzer allows +stopwords, you might want to tell the MoreLikeThis code to ignore them, +as for the purposes of document similarity it seems reasonable to assume +that "a stop word is never interesting". + +|`min_doc_freq` |The frequency at which words will be ignored which do +not occur in at least this many docs. Defaults to `5`. + +|`max_doc_freq` |The maximum frequency in which words may still appear. +Words that appear in more than this many docs will be ignored. Defaults +to unbounded. + +|`min_word_length` |The minimum word length below which words will be +ignored. Defaults to `0`. (Old name "min_word_len" is deprecated) + +|`max_word_length` |The maximum word length above which words will be +ignored. Defaults to unbounded (`0`). (Old name "max_word_len" is deprecated) + +|`boost_terms` |Sets the boost factor to use when boosting terms. +Defaults to `1`. + +|`boost` |Sets the boost value of the query. 
Defaults to `1.0`. + +|`analyzer` |The analyzer that will be used to analyze the text. +Defaults to the analyzer associated with the field. +|======================================================================= + diff --git a/docs/reference/query-dsl/queries/mlt-query.asciidoc b/docs/reference/query-dsl/queries/mlt-query.asciidoc new file mode 100644 index 0000000..bea704a --- /dev/null +++ b/docs/reference/query-dsl/queries/mlt-query.asciidoc @@ -0,0 +1,67 @@ +[[query-dsl-mlt-query]] +=== More Like This Query + +More like this query find documents that are "like" provided text by +running it against one or more fields. + +[source,js] +-------------------------------------------------- +{ + "more_like_this" : { + "fields" : ["name.first", "name.last"], + "like_text" : "text like this one", + "min_term_freq" : 1, + "max_query_terms" : 12 + } +} +-------------------------------------------------- + +`more_like_this` can be shortened to `mlt`. + +The `more_like_this` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`fields` |A list of the fields to run the more like this query against. +Defaults to the `_all` field. + +|`like_text` |The text to find documents like it, *required*. + +|`percent_terms_to_match` |The percentage of terms to match on (float +value). Defaults to `0.3` (30 percent). + +|`min_term_freq` |The frequency below which terms will be ignored in the +source doc. The default frequency is `2`. + +|`max_query_terms` |The maximum number of query terms that will be +included in any generated query. Defaults to `25`. + +|`stop_words` |An array of stop words. Any word in this set is +considered "uninteresting" and ignored. Even if your Analyzer allows +stopwords, you might want to tell the MoreLikeThis code to ignore them, +as for the purposes of document similarity it seems reasonable to assume +that "a stop word is never interesting". 
+ +|`min_doc_freq` |The frequency at which words will be ignored which do +not occur in at least this many docs. Defaults to `5`. + +|`max_doc_freq` |The maximum frequency in which words may still appear. +Words that appear in more than this many docs will be ignored. Defaults +to unbounded. + +|`min_word_length` |The minimum word length below which words will be +ignored. Defaults to `0`.(Old name "min_word_len" is deprecated) + +|`max_word_length` |The maximum word length above which words will be +ignored. Defaults to unbounded (`0`). (Old name "max_word_len" is deprecated) + +|`boost_terms` |Sets the boost factor to use when boosting terms. +Defaults to `1`. + +|`boost` |Sets the boost value of the query. Defaults to `1.0`. + +|`analyzer` |The analyzer that will be used to analyze the text. +Defaults to the analyzer associated with the field. +|======================================================================= + diff --git a/docs/reference/query-dsl/queries/multi-match-query.asciidoc b/docs/reference/query-dsl/queries/multi-match-query.asciidoc new file mode 100644 index 0000000..cb098cd --- /dev/null +++ b/docs/reference/query-dsl/queries/multi-match-query.asciidoc @@ -0,0 +1,64 @@ +[[query-dsl-multi-match-query]] +=== Multi Match Query + +The `multi_match` query builds further on top of the `match` query by +allowing multiple fields to be specified. The idea here is to allow to +more easily build a concise match type query over multiple fields +instead of using a relatively more expressive query by using multiple +match queries within a `bool` query. + +The structure of the query is a bit different. Instead of a nested json +object defining the query field, there is a top json level field for +defining the query fields. 
Example:
+
+[source,js]
+--------------------------------------------------
+{
+    "multi_match" : {
+        "query" : "this is a test",
+        "fields" : [ "subject", "message" ]
+    }
+}
+--------------------------------------------------
+
+The `multi_match` query creates either a `bool` or a `dis_max` top level
+query. Each field is a query clause in this top level query. The query
+clause contains the actual query (the specified 'type' defines what
+query this will be). Each query clause is basically a `should` clause.
+
+[float]
+==== Options
+
+All options that apply on the `match` query also apply on the
+`multi_match` query. The `match` query options apply only on the
+individual clauses inside the top level query.
+
+* `fields` - Fields to be used in the query.
+* `use_dis_max` - Boolean indicating to either create a `dis_max` query
+or a `bool` query. Defaults to `true`.
+* `tie_breaker` - Multiplier value to balance the scores between lower
+and higher scoring fields. Only applicable when `use_dis_max` is set to
+true. Defaults to `0.0`.
+
+The query accepts all the options that a regular `match` query accepts.
+
+[float]
+==== Boosting
+
+The `multi_match` query supports field boosting via `^` notation in the
+fields json field.
+
+[source,js]
+--------------------------------------------------
+{
+    "multi_match" : {
+        "query" : "this is a test",
+        "fields" : [ "subject^2", "message" ]
+    }
+}
+--------------------------------------------------
+
+In the above example hits in the `subject` field are 2 times more
+important than in the `message` field. 
diff --git a/docs/reference/query-dsl/queries/multi-term-rewrite.asciidoc b/docs/reference/query-dsl/queries/multi-term-rewrite.asciidoc new file mode 100644 index 0000000..135be67 --- /dev/null +++ b/docs/reference/query-dsl/queries/multi-term-rewrite.asciidoc @@ -0,0 +1,42 @@ +[[query-dsl-multi-term-rewrite]] +=== Multi Term Query Rewrite + +Multi term queries, like +<<query-dsl-wildcard-query,wildcard>> and +<<query-dsl-prefix-query,prefix>> are called +multi term queries and end up going through a process of rewrite. This +also happens on the +<<query-dsl-query-string-query,query_string>>. +All of those queries allow to control how they will get rewritten using +the `rewrite` parameter: + +* When not set, or set to `constant_score_auto`, defaults to +automatically choosing either `constant_score_boolean` or +`constant_score_filter` based on query characteristics. +* `scoring_boolean`: A rewrite method that first translates each term +into a should clause in a boolean query, and keeps the scores as +computed by the query. Note that typically such scores are meaningless +to the user, and require non-trivial CPU to compute, so it's almost +always better to use `constant_score_auto`. This rewrite method will hit +too many clauses failure if it exceeds the boolean query limit (defaults +to `1024`). +* `constant_score_boolean`: Similar to `scoring_boolean` except scores +are not computed. Instead, each matching document receives a constant +score equal to the query's boost. This rewrite method will hit too many +clauses failure if it exceeds the boolean query limit (defaults to +`1024`). +* `constant_score_filter`: A rewrite method that first creates a private +Filter by visiting each term in sequence and marking all docs for that +term. Matching documents are assigned a constant score equal to the +query's boost. +* `top_terms_N`: A rewrite method that first translates each term into +should clause in boolean query, and keeps the scores as computed by the +query. 
This rewrite method only uses the top scoring terms so it will +not overflow boolean max clause count. The `N` controls the size of the +top scoring terms to use. +* `top_terms_boost_N`: A rewrite method that first translates each term +into should clause in boolean query, but the scores are only computed as +the boost. This rewrite method only uses the top scoring terms so it +will not overflow the boolean max clause count. The `N` controls the +size of the top scoring terms to use. + diff --git a/docs/reference/query-dsl/queries/nested-query.asciidoc b/docs/reference/query-dsl/queries/nested-query.asciidoc new file mode 100644 index 0000000..bc7e07c --- /dev/null +++ b/docs/reference/query-dsl/queries/nested-query.asciidoc @@ -0,0 +1,58 @@ +[[query-dsl-nested-query]] +=== Nested Query + +Nested query allows to query nested objects / docs (see +<<mapping-nested-type,nested mapping>>). The +query is executed against the nested objects / docs as if they were +indexed as separate docs (they are, internally) and resulting in the +root parent doc (or parent nested mapping). Here is a sample mapping we +will work with: + +[source,js] +-------------------------------------------------- +{ + "type1" : { + "properties" : { + "obj1" : { + "type" : "nested" + } + } + } +} +-------------------------------------------------- + +And here is a sample nested query usage: + +[source,js] +-------------------------------------------------- +{ + "nested" : { + "path" : "obj1", + "score_mode" : "avg", + "query" : { + "bool" : { + "must" : [ + { + "match" : {"obj1.name" : "blue"} + }, + { + "range" : {"obj1.count" : {"gt" : 5}} + } + ] + } + } + } +} +-------------------------------------------------- + +The query `path` points to the nested object path, and the `query` (or +`filter`) includes the query that will run on the nested docs matching +the direct path, and joining with the root parent docs. 
+ +The `score_mode` allows to set how inner children matching affects +scoring of parent. It defaults to `avg`, but can be `total`, `max` and +`none`. + +Multi level nesting is automatically supported, and detected, resulting +in an inner nested query to automatically match the relevant nesting +level (and not root) if it exists within another nested query. diff --git a/docs/reference/query-dsl/queries/prefix-query.asciidoc b/docs/reference/query-dsl/queries/prefix-query.asciidoc new file mode 100644 index 0000000..1bcf75a --- /dev/null +++ b/docs/reference/query-dsl/queries/prefix-query.asciidoc @@ -0,0 +1,36 @@ +[[query-dsl-prefix-query]] +=== Prefix Query + +Matches documents that have fields containing terms with a specified +prefix (*not analyzed*). The prefix query maps to Lucene `PrefixQuery`. +The following matches documents where the user field contains a term +that starts with `ki`: + +[source,js] +-------------------------------------------------- +{ + "prefix" : { "user" : "ki" } +} +-------------------------------------------------- + +A boost can also be associated with the query: + +[source,js] +-------------------------------------------------- +{ + "prefix" : { "user" : { "value" : "ki", "boost" : 2.0 } } +} +-------------------------------------------------- + +Or : + +[source,js] +-------------------------------------------------- +{ + "prefix" : { "user" : { "prefix" : "ki", "boost" : 2.0 } } +} +-------------------------------------------------- + +This multi term query allows to control how it gets rewritten using the +<<query-dsl-multi-term-rewrite,rewrite>> +parameter. 
diff --git a/docs/reference/query-dsl/queries/query-string-query.asciidoc b/docs/reference/query-dsl/queries/query-string-query.asciidoc new file mode 100644 index 0000000..5ce0b4e --- /dev/null +++ b/docs/reference/query-dsl/queries/query-string-query.asciidoc @@ -0,0 +1,161 @@ +[[query-dsl-query-string-query]] +=== Query String Query + +A query that uses a query parser in order to parse its content. Here is +an example: + +[source,js] +-------------------------------------------------- +{ + "query_string" : { + "default_field" : "content", + "query" : "this AND that OR thus" + } +} +-------------------------------------------------- + +The `query_string` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`query` |The actual query to be parsed. See <<query-string-syntax>>. + +|`default_field` |The default field for query terms if no prefix field +is specified. Defaults to the `index.query.default_field` index +settings, which in turn defaults to `_all`. + +|`default_operator` |The default operator used if no explicit operator +is specified. For example, with a default operator of `OR`, the query +`capital of Hungary` is translated to `capital OR of OR Hungary`, and +with default operator of `AND`, the same query is translated to +`capital AND of AND Hungary`. The default value is `OR`. + +|`analyzer` |The analyzer name used to analyze the query string. + +|`allow_leading_wildcard` |When set, `*` or `?` are allowed as the first +character. Defaults to `true`. + +|`lowercase_expanded_terms` |Whether terms of wildcard, prefix, fuzzy, +and range queries are to be automatically lower-cased or not (since they +are not analyzed). Default it `true`. + +|`enable_position_increments` |Set to `true` to enable position +increments in result queries. Defaults to `true`. + +|`fuzzy_max_expansions` |Controls the number of terms fuzzy queries will +expand to. 
Defaults to `50` + +|`fuzziness` |Set the fuzziness for fuzzy queries. Defaults +to `AUTO`. See <<fuzziness>> for allowed settings. + +|`fuzzy_prefix_length` |Set the prefix length for fuzzy queries. Default +is `0`. + +|`phrase_slop` |Sets the default slop for phrases. If zero, then exact +phrase matches are required. Default value is `0`. + +|`boost` |Sets the boost value of the query. Defaults to `1.0`. + +|`analyze_wildcard` |By default, wildcards terms in a query string are +not analyzed. By setting this value to `true`, a best effort will be +made to analyze those as well. + +|`auto_generate_phrase_queries` |Default to `false`. + +|`minimum_should_match` |A value controlling how many "should" clauses +in the resulting boolean query should match. It can be an absolute value +(`2`), a percentage (`30%`) or a +<<query-dsl-minimum-should-match,combination of +both>>. + +|`lenient` |If set to `true` will cause format based failures (like +providing text to a numeric field) to be ignored. +|======================================================================= + +When a multi term query is being generated, one can control how it gets +rewritten using the +<<query-dsl-multi-term-rewrite,rewrite>> +parameter. + +[float] +==== Default Field + +When not explicitly specifying the field to search on in the query +string syntax, the `index.query.default_field` will be used to derive +which field to search on. It defaults to `_all` field. + +So, if `_all` field is disabled, it might make sense to change it to set +a different default field. + +[float] +==== Multi Field + +The `query_string` query can also run against multiple fields. The idea +of running the `query_string` query against multiple fields is by +internally creating several queries for the same query string, each with +`default_field` that match the fields provided. Since several queries +are generated, combining them can be automatically done either using a +`dis_max` query or a simple `bool` query. 
For example (the `name` is +boosted by 5 using `^5` notation): + +[source,js] +-------------------------------------------------- +{ + "query_string" : { + "fields" : ["content", "name^5"], + "query" : "this AND that OR thus", + "use_dis_max" : true + } +} +-------------------------------------------------- + +Simple wildcard can also be used to search "within" specific inner +elements of the document. For example, if we have a `city` object with +several fields (or inner object with fields) in it, we can automatically +search on all "city" fields: + +[source,js] +-------------------------------------------------- +{ + "query_string" : { + "fields" : ["city.*"], + "query" : "this AND that OR thus", + "use_dis_max" : true + } +} +-------------------------------------------------- + +Another option is to provide the wildcard fields search in the query +string itself (properly escaping the `*` sign), for example: +`city.\*:something`. + +When running the `query_string` query against multiple fields, the +following additional parameters are allowed: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`use_dis_max` |Should the queries be combined using `dis_max` (set it +to `true`), or a `bool` query (set it to `false`). Defaults to `true`. + +|`tie_breaker` |When using `dis_max`, the disjunction max tie breaker. +Defaults to `0`. +|======================================================================= + +The fields parameter can also include pattern based field names, +allowing to automatically expand to the relevant fields (dynamically +introduced fields included). 
For example: + +[source,js] +-------------------------------------------------- +{ + "query_string" : { + "fields" : ["content", "name.*^5"], + "query" : "this AND that OR thus", + "use_dis_max" : true + } +} +-------------------------------------------------- + +include::query-string-syntax.asciidoc[] diff --git a/docs/reference/query-dsl/queries/query-string-syntax.asciidoc b/docs/reference/query-dsl/queries/query-string-syntax.asciidoc new file mode 100644 index 0000000..dbc47c6 --- /dev/null +++ b/docs/reference/query-dsl/queries/query-string-syntax.asciidoc @@ -0,0 +1,295 @@ +[[query-string-syntax]] + +==== Query string syntax + +The query string ``mini-language'' is used by the +<<query-dsl-query-string-query>> and by the +`q` query string parameter in the <<search-search,`search` API>>. + +The query string is parsed into a series of _terms_ and _operators_. A +term can be a single word -- `quick` or `brown` -- or a phrase, surrounded by +double quotes -- `"quick brown"` -- which searches for all the words in the +phrase, in the same order. + +Operators allow you to customize the search -- the available options are +explained below. 
+ +===== Field names + +As mentioned in <<query-dsl-query-string-query>>, the `default_field` is searched for the +search terms, but it is possible to specify other fields in the query syntax: + +* where the `status` field contains `active` + + status:active + +* where the `title` field contains `quick` or `brown` + + title:(quick brown) + +* where the `author` field contains the exact phrase `"john smith"` + + author:"John Smith" + +* where any of the fields `book.title`, `book.content` or `book.date` contains + `quick` or `brown` (note how we need to escape the `*` with a backslash): + + book.\*:(quick brown) + +* where the field `title` has no value (or is missing): + + _missing_:title + +* where the field `title` has any non-null value: + + _exists_:title + +===== Wildcards + +Wildcard searches can be run on individual terms, using `?` to replace +a single character, and `*` to replace zero or more characters: + + qu?ck bro* + +Be aware that wildcard queries can use an enormous amount of memory and +perform very badly -- just think how many terms need to be queried to +match the query string `"a* b* c*"`. + +[WARNING] +====== +Allowing a wildcard at the beginning of a word (eg `"*ing"`) is particularly +heavy, because all terms in the index need to be examined, just in case +they match. Leading wildcards can be disabled by setting +`allow_leading_wildcard` to `false`. +====== + +Wildcarded terms are not analyzed by default -- they are lowercased +(`lowercase_expanded_terms` defaults to `true`) but no further analysis +is done, mainly because it is impossible to accurately analyze a word that +is missing some of its letters. However, by setting `analyze_wildcard` to +`true`, an attempt will be made to analyze wildcarded words before searching +the term list for matching terms. 
+ +===== Regular expressions + +Regular expression patterns can be embedded in the query string by +wrapping them in forward-slashes (`"/"`): + + name:/joh?n(ath[oa]n)/ + +The supported regular expression syntax is explained in <<regexp-syntax>>. + +[WARNING] +====== +The `allow_leading_wildcard` parameter does not have any control over +regular expressions. A query string such as the following would force +Elasticsearch to visit every term in the index: + + /.*n/ + +Use with caution! +====== + +===== Fuzziness + +We can search for terms that are +similar to, but not exactly like our search terms, using the ``fuzzy'' +operator: + + quikc~ brwn~ foks~ + +This uses the +http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance[Damerau-Levenshtein distance] +to find all terms with a maximum of +two changes, where a change is the insertion, deletion +or substitution of a single character, or transposition of two adjacent +characters. + +The default _edit distance_ is `2`, but an edit distance of `1` should be +sufficient to catch 80% of all human misspellings. It can be specified as: + + quikc~1 + +===== Proximity searches + +While a phrase query (eg `"john smith"`) expects all of the terms in exactly +the same order, a proximity query allows the specified words to be further +apart or in a different order. In the same way that fuzzy queries can +specify a maximum edit distance for characters in a word, a proximity search +allows us to specify a maximum edit distance of words in a phrase: + + "fox quick"~5 + +The closer the text in a field is to the original order specified in the +query string, the more relevant that document is considered to be. When +compared to the above example query, the phrase `"quick fox"` would be +considered more relevant than `"quick brown fox"`. + +===== Ranges + +Ranges can be specified for date, numeric or string fields. 
Inclusive ranges +are specified with square brackets `[min TO max]` and exclusive ranges with +curly brackets `{min TO max}`. + +* All days in 2012: + + date:[2012/01/01 TO 2012/12/31] + +* Numbers 1..5 + + count:[1 TO 5] + +* Tags between `alpha` and `omega`, excluding `alpha` and `omega`: + + tag:{alpha TO omega} + +* Numbers from 10 upwards + + count:[10 TO *] + +* Dates before 2012 + + date:{* TO 2012/01/01} + +Curly and square brackets can be combined: + +* Numbers from 1 up to but not including 5 + + count:[1..5} + + +Ranges with one side unbounded can use the following syntax: + + age:>10 + age:>=10 + age:<10 + age:<=10 + +[NOTE] +=================================================================== +To combine an upper and lower bound with the simplified syntax, you +would need to join two clauses with an `AND` operator: + + age:(>=10 AND < 20) + age:(+>=10 +<20) + +=================================================================== + +The parsing of ranges in query strings can be complex and error prone. It is +much more reliable to use an explicit <<query-dsl-range-filter,`range` filter>>. + + +===== Boosting + +Use the _boost_ operator `^` to make one term more relevant than another. +For instance, if we want to find all documents about foxes, but we are +especially interested in quick foxes: + + quick^2 fox + +The default `boost` value is 1, but can be any positive floating point number. +Boosts between 0 and 1 reduce relevance. + +Boosts can also be applied to phrases or to groups: + + "john smith"^2 (foo bar)^4 + +===== Boolean operators + +By default, all terms are optional, as long as one term matches. A search +for `foo bar baz` will find any document that contains one or more of +`foo` or `bar` or `baz`. We have already discussed the `default_operator` +above which allows you to force all terms to be required, but there are +also _boolean operators_ which can be used in the query string itself +to provide more control. 
+ +The preferred operators are `+` (this term *must* be present) and `-` +(this term *must not* be present). All other terms are optional. +For example, this query: + + quick brown +fox -news + +states that: + +* `fox` must be present +* `news` must not be present +* `quick` and `brown` are optional -- their presence increases the relevance + +The familiar operators `AND`, `OR` and `NOT` (also written `&&`, `||` and `!`) +are also supported. However, the effects of these operators can be more +complicated than is obvious at first glance. `NOT` takes precedence over +`AND`, which takes precedence over `OR`. While the `+` and `-` only affect +the term to the right of the operator, `AND` and `OR` can affect the terms to +the left and right. + +**** +Rewriting the above query using `AND`, `OR` and `NOT` demonstrates the +complexity: + +`quick OR brown AND fox AND NOT news`:: + +This is incorrect, because `brown` is now a required term. + +`(quick OR brown) AND fox AND NOT news`:: + +This is incorrect because at least one of `quick` or `brown` is now required +and the search for those terms would be scored differently from the original +query. + +`((quick AND fox) OR (brown AND fox) OR fox) AND NOT news`:: + +This form now replicates the logic from the original query correctly, but +the relevance scoring bares little resemblance to the original. 
+ +In contrast, the same query rewritten using the <<query-dsl-match-query,`match` query>> +would look like this: + + { + "bool": { + "must": { "match": "fox" }, + "should": { "match": "quick brown" }, + "must_not": { "match": "news" } + } + } + +**** + +===== Grouping + +Multiple terms or clauses can be grouped together with parentheses, to form +sub-queries: + + (quick OR brown) AND fox + +Groups can be used to target a particular field, or to boost the result +of a sub-query: + + status:(active OR pending) title:(full text search)^2 + +===== Reserved characters + +If you need to use any of the characters which function as operators in your +query itself (and not as operators), then you should escape them with +a leading backslash. For instance, to search for `(1+1)=2`, you would +need to write your query as `\(1\+1\)=2`. + +The reserved characters are: `+ - && || ! ( ) { } [ ] ^ " ~ * ? : \ /` + +Failing to escape these special characters correctly could lead to a syntax +error which prevents your query from running. + +.Watch this space +**** +A space may also be a reserved character. For instance, if you have a +synonym list which converts `"wi fi"` to `"wifi"`, a `query_string` search +for `"wi fi"` would fail. The query string parser would interpret your +query as a search for `"wi OR fi"`, while the token stored in your +index is actually `"wifi"`. Escaping the space will protect it from +being touched by the query string parser: `"wi\ fi"`. +**** + +===== Empty Query + +If the query string is empty or only contains whitespaces the +query string is interpreted as a `no_docs_query` and will yield +an empty result set. 
diff --git a/docs/reference/query-dsl/queries/range-query.asciidoc b/docs/reference/query-dsl/queries/range-query.asciidoc new file mode 100644 index 0000000..cf8a9da --- /dev/null +++ b/docs/reference/query-dsl/queries/range-query.asciidoc @@ -0,0 +1,31 @@ +[[query-dsl-range-query]] +=== Range Query + +Matches documents with fields that have terms within a certain range. +The type of the Lucene query depends on the field type, for `string` +fields, the `TermRangeQuery`, while for number/date fields, the query is +a `NumericRangeQuery`. The following example returns all documents where +`age` is between `10` and `20`: + +[source,js] +-------------------------------------------------- +{ + "range" : { + "age" : { + "gte" : 10, + "lte" : 20, + "boost" : 2.0 + } + } +} +-------------------------------------------------- + +The `range` query accepts the following parameters: + +[horizontal] +`gte`:: Greater-than or equal to +`gt`:: Greater-than +`lte`:: Less-than or equal to +`lt`:: Less-than +`boost`:: Sets the boost value of the query, defaults to `1.0` + diff --git a/docs/reference/query-dsl/queries/regexp-query.asciidoc b/docs/reference/query-dsl/queries/regexp-query.asciidoc new file mode 100644 index 0000000..3345773 --- /dev/null +++ b/docs/reference/query-dsl/queries/regexp-query.asciidoc @@ -0,0 +1,54 @@ +[[query-dsl-regexp-query]] +=== Regexp Query + +The `regexp` query allows you to use regular expression term queries. +See <<regexp-syntax>> for details of the supported regular expression language. + +*Note*: The performance of a `regexp` query heavily depends on the +regular expression chosen. Matching everything like `.*` is very slow as +well as using lookaround regular expressions. If possible, you should +try to use a long prefix before your regular expression starts. Wildcard +matchers like `.*?+` will mostly lower performance. 
+ +[source,js] +-------------------------------------------------- +{ + "regexp":{ + "name.first": "s.*y" + } +} +-------------------------------------------------- + +Boosting is also supported + +[source,js] +-------------------------------------------------- +{ + "regexp":{ + "name.first":{ + "value":"s.*y", + "boost":1.2 + } + } +} +-------------------------------------------------- + +You can also use special flags + +[source,js] +-------------------------------------------------- +{ + "regexp":{ + "name.first": "s.*y", + "flags" : "INTERSECTION|COMPLEMENT|EMPTY" + } +} +-------------------------------------------------- + +Possible flags are `ALL`, `ANYSTRING`, `AUTOMATON`, `COMPLEMENT`, +`EMPTY`, `INTERSECTION`, `INTERVAL`, or `NONE`. Please check the +http://lucene.apache.org/core/4_3_0/core/index.html?org%2Fapache%2Flucene%2Futil%2Fautomaton%2FRegExp.html[Lucene +documentation] for their meaning + + +include::regexp-syntax.asciidoc[] diff --git a/docs/reference/query-dsl/queries/regexp-syntax.asciidoc b/docs/reference/query-dsl/queries/regexp-syntax.asciidoc new file mode 100644 index 0000000..5d2c061 --- /dev/null +++ b/docs/reference/query-dsl/queries/regexp-syntax.asciidoc @@ -0,0 +1,280 @@ +[[regexp-syntax]] +==== Regular expression syntax + +Regular expression queries are supported by the `regexp` and the `query_string` +queries. The Lucene regular expression engine +is not Perl-compatible but supports a smaller range of operators. + +[NOTE] +==== +We will not attempt to explain regular expressions, but +just explain the supported operators. +==== + +===== Standard operators + +Anchoring:: ++ +-- + +Most regular expression engines allow you to match any part of a string. +If you want the regexp pattern to start at the beginning of the string or +finish at the end of the string, then you have to _anchor_ it specifically, +using `^` to indicate the beginning or `$` to indicate the end. + +Lucene's patterns are always anchored. 
The pattern provided must match +the entire string. For string `"abcde"`: + + ab.* # match + abcd # no match + +-- + +Allowed characters:: ++ +-- + +Any Unicode characters may be used in the pattern, but certain characters +are reserved and must be escaped. The standard reserved characters are: + +.... +. ? + * | { } [ ] ( ) " \ +.... + +If you enable optional features (see below) then these characters may +also be reserved: + + # @ & < > ~ + +Any reserved character can be escaped with a backslash `"\*"` including +a literal backslash character: `"\\"` + +Additionally, any characters (except double quotes) are interpreted literally +when surrounded by double quotes: + + john"@smith.com" + + +-- + +Match any character:: ++ +-- + +The period `"."` can be used to represent any character. For string `"abcde"`: + + ab... # match + a.c.e # match + +-- + +One-or-more:: ++ +-- + +The plus sign `"+"` can be used to repeat the preceding shortest pattern +once or more times. For string `"aaabbb"`: + + a+b+ # match + aa+bb+ # match + a+.+ # match + aa+bbb+ # no match + +-- + +Zero-or-more:: ++ +-- + +The asterisk `"*"` can be used to match the preceding shortest pattern +zero-or-more times. For string `"aaabbb`": + + a*b* # match + a*b*c* # match + .*bbb.* # match + aaa*bbb* # match + +-- + +Zero-or-one:: ++ +-- + +The question mark `"?"` makes the preceding shortest pattern optional. It +matches zero or one times. For string `"aaabbb"`: + + aaa?bbb? # match + aaaa?bbbb? # match + .....?.? # match + aa?bb? # no match + +-- + +Min-to-max:: ++ +-- + +Curly brackets `"{}"` can be used to specify a minimum and (optionally) +a maximum number of times the preceding shortest pattern can repeat. 
The +allowed forms are: + + {5} # repeat exactly 5 times + {2,5} # repeat at least twice and at most 5 times + {2,} # repeat at least twice + +For string `"aaabbb"`: + + a{3}b{3} # match + a{2,4}b{2,4} # match + a{2,}b{2,} # match + .{3}.{3} # match + a{4}b{4} # no match + a{4,6}b{4,6} # no match + a{4,}b{4,} # no match + +-- + +Grouping:: ++ +-- + +Parentheses `"()"` can be used to form sub-patterns. The quantity operators +listed above operate on the shortest previous pattern, which can be a group. +For string `"ababab"`: + + (ab)+ # match + ab(ab)+ # match + (..)+ # match + (...)+ # no match + (ab)* # match + abab(ab)? # match + ab(ab)? # no match + (ab){3} # match + (ab){1,2} # no match + +-- + +Alternation:: ++ +-- + +The pipe symbol `"|"` acts as an OR operator. The match will succeed if +the pattern on either the left-hand side OR the right-hand side matches. +The alternation applies to the _longest pattern_, not the shortest. +For string `"aabb"`: + + aabb|bbaa # match + aacc|bb # no match + aa(cc|bb) # match + a+|b+ # no match + a+b+|b+a+ # match + a+(b|c)+ # match + +-- + +Character classes:: ++ +-- + +Ranges of potential characters may be represented as character classes +by enclosing them in square brackets `"[]"`. A leading `^` +negates the character class. The allowed forms are: + + [abc] # 'a' or 'b' or 'c' + [a-c] # 'a' or 'b' or 'c' + [-abc] # '-' or 'a' or 'b' or 'c' + [abc\-] # '-' or 'a' or 'b' or 'c' + [^a-c] # any character except 'a' or 'b' or 'c' + [^a-c] # any character except 'a' or 'b' or 'c' + [-abc] # '-' or 'a' or 'b' or 'c' + [abc\-] # '-' or 'a' or 'b' or 'c' + +Note that the dash `"-"` indicates a range of characeters, unless it is +the first character or if it is escaped with a backslash. + +For string `"abcd"`: + + ab[cd]+ # match + [a-d]+ # match + [^a-d]+ # no match + +-- + +===== Optional operators + +These operators are only available when they are explicitly enabled, by +passing `flags` to the query. 
+ +Multiple flags can be enabled either using the `ALL` flag, or by +concatenating flags with a pipe `"|"`: + + { + "regexp": { + "username": { + "value": "john~athon<1-5>", + "flags": "COMPLEMENT|INTERVAL" + } + } + } + +Complement:: ++ +-- + +The complement is probably the most useful option. The shortest pattern that +follows a tilde `"~"` is negated. For the string `"abcdef"`: + + ab~df # match + ab~cf # no match + a~(cd)f # match + a~(bc)f # no match + +Enabled with the `COMPLEMENT` or `ALL` flags. + +-- + +Interval:: ++ +-- + +The interval option enables the use of numeric ranges, enclosed by angle +brackets `"<>"`. For string: `"foo80"`: + + foo<1-100> # match + foo<01-100> # match + foo<001-100> # no match + +Enabled with the `INTERVAL` or `ALL` flags. + + +-- + +Intersection:: ++ +-- + +The ampersand `"&"` joins two patterns in a way that both of them have to +match. For string `"aaabbb"`: + + aaa.+&.+bbb # match + aaa&bbb # no match + +Using this feature usually means that you should rewrite your regular +expression. + +Enabled with the `INTERSECTION` or `ALL` flags. + +-- + +Any string:: ++ +-- + +The at sign `"@"` matches any string in its entirety. This could be combined +with the intersection and complement above to express ``everything except''. +For instance: + + @&~(foo.+) # anything except string beginning with "foo" + +Enabled with the `ANYSTRING` or `ALL` flags. +-- diff --git a/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc b/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc new file mode 100644 index 0000000..a817b23 --- /dev/null +++ b/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc @@ -0,0 +1,100 @@ +[[query-dsl-simple-query-string-query]] +=== Simple Query String Query + +A query that uses the SimpleQueryParser to parse its context. Unlike the +regular `query_string` query, the `simple_query_string` query will never +throw an exception, and discards invalid parts of the query. 
Here is +an example: + +[source,js] +-------------------------------------------------- +{ + "simple_query_string" : { + "query": "\"fried eggs\" +(eggplant | potato) -frittata", + "analyzer": "snowball", + "fields": ["body^5","_all"], + "default_operator": "and" + } +} +-------------------------------------------------- + +The `simple_query_string` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`query` |The actual query to be parsed. See below for syntax. + +|`fields` |The fields to perform the parsed query against. Defaults to the +`index.query.default_field` index settings, which in turn defaults to `_all`. + +|`default_operator` |The default operator used if no explicit operator +is specified. For example, with a default operator of `OR`, the query +`capital of Hungary` is translated to `capital OR of OR Hungary`, and +with default operator of `AND`, the same query is translated to +`capital AND of AND Hungary`. The default value is `OR`. + +|`analyzer` |The analyzer used to analyze each term of the query when +creating composite queries. + +|`flags` |Flags specifying which features of the `simple_query_string` to +enable. Defaults to `ALL`. +|======================================================================= + +[float] +==== Simple Query String Syntax +The `simple_query_string` supports the following special characters: + +* `+` signifies AND operation +* `|` signifies OR operation +* `-` negates a single token +* `"` wraps a number of tokens to signify a phrase for searching +* `*` at the end of a term signifies a prefix query +* `(` and `)` signify precedence + +In order to search for any of these special characters, they will need to +be escaped with `\`. 
+ +[float] +==== Default Field +When not explicitly specifying the field to search on in the query +string syntax, the `index.query.default_field` will be used to derive +which field to search on. It defaults to `_all` field. + +So, if `_all` field is disabled, it might make sense to change it to set +a different default field. + +[float] +==== Multi Field +The fields parameter can also include pattern based field names, +allowing to automatically expand to the relevant fields (dynamically +introduced fields included). For example: + +[source,js] +-------------------------------------------------- +{ + "simple_query_string" : { + "fields" : ["content", "name.*^5"], + "query" : "foo bar baz" + } +} +-------------------------------------------------- + +[float] +==== Flags +`simple_query_string` support multiple flags to specify which parsing features +should be enabled. It is specified as a `|`-delimited string with the +`flags` parameter: + +[source,js] +-------------------------------------------------- +{ + "simple_query_string" : { + "query" : "foo | bar & baz*", + "flags" : "OR|AND|PREFIX" + } +} +-------------------------------------------------- + +The available flags are: `ALL`, `NONE`, `AND`, `OR`, `PREFIX`, `PHRASE`, +`PRECEDENCE`, `ESCAPE`, and `WHITESPACE`.
\ No newline at end of file diff --git a/docs/reference/query-dsl/queries/span-first-query.asciidoc b/docs/reference/query-dsl/queries/span-first-query.asciidoc new file mode 100644 index 0000000..74fe7ff --- /dev/null +++ b/docs/reference/query-dsl/queries/span-first-query.asciidoc @@ -0,0 +1,20 @@ +[[query-dsl-span-first-query]] +=== Span First Query + +Matches spans near the beginning of a field. The span first query maps +to Lucene `SpanFirstQuery`. Here is an example: + +[source,js] +-------------------------------------------------- +{ + "span_first" : { + "match" : { + "span_term" : { "user" : "kimchy" } + }, + "end" : 3 + } +} +-------------------------------------------------- + +The `match` clause can be any other span type query. The `end` controls +the maximum end position permitted in a match. diff --git a/docs/reference/query-dsl/queries/span-multi-term-query.asciidoc b/docs/reference/query-dsl/queries/span-multi-term-query.asciidoc new file mode 100644 index 0000000..76985fa --- /dev/null +++ b/docs/reference/query-dsl/queries/span-multi-term-query.asciidoc @@ -0,0 +1,30 @@ +[[query-dsl-span-multi-term-query]] +=== Span Multi Term Query + +The `span_multi` query allows you to wrap a `multi term query` (one of +fuzzy, prefix, term range or regexp query) as a `span query`, so +it can be nested. 
Example: + +[source,js] +-------------------------------------------------- +{ + "span_multi":{ + "match":{ + "prefix" : { "user" : { "value" : "ki" } } + } + } +} +-------------------------------------------------- + +A boost can also be associated with the query: + +[source,js] +-------------------------------------------------- +{ + "span_multi":{ + "match":{ + "prefix" : { "user" : { "value" : "ki", "boost" : 1.08 } } + } + } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/span-near-query.asciidoc b/docs/reference/query-dsl/queries/span-near-query.asciidoc new file mode 100644 index 0000000..39982e2 --- /dev/null +++ b/docs/reference/query-dsl/queries/span-near-query.asciidoc @@ -0,0 +1,27 @@ +[[query-dsl-span-near-query]] +=== Span Near Query + +Matches spans which are near one another. One can specify _slop_, the +maximum number of intervening unmatched positions, as well as whether +matches are required to be in-order. The span near query maps to Lucene +`SpanNearQuery`. Here is an example: + +[source,js] +-------------------------------------------------- +{ + "span_near" : { + "clauses" : [ + { "span_term" : { "field" : "value1" } }, + { "span_term" : { "field" : "value2" } }, + { "span_term" : { "field" : "value3" } } + ], + "slop" : 12, + "in_order" : false, + "collect_payloads" : false + } +} +-------------------------------------------------- + +The `clauses` element is a list of one or more other span type queries +and the `slop` controls the maximum number of intervening unmatched +positions permitted. diff --git a/docs/reference/query-dsl/queries/span-not-query.asciidoc b/docs/reference/query-dsl/queries/span-not-query.asciidoc new file mode 100644 index 0000000..b035720 --- /dev/null +++ b/docs/reference/query-dsl/queries/span-not-query.asciidoc @@ -0,0 +1,24 @@ +[[query-dsl-span-not-query]] +=== Span Not Query + +Removes matches which overlap with another span query. 
The span not +query maps to Lucene `SpanNotQuery`. Here is an example: + +[source,js] +-------------------------------------------------- +{ + "span_not" : { + "include" : { + "span_term" : { "field1" : "value1" } + }, + "exclude" : { + "span_term" : { "field2" : "value2" } + } + } +} +-------------------------------------------------- + +The `include` and `exclude` clauses can be any span type query. The +`include` clause is the span query whose matches are filtered, and the +`exclude` clause is the span query whose matches must not overlap those +returned. diff --git a/docs/reference/query-dsl/queries/span-or-query.asciidoc b/docs/reference/query-dsl/queries/span-or-query.asciidoc new file mode 100644 index 0000000..72a4ce8 --- /dev/null +++ b/docs/reference/query-dsl/queries/span-or-query.asciidoc @@ -0,0 +1,20 @@ +[[query-dsl-span-or-query]] +=== Span Or Query + +Matches the union of its span clauses. The span or query maps to Lucene +`SpanOrQuery`. Here is an example: + +[source,js] +-------------------------------------------------- +{ + "span_or" : { + "clauses" : [ + { "span_term" : { "field" : "value1" } }, + { "span_term" : { "field" : "value2" } }, + { "span_term" : { "field" : "value3" } } + ] + } +} +-------------------------------------------------- + +The `clauses` element is a list of one or more other span type queries. diff --git a/docs/reference/query-dsl/queries/span-term-query.asciidoc b/docs/reference/query-dsl/queries/span-term-query.asciidoc new file mode 100644 index 0000000..9de86d4 --- /dev/null +++ b/docs/reference/query-dsl/queries/span-term-query.asciidoc @@ -0,0 +1,30 @@ +[[query-dsl-span-term-query]] +=== Span Term Query + +Matches spans containing a term. The span term query maps to Lucene +`SpanTermQuery`. 
Here is an example: + +[source,js] +-------------------------------------------------- +{ + "span_term" : { "user" : "kimchy" } +} +-------------------------------------------------- + +A boost can also be associated with the query: + +[source,js] +-------------------------------------------------- +{ + "span_term" : { "user" : { "value" : "kimchy", "boost" : 2.0 } } +} +-------------------------------------------------- + +Or : + +[source,js] +-------------------------------------------------- +{ + "span_term" : { "user" : { "term" : "kimchy", "boost" : 2.0 } } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/term-query.asciidoc b/docs/reference/query-dsl/queries/term-query.asciidoc new file mode 100644 index 0000000..cd9537d --- /dev/null +++ b/docs/reference/query-dsl/queries/term-query.asciidoc @@ -0,0 +1,31 @@ +[[query-dsl-term-query]] +=== Term Query + +Matches documents that have fields that contain a term (*not analyzed*). +The term query maps to Lucene `TermQuery`. 
The following matches +documents where the user field contains the term `kimchy`: + +[source,js] +-------------------------------------------------- +{ + "term" : { "user" : "kimchy" } +} +-------------------------------------------------- + +A boost can also be associated with the query: + +[source,js] +-------------------------------------------------- +{ + "term" : { "user" : { "value" : "kimchy", "boost" : 2.0 } } +} +-------------------------------------------------- + +Or : + +[source,js] +-------------------------------------------------- +{ + "term" : { "user" : { "term" : "kimchy", "boost" : 2.0 } } +} +-------------------------------------------------- diff --git a/docs/reference/query-dsl/queries/terms-query.asciidoc b/docs/reference/query-dsl/queries/terms-query.asciidoc new file mode 100644 index 0000000..a1f62a3 --- /dev/null +++ b/docs/reference/query-dsl/queries/terms-query.asciidoc @@ -0,0 +1,19 @@ +[[query-dsl-terms-query]] +=== Terms Query + +A query that match on any (configurable) of the provided terms. This is +a simpler syntax query for using a `bool` query with several `term` +queries in the `should` clauses. For example: + +[source,js] +-------------------------------------------------- +{ + "terms" : { + "tags" : [ "blue", "pill" ], + "minimum_should_match" : 1 + } +} +-------------------------------------------------- + +The `terms` query is also aliased with `in` as the query name for +simpler usage. diff --git a/docs/reference/query-dsl/queries/top-children-query.asciidoc b/docs/reference/query-dsl/queries/top-children-query.asciidoc new file mode 100644 index 0000000..00c32bf --- /dev/null +++ b/docs/reference/query-dsl/queries/top-children-query.asciidoc @@ -0,0 +1,71 @@ +[[query-dsl-top-children-query]] +=== Top Children Query + +The `top_children` query runs the child query with an estimated hits +size, and out of the hit docs, aggregates it into parent docs. 
If there +aren't enough parent docs matching the requested from/size search +request, then it is run again with a wider (more hits) search. + +The `top_children` also provide scoring capabilities, with the ability +to specify `max`, `sum` or `avg` as the score type. + +One downside of using the `top_children` is that if there are more child +docs matching the required hits when executing the child query, then the +`total_hits` result of the search response will be incorrect. + +How many hits are asked for in the first child query run is controlled +using the `factor` parameter (defaults to `5`). For example, when asking +for 10 parent docs (with `from` set to 0), then the child query will +execute with 50 hits expected. If not enough parents are found (in our +example 10), and there are still more child docs to query, then the +child search hits are expanded by multiplying by the +`incremental_factor` (defaults to `2`). + +The required parameters are the `query` and `type` (the child type to +execute the query on). Here is an example with all different parameters, +including the default values: + +[source,js] +-------------------------------------------------- +{ + "top_children" : { + "type": "blog_tag", + "query" : { + "term" : { + "tag" : "something" + } + }, + "score" : "max", + "factor" : 5, + "incremental_factor" : 2 + } +} +-------------------------------------------------- + +[float] +==== Scope + +A `_scope` can be defined on the query allowing to run facets on the +same scope name that will work against the child documents. 
For example: + +[source,js] +-------------------------------------------------- +{ + "top_children" : { + "_scope" : "my_scope", + "type": "blog_tag", + "query" : { + "term" : { + "tag" : "something" + } + } + } +} +-------------------------------------------------- + +[float] +==== Memory Considerations + +With the current implementation, all `_id` values are loaded to memory +(heap) in order to support fast lookups, so make sure there is enough +memory for it. diff --git a/docs/reference/query-dsl/queries/wildcard-query.asciidoc b/docs/reference/query-dsl/queries/wildcard-query.asciidoc new file mode 100644 index 0000000..d72dbec --- /dev/null +++ b/docs/reference/query-dsl/queries/wildcard-query.asciidoc @@ -0,0 +1,39 @@ +[[query-dsl-wildcard-query]] +=== Wildcard Query + +Matches documents that have fields matching a wildcard expression (*not +analyzed*). Supported wildcards are `*`, which matches any character +sequence (including the empty one), and `?`, which matches any single +character. Note this query can be slow, as it needs to iterate over many +terms. In order to prevent extremely slow wildcard queries, a wildcard +term should not start with one of the wildcards `*` or `?`. The wildcard +query maps to Lucene `WildcardQuery`. + +[source,js] +-------------------------------------------------- +{ + "wildcard" : { "user" : "ki*y" } +} +-------------------------------------------------- + +A boost can also be associated with the query: + +[source,js] +-------------------------------------------------- +{ + "wildcard" : { "user" : { "value" : "ki*y", "boost" : 2.0 } } +} +-------------------------------------------------- + +Or : + +[source,js] +-------------------------------------------------- +{ + "wildcard" : { "user" : { "wildcard" : "ki*y", "boost" : 2.0 } } +} +-------------------------------------------------- + +This multi term query allows to control how it gets rewritten using the +<<query-dsl-multi-term-rewrite,rewrite>> +parameter. |