summaryrefslogtreecommitdiff
path: root/docs/reference/query-dsl/queries/function-score-query.asciidoc
diff options
context:
space:
mode:
Diffstat (limited to 'docs/reference/query-dsl/queries/function-score-query.asciidoc')
-rw-r--r--docs/reference/query-dsl/queries/function-score-query.asciidoc491
1 files changed, 491 insertions, 0 deletions
diff --git a/docs/reference/query-dsl/queries/function-score-query.asciidoc b/docs/reference/query-dsl/queries/function-score-query.asciidoc
new file mode 100644
index 0000000..fa5b2bd
--- /dev/null
+++ b/docs/reference/query-dsl/queries/function-score-query.asciidoc
@@ -0,0 +1,491 @@
+[[query-dsl-function-score-query]]
+=== Function Score Query
+
+The `function_score` allows you to modify the score of documents that are
+retrieved by a query. This can be useful if, for example, a score
+function is computationally expensive and it is sufficient to compute
+the score on a filtered set of documents.
+
+`function_score` provides the same functionality that
+`custom_boost_factor`, `custom_score` and
+`custom_filters_score` provided
+but furthermore adds further scoring functionality such as
+distance and recency scoring (see description below).
+
+==== Using function score
+
+To use `function_score`, the user has to define a query and one or
+several functions, that compute a new score for each document returned
+by the query.
+
+`function_score` can be used with only one function like this:
+
+[source,js]
+--------------------------------------------------
+"function_score": {
+ "(query|filter)": {},
+ "boost": "boost for the whole query",
+ "FUNCTION": {},
+ "boost_mode":"(multiply|replace|...)"
+}
+--------------------------------------------------
+
+Furthermore, several functions can be combined. In this case one can
+optionally choose to apply the function only if a document matches a
+given filter:
+
+[source,js]
+--------------------------------------------------
+"function_score": {
+ "(query|filter)": {},
+ "boost": "boost for the whole query",
+ "functions": [
+ {
+ "filter": {},
+ "FUNCTION": {}
+ },
+ {
+ "FUNCTION": {}
+ }
+ ],
+ "max_boost": number,
+ "score_mode": "(multiply|max|...)",
+ "boost_mode": "(multiply|replace|...)"
+}
+--------------------------------------------------
+
+If no filter is given with a function this is equivalent to specifying
+`"match_all": {}`
+
+First, each document is scored by the defined functions. The parameter
+`score_mode` specifies how the computed scores are combined:
+
+[horizontal]
+`multiply`:: scores are multiplied (default)
+`sum`:: scores are summed
+`avg`:: scores are averaged
+`first`:: the first function that has a matching filter
+ is applied
+`max`:: maximum score is used
+`min`:: minimum score is used
+
+The new score can be restricted to not exceed a certain limit by setting
+the `max_boost` parameter. The default for `max_boost` is FLT_MAX.
+
+Finally, the newly computed score is combined with the score of the
+query. The parameter `boost_mode` defines how:
+
+[horizontal]
+`multiply`:: query score and function score are multiplied (default)
+`replace`:: only function score is used, the query score is ignored
+`sum`:: query score and function score are added
+`avg`:: average
+`max`:: max of query score and function score
+`min`:: min of query score and function score
+
+
+==== Score functions
+
+The `function_score` query provides several types of score functions.
+
+===== Script score
+
+The `script_score` function allows you to wrap another query and customize
+the scoring of it optionally with a computation derived from other numeric
+field values in the doc using a script expression. Here is a
+simple sample:
+
+[source,js]
+--------------------------------------------------
+"script_score" : {
+ "script" : "_score * doc['my_numeric_field'].value"
+}
+--------------------------------------------------
+
+On top of the different scripting field values and expression, the
+`_score` script parameter can be used to retrieve the score based on the
+wrapped query.
+
+Scripts are cached for faster execution. If the script has parameters
+that it needs to take into account, it is preferable to reuse the same
+script, and provide parameters to it:
+
+[source,js]
+--------------------------------------------------
+"script_score": {
+ "lang": "lang",
+ "params": {
+ "param1": value1,
+ "param2": value2
+ },
+ "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
+}
+--------------------------------------------------
+
+Note that unlike the `custom_score` query, the
+score of the query is multiplied with the result of the script scoring. If
+you wish to inhibit this, set `"boost_mode": "replace"`
+
+===== Boost factor
+
+The `boost_factor` score allows you to multiply the score by the provided
+`boost_factor`. This can sometimes be desired since boost value set on
+specific queries gets normalized, while for this score function it does
+not.
+
+[source,js]
+--------------------------------------------------
+"boost_factor" : number
+--------------------------------------------------
+
+===== Random
+
+The `random_score` generates scores via a pseudo random number algorithm
+that is initialized with a `seed`.
+
+[source,js]
+--------------------------------------------------
+"random_score": {
+ "seed" : number
+}
+--------------------------------------------------
+
+===== Decay functions
+
+Decay functions score a document with a function that decays depending
+on the distance of a numeric field value of the document from a user
+given origin. This is similar to a range query, but with smooth edges
+instead of boxes.
+
+To use distance scoring on a query that has numerical fields, the user
+has to define an `origin` and a `scale` for each field. The `origin`
+is needed to define the ``central point'' from which the distance
+is calculated, and the `scale` to define the rate of decay. The
+decay function is specified as
+
+[source,js]
+--------------------------------------------------
+"DECAY_FUNCTION": {
+ "FIELD_NAME": {
+ "origin": "11, 12",
+ "scale": "2km",
+ "offset": "0km",
+ "decay": 0.33
+ }
+}
+--------------------------------------------------
+
+where `DECAY_FUNCTION` can be "linear", "exp" and "gauss" (see below). The specified field must be a numeric field. In the above example, the field is a <<mapping-geo-point-type>> and origin can be provided in geo format. `scale` and `offset` must be given with a unit in this case. If your field is a date field, you can set `scale` and `offset` as days, weeks, and so on. Example:
+
+[source,js]
+--------------------------------------------------
+ "DECAY_FUNCTION": {
+ "FIELD_NAME": {
+ "origin": "2013-09-17",
+ "scale": "10d",
+ "offset": "5d",
+ "decay" : 0.5
+ }
+ }
+--------------------------------------------------
+
+
+The format of the origin depends on the <<mapping-date-format>> defined in your mapping. If you do not define the origin, the current time is used.
+
+
+The `offset` and `decay` parameters are optional.
+
+[horizontal]
+`offset`::
+ If an `offset` is defined, the decay function will only compute the
+ decay function for documents with a distance greater than the defined
+ `offset`. The default is 0.
+
+`decay`::
+ The `decay` parameter defines how documents are scored at the distance
+ given at `scale`. If no `decay` is defined, documents at the distance
+ `scale` will be scored 0.5.
+
+In the first example, your documents might represent hotels and contain a geo
+location field. You want to compute a decay function depending on how
+far the hotel is from a given location. You might not immediately see
+what scale to choose for the gauss function, but you can say something
+like: "At a distance of 2km from the desired location, the score should
+be reduced to one third."
+The parameter "scale" will then be adjusted automatically to assure that
+the score function computes a score of 0.33 for hotels that are 2km away
+from the desired location.
+
+
+In the second example, documents with a field value between 2013-09-12 and 2013-09-22 would get a weight of 1.0 and documents which are 15 days from that date a weight of 0.5.
+
+
+
+The `DECAY_FUNCTION` determines the shape of the decay:
+
+[horizontal]
+`gauss`::
+
+Normal decay, computed as:
++
+image:images/Gaussian.png[]
+
+`exp`::
+
+Exponential decay, computed as:
++
+image:images/Exponential.png[]
+
+
+`linear`::
+Linear decay, computed as:
++
+image:images/Linear.png[].
++
+In contrast to the normal and exponential decay, this function actually
+sets the score to 0 if the field value exceeds twice the user given
+scale value.
+
+==== Detailed example
+
+Suppose you are searching for a hotel in a certain town. Your budget is
+limited. Also, you would like the hotel to be close to the town center,
+so the farther the hotel is from the desired location the less likely
+you are to check in.
+
+You would like the query results that match your criterion (for
+example, "hotel, Nancy, non-smoker") to be scored with respect to
+distance to the town center and also the price.
+
+Intuitively, you would like to define the town center as the origin and
+maybe you are willing to walk 2km to the town center from the hotel. +
+In this case your *origin* for the location field is the town center
+and the *scale* is ~2km.
+
+If your budget is low, you would probably prefer something cheap above
+something expensive. For the price field, the *origin* would be 0 Euros
+and the *scale* depends on how much you are willing to pay, for example 20 Euros.
+
+In this example, the fields might be called "price" for the price of the
+hotel and "location" for the coordinates of this hotel.
+
+The function for `price` in this case would be
+
+[source,js]
+--------------------------------------------------
+"DECAY_FUNCTION": {
+ "price": {
+ "origin": "0",
+ "scale": "20"
+ }
+}
+--------------------------------------------------
+
+and for `location`:
+
+[source,js]
+--------------------------------------------------
+
+"DECAY_FUNCTION": {
+ "location": {
+ "origin": "11, 12",
+ "scale": "2km"
+ }
+}
+--------------------------------------------------
+
+where `DECAY_FUNCTION` can be "linear", "exp" and "gauss".
+
+Suppose you want to multiply these two functions on the original score,
+the request would look like this:
+
+[source,js]
+--------------------------------------------------
+curl 'localhost:9200/hotels/_search/' -d '{
+"query": {
+ "function_score": {
+ "functions": [
+ {
+ "DECAY_FUNCTION": {
+ "price": {
+ "origin": "0",
+ "scale": "20"
+ }
+ }
+ },
+ {
+ "DECAY_FUNCTION": {
+ "location": {
+ "origin": "11, 12",
+ "scale": "2km"
+ }
+ }
+ }
+ ],
+ "query": {
+ "match": {
+ "properties": "balcony"
+ }
+ },
+ "score_mode": "multiply"
+ }
+}
+}'
+--------------------------------------------------
+
+Next, we show what the computed score looks like for each of the three
+possible decay functions.
+
+===== Normal decay, keyword `gauss`
+
+When choosing `gauss` as the decay function in the above example, the
+contour and surface plot of the multiplier looks like this:
+
+image::https://f.cloud.github.com/assets/4320215/768157/cd0e18a6-e898-11e2-9b3c-f0145078bd6f.png[width="700px"]
+
+image::https://f.cloud.github.com/assets/4320215/768160/ec43c928-e898-11e2-8e0d-f3c4519dbd89.png[width="700px"]
+
+Suppose your original search results match three hotels:
+
+* "Backpack Nap"
+* "Drink n Drive"
+* "BnB Bellevue".
+
+"Drink n Drive" is pretty far from your defined location (nearly 2 km)
+and is not too cheap (about 13 Euros) so it gets a low factor
+of 0.56. "BnB Bellevue" and "Backpack Nap" are both pretty close to the
+defined location but "BnB Bellevue" is cheaper, so it gets a multiplier
+of 0.86 whereas "Backpack Nap" gets a value of 0.66.
+
+===== Exponential decay, keyword `exp`
+
+When choosing `exp` as the decay function in the above example, the
+contour and surface plot of the multiplier looks like this:
+
+image::https://f.cloud.github.com/assets/4320215/768161/082975c0-e899-11e2-86f7-174c3a729d64.png[width="700px"]
+
+image::https://f.cloud.github.com/assets/4320215/768162/0b606884-e899-11e2-907b-aefc77eefef6.png[width="700px"]
+
+===== Linear decay, keyword `linear`
+
+When choosing `linear` as the decay function in the above example, the
+contour and surface plot of the multiplier looks like this:
+
+image::https://f.cloud.github.com/assets/4320215/768164/1775b0ca-e899-11e2-9f4a-776b406305c6.png[width="700px"]
+
+image::https://f.cloud.github.com/assets/4320215/768165/19d8b1aa-e899-11e2-91bc-6b0553e8d722.png[width="700px"]
+
+==== Supported fields for decay functions
+
+Only single valued numeric fields, including time and geo locations,
+are supported.
+
+==== What if a field is missing?
+
+If the numeric field is missing in the document, the function will
+return 1.
+
+==== Relation to `custom_boost_factor`, `custom_score` and `custom_filters_score`
+
+The `custom_boost_factor` query
+
+[source,js]
+--------------------------------------------------
+"custom_boost_factor": {
+ "boost_factor": 5.2,
+ "query": {...}
+}
+--------------------------------------------------
+
+becomes
+
+[source,js]
+--------------------------------------------------
+"function_score": {
+ "boost_factor": 5.2,
+ "query": {...}
+}
+--------------------------------------------------
+
+The `custom_score` query
+
+[source,js]
+--------------------------------------------------
+"custom_score": {
+ "params": {
+ "param1": 2,
+ "param2": 3.1
+ },
+ "query": {...},
+ "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
+}
+--------------------------------------------------
+
+becomes
+
+[source,js]
+--------------------------------------------------
+"function_score": {
+ "boost_mode": "replace",
+ "query": {...},
+ "script_score": {
+ "params": {
+ "param1": 2,
+ "param2": 3.1
+ },
+ "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
+ }
+}
+--------------------------------------------------
+
+and the `custom_filters_score`
+
+[source,js]
+--------------------------------------------------
+"custom_filters_score": {
+ "filters": [
+ {
+ "boost_factor": "3",
+ "filter": {...}
+ },
+ {
+ "filter": {...},
+ "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
+ }
+ ],
+ "params": {
+ "param1": 2,
+ "param2": 3.1
+ },
+ "query": {...},
+ "score_mode": "first"
+}
+--------------------------------------------------
+
+becomes:
+
+[source,js]
+--------------------------------------------------
+"function_score": {
+ "functions": [
+ {
+ "boost_factor": "3",
+ "filter": {...}
+ },
+ {
+ "filter": {...},
+ "script_score": {
+ "params": {
+ "param1": 2,
+ "param2": 3.1
+ },
+ "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
+ }
+ }
+ ],
+ "query": {...},
+ "score_mode": "first"
+}
+--------------------------------------------------
+
+