diff options
Diffstat (limited to 'docs/reference/query-dsl/queries/function-score-query.asciidoc')
-rw-r--r-- | docs/reference/query-dsl/queries/function-score-query.asciidoc | 491 |
1 files changed, 491 insertions, 0 deletions
diff --git a/docs/reference/query-dsl/queries/function-score-query.asciidoc b/docs/reference/query-dsl/queries/function-score-query.asciidoc new file mode 100644 index 0000000..fa5b2bd --- /dev/null +++ b/docs/reference/query-dsl/queries/function-score-query.asciidoc @@ -0,0 +1,491 @@ +[[query-dsl-function-score-query]] +=== Function Score Query + +The `function_score` allows you to modify the score of documents that are +retrieved by a query. This can be useful if, for example, a score +function is computationally expensive and it is sufficient to compute +the score on a filtered set of documents. + +`function_score` provides the same functionality that +`custom_boost_factor`, `custom_score` and +`custom_filters_score` provided +but furthermore adds futher scoring functionality such as +distance and recency scoring (see description below). + +==== Using function score + +To use `function_score`, the user has to define a query and one or +several functions, that compute a new score for each document returned +by the query. + +`function_score` can be used with only one function like this: + +[source,js] +-------------------------------------------------- +"function_score": { + "(query|filter)": {}, + "boost": "boost for the whole query", + "FUNCTION": {}, + "boost_mode":"(multiply|replace|...)" +} +-------------------------------------------------- + +Furthermore, several functions can be combined. In this case one can +optionally choose to apply the function only if a document matches a +given filter: + +[source,js] +-------------------------------------------------- +"function_score": { + "(query|filter)": {}, + "boost": "boost for the whole query", + "functions": [ + { + "filter": {}, + "FUNCTION": {} + }, + { + "FUNCTION": {} + } + ], + "max_boost": number, + "score_mode": "(multiply|max|...)", + "boost_mode": "(multiply|replace|...)" +} +-------------------------------------------------- + +If no filter is given with a function this is equivalent to specifying +`"match_all": {}` + +First, each document is scored by the defined functons. The parameter +`score_mode` specifies how the computed scores are combined: + +[horizontal] +`multiply`:: scores are multiplied (default) +`sum`:: scores are summed +`avg`:: scores are averaged +`first`:: the first function that has a matching filter + is applied +`max`:: maximum score is used +`min`:: minimum score is used + +The new score can be restricted to not exceed a certain limit by setting +the `max_boost` parameter. The default for `max_boost` is FLT_MAX. + +Finally, the newly computed score is combined with the score of the +query. The parameter `boost_mode` defines how: + +[horizontal] +`multiply`:: query score and function score is multiplied (default) +`replace`:: only function score is used, the query score is ignored +`sum`:: query score and function score are added +`avg`:: average +`max`:: max of query score and function score +`min`:: min of query score and function score + + +==== Score functions + +The `function_score` query provides several types of score functions. + +===== Script score + +The `script_score` function allows you to wrap another query and customize +the scoring of it optionally with a computation derived from other numeric +field values in the doc using a script expression. Here is a +simple sample: + +[source,js] +-------------------------------------------------- +"script_score" : { + "script" : "_score * doc['my_numeric_field'].value" +} +-------------------------------------------------- + +On top of the different scripting field values and expression, the +`_score` script parameter can be used to retrieve the score based on the +wrapped query. + +Scripts are cached for faster execution. If the script has parameters +that it needs to take into account, it is preferable to reuse the same +script, and provide parameters to it: + +[source,js] +-------------------------------------------------- +"script_score": { + "lang": "lang", + "params": { + "param1": value1, + "param2": value2 + }, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" +} +-------------------------------------------------- + +Note that unlike the `custom_score` query, the +score of the query is multiplied with the result of the script scoring. If +you wish to inhibit this, set `"boost_mode": "replace"` + +===== Boost factor + +The `boost_factor` score allows you to multiply the score by the provided +`boost_factor`. This can sometimes be desired since boost value set on +specific queries gets normalized, while for this score function it does +not. + +[source,js] +-------------------------------------------------- +"boost_factor" : number +-------------------------------------------------- + +===== Random + +The `random_score` generates scores via a pseudo random number algorithm +that is initialized with a `seed`. + +[source,js] +-------------------------------------------------- +"random_score": { + "seed" : number +} +-------------------------------------------------- + +===== Decay functions + +Decay functions score a document with a function that decays depending +on the distance of a numeric field value of the document from a user +given origin. This is similar to a range query, but with smooth edges +instead of boxes. + +To use distance scoring on a query that has numerical fields, the user +has to define an `origin` and a `scale` for each field. The `origin` +is needed to define the ``central point'' from which the distance +is calculated, and the `scale` to define the rate of decay. The +decay function is specified as + +[source,js] +-------------------------------------------------- +"DECAY_FUNCTION": { + "FIELD_NAME": { + "origin": "11, 12", + "scale": "2km", + "offset": "0km", + "decay": 0.33 + } +} +-------------------------------------------------- + +where `DECAY_FUNCTION` can be "linear", "exp" and "gauss" (see below). The specified field must be a numeric field. In the above example, the field is a <<mapping-geo-point-type>> and origin can be provided in geo format. `scale` and `offset` must be given with a unit in this case. If your field is a date field, you can set `scale` and `offset` as days, weeks, and so on. Example: + +[source,js] +-------------------------------------------------- + "DECAY_FUNCTION": { + "FIELD_NAME": { + "origin": "2013-09-17", + "scale": "10d", + "offset": "5d", + "decay" : 0.5 + } + } +-------------------------------------------------- + + +The format of the origin depends on the <<mapping-date-format>> defined in your mapping. If you do not define the origin, the current time is used. + + +The `offset` and `decay` parameters are optional. + +[horizontal] +`offset`:: + If an `offset` is defined, the decay function will only compute a the + decay function for documents with a distance greater that the defined + `offset`. The default is 0. + +`decay`:: + The `decay` parameter defines how documents are scored at the distance + given at `scale`. If no `decay` is defined, documents at the distance + `scale` will be scored 0.5. + +In the first example, your documents might represents hotels and contain a geo +location field. You want to compute a decay function depending on how +far the hotel is from a given location. You might not immediately see +what scale to choose for the gauss function, but you can say something +like: "At a distance of 2km from the desired location, the score should +be reduced by one third." +The parameter "scale" will then be adjusted automatically to assure that +the score function computes a score of 0.5 for hotels that are 2km away +from the desired location. + + +In the second example, documents with a field value between 2013-09-12 and 2013-09-22 would get a weight of 1.0 and documents which are 15 days from that date a weight of 0.5. + + + +The `DECAY_FUNCTION` determines the shape of the decay: + +[horizontal] +`gauss`:: + +Normal decay, computed as: ++ +image:images/Gaussian.png[] + +`exp`:: + +Exponential decay, computed as: ++ +image:images/Exponential.png[] + + +`linear`:: +Linear decay, computed as: ++ +image:images/Linear.png[]. ++ +In contrast to the normal and exponential decay, this function actually +sets the score to 0 if the field value exceeds twice the user given +scale value. + +==== Detailed example + +Suppose you are searching for a hotel in a certain town. Your budget is +limited. Also, you would like the hotel to be close to the town center, +so the farther the hotel is from the desired location the less likely +you are to check in. + +You would like the query results that match your criterion (for +example, "hotel, Nancy, non-smoker") to be scored with respect to +distance to the town center and also the price. + +Intuitively, you would like to define the town center as the origin and +maybe you are willing to walk 2km to the town center from the hotel. + +In this case your *origin* for the location field is the town center +and the *scale* is ~2km. + +If your budget is low, you would probably prefer something cheap above +something expensive. For the price field, the *origin* would be 0 Euros +and the *scale* depends on how much you are willing to pay, for example 20 Euros. + +In this example, the fields might be called "price" for the price of the +hotel and "location" for the coordinates of this hotel. + +The function for `price` in this case would be + +[source,js] +-------------------------------------------------- +"DECAY_FUNCTION": { + "price": { + "origin": "0", + "scale": "20" + } +} +-------------------------------------------------- + +and for `location`: + +[source,js] +-------------------------------------------------- + +"DECAY_FUNCTION": { + "location": { + "origin": "11, 12", + "scale": "2km" + } +} +-------------------------------------------------- + +where `DECAY_FUNCTION` can be "linear", "exp" and "gauss". + +Suppose you want to multiply these two functions on the original score, +the request would look like this: + +[source,js] +-------------------------------------------------- +curl 'localhost:9200/hotels/_search/' -d '{ +"query": { + "function_score": { + "functions": [ + { + "DECAY_FUNCTION": { + "price": { + "origin": "0", + "scale": "20" + } + } + }, + { + "DECAY_FUNCTION": { + "location": { + "origin": "11, 12", + "scale": "2km" + } + } + } + ], + "query": { + "match": { + "properties": "balcony" + } + }, + "score_mode": "multiply" + } +} +}' +-------------------------------------------------- + +Next, we show how the computed score looks like for each of the three +possible decay functions. + +===== Normal decay, keyword `gauss` + +When choosing `gauss` as the decay function in the above example, the +contour and surface plot of the multiplier looks like this: + +image::https://f.cloud.github.com/assets/4320215/768157/cd0e18a6-e898-11e2-9b3c-f0145078bd6f.png[width="700px"] + +image::https://f.cloud.github.com/assets/4320215/768160/ec43c928-e898-11e2-8e0d-f3c4519dbd89.png[width="700px"] + +Suppose your original search results matches three hotels : + +* "Backback Nap" +* "Drink n Drive" +* "BnB Bellevue". + +"Drink n Drive" is pretty far from your defined location (nearly 2 km) +and is not too cheap (about 13 Euros) so it gets a low factor a factor +of 0.56. "BnB Bellevue" and "Backback Nap" are both pretty close to the +defined location but "BnB Bellevue" is cheaper, so it gets a multiplier +of 0.86 whereas "Backpack Nap" gets a value of 0.66. + +===== Exponential decay, keyword `exp` + +When choosing `exp` as the decay function in the above example, the +contour and surface plot of the multiplier looks like this: + +image::https://f.cloud.github.com/assets/4320215/768161/082975c0-e899-11e2-86f7-174c3a729d64.png[width="700px"] + +image::https://f.cloud.github.com/assets/4320215/768162/0b606884-e899-11e2-907b-aefc77eefef6.png[width="700px"] + +===== Linear' decay, keyword `linear` + +When choosing `linear` as the decay function in the above example, the +contour and surface plot of the multiplier looks like this: + +image::https://f.cloud.github.com/assets/4320215/768164/1775b0ca-e899-11e2-9f4a-776b406305c6.png[width="700px"] + +image::https://f.cloud.github.com/assets/4320215/768165/19d8b1aa-e899-11e2-91bc-6b0553e8d722.png[width="700px"] + +==== Supported fields for decay functions + +Only single valued numeric fields, including time and geo locations, +are supported. + +==== What is a field is missing? + +If the numeric field is missing in the document, the function will +return 1. + +==== Relation to `custom_boost`, `custom_score` and `custom_filters_score` + +The `custom_boost_factor` query + +[source,js] +-------------------------------------------------- +"custom_boost_factor": { + "boost_factor": 5.2, + "query": {...} +} +-------------------------------------------------- + +becomes + +[source,js] +-------------------------------------------------- +"function_score": { + "boost_factor": 5.2, + "query": {...} +} +-------------------------------------------------- + +The `custom_score` query + +[source,js] +-------------------------------------------------- +"custom_score": { + "params": { + "param1": 2, + "param2": 3.1 + }, + "query": {...}, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" +} +-------------------------------------------------- + +becomes + +[source,js] +-------------------------------------------------- +"function_score": { + "boost_mode": "replace", + "query": {...}, + "script_score": { + "params": { + "param1": 2, + "param2": 3.1 + }, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" + } +} +-------------------------------------------------- + +and the `custom_filters_score` + +[source,js] +-------------------------------------------------- +"custom_filters_score": { + "filters": [ + { + "boost_factor": "3", + "filter": {...} + }, + { + "filter": {â¦}, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" + } + ], + "params": { + "param1": 2, + "param2": 3.1 + }, + "query": {...}, + "score_mode": "first" +} +-------------------------------------------------- + +becomes: + +[source,js] +-------------------------------------------------- +"function_score": { + "functions": [ + { + "boost_factor": "3", + "filter": {...} + }, + { + "filter": {...}, + "script_score": { + "params": { + "param1": 2, + "param2": 3.1 + }, + "script": "_score * doc['my_numeric_field'].value / pow(param1, param2)" + } + } + ], + "query": {...}, + "score_mode": "first" +} +-------------------------------------------------- + + |