Diffstat (limited to 'docs/reference/analysis/tokenizers.asciidoc')
 -rw-r--r--  docs/reference/analysis/tokenizers.asciidoc | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+), 0 deletions(-)
diff --git a/docs/reference/analysis/tokenizers.asciidoc b/docs/reference/analysis/tokenizers.asciidoc
new file mode 100644
index 0000000..3118b0d
--- /dev/null
+++ b/docs/reference/analysis/tokenizers.asciidoc
@@ -0,0 +1,30 @@
+[[analysis-tokenizers]]
+== Tokenizers
+
+Tokenizers are used to break a string down into a stream of terms
+or tokens. A simple tokenizer might split the string up into terms
+wherever it encounters whitespace or punctuation.
+
+Elasticsearch has a number of built-in tokenizers which can be
+used to build <<analysis-custom-analyzer,custom analyzers>>.
+
+include::tokenizers/standard-tokenizer.asciidoc[]
+
+include::tokenizers/edgengram-tokenizer.asciidoc[]
+
+include::tokenizers/keyword-tokenizer.asciidoc[]
+
+include::tokenizers/letter-tokenizer.asciidoc[]
+
+include::tokenizers/lowercase-tokenizer.asciidoc[]
+
+include::tokenizers/ngram-tokenizer.asciidoc[]
+
+include::tokenizers/whitespace-tokenizer.asciidoc[]
+
+include::tokenizers/pattern-tokenizer.asciidoc[]
+
+include::tokenizers/uaxurlemail-tokenizer.asciidoc[]
+
+include::tokenizers/pathhierarchy-tokenizer.asciidoc[]
+
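To make the relationship between built-in tokenizers and custom analyzers concrete, here is a minimal sketch of wiring one of the tokenizers listed above into a custom analyzer and inspecting the token stream it produces. The index name `test_index`, the analyzer name `my_analyzer`, and the `localhost:9200` endpoint are illustrative assumptions, not part of this change, and the header-less curl form reflects the request style used by Elasticsearch releases of this era.

[source,sh]
----
# Hypothetical example: create an index whose custom analyzer uses the
# built-in "whitespace" tokenizer followed by a lowercase token filter.
curl -XPUT 'localhost:9200/test_index' -d '{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type":      "custom",
          "tokenizer": "whitespace",
          "filter":    ["lowercase"]
        }
      }
    }
  }
}'

# Inspect the tokens the analyzer emits for a sample string.
curl -XGET 'localhost:9200/test_index/_analyze?analyzer=my_analyzer&pretty' -d 'The QUICK Brown-Foxes!'
----

Because the whitespace tokenizer splits only on whitespace, `Brown-Foxes!` would remain a single token here; the lowercase filter then runs on each token the tokenizer emits, which is the division of labour the paragraph above describes.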