diff options
Diffstat (limited to 'docs/reference/analysis/analyzers/snowball-analyzer.asciidoc')
-rw-r--r-- | docs/reference/analysis/analyzers/snowball-analyzer.asciidoc | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc b/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc new file mode 100644 index 0000000..64804fc --- /dev/null +++ b/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc @@ -0,0 +1,64 @@ +[[analysis-snowball-analyzer]] +=== Snowball Analyzer + +An analyzer of type `snowball` that uses the +<<analysis-standard-tokenizer,standard +tokenizer>>, with +<<analysis-standard-tokenfilter,standard +filter>>, +<<analysis-lowercase-tokenfilter,lowercase +filter>>, +<<analysis-stop-tokenfilter,stop +filter>>, and +<<analysis-snowball-tokenfilter,snowball +filter>>. + +The Snowball Analyzer is a stemming analyzer from Lucene that is +originally based on the snowball project from +http://snowball.tartarus.org[snowball.tartarus.org]. + +Sample usage: + +[source,js] +-------------------------------------------------- +{ + "index" : { + "analysis" : { + "analyzer" : { + "my_analyzer" : { + "type" : "snowball", + "language" : "English" + } + } + } + } +} +-------------------------------------------------- + +The `language` parameter can have the same values as the +<<analysis-snowball-tokenfilter,snowball +filter>> and defaults to `English`. Note that not all the language +analyzers have a default set of stopwords provided. + +The `stopwords` parameter can be used to provide stopwords for the +languages that have no defaults, or to simply replace the default set +with your custom list. Check <<analysis-stop-analyzer,Stop Analyzer>> +for more details. A default set of stopwords for many of these +languages is available from for instance +https://github.com/apache/lucene-solr/tree/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/[here] +and +https://github.com/apache/lucene-solr/tree/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball[here.] + +A sample configuration (in YAML format) specifying Swedish with +stopwords: + +[source,js] +-------------------------------------------------- +index : + analysis : + analyzer : + my_analyzer: + type: snowball + language: Swedish + stopwords: "och,det,att,i,en,jag,hon,som,han,på,den,med,var,sig,för,så,till,är,men,ett,om,hade,de,av,icke,mig,du,henne,då,sin,nu,har,inte,hans,honom,skulle,hennes,där,min,man,ej,vid,kunde,något,från,ut,när,efter,upp,vi,dem,vara,vad,över,än,dig,kan,sina,här,ha,mot,alla,under,någon,allt,mycket,sedan,ju,denna,själv,detta,åt,utan,varit,hur,ingen,mitt,ni,bli,blev,oss,din,dessa,några,deras,blir,mina,samma,vilken,er,sådan,vår,blivit,dess,inom,mellan,sådant,varför,varje,vilka,ditt,vem,vilket,sitta,sådana,vart,dina,vars,vårt,våra,ert,era,vilkas" +-------------------------------------------------- |