summaryrefslogtreecommitdiff
path: root/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc
diff options
context:
space:
mode:
Diffstat (limited to 'docs/reference/analysis/analyzers/snowball-analyzer.asciidoc')
-rw-r--r--docs/reference/analysis/analyzers/snowball-analyzer.asciidoc64
1 files changed, 64 insertions, 0 deletions
diff --git a/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc b/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc
new file mode 100644
index 0000000..64804fc
--- /dev/null
+++ b/docs/reference/analysis/analyzers/snowball-analyzer.asciidoc
@@ -0,0 +1,64 @@
+[[analysis-snowball-analyzer]]
+=== Snowball Analyzer
+
+An analyzer of type `snowball` that uses the
+<<analysis-standard-tokenizer,standard
+tokenizer>>, with
+<<analysis-standard-tokenfilter,standard
+filter>>,
+<<analysis-lowercase-tokenfilter,lowercase
+filter>>,
+<<analysis-stop-tokenfilter,stop
+filter>>, and
+<<analysis-snowball-tokenfilter,snowball
+filter>>.
+
+The Snowball Analyzer is a stemming analyzer from Lucene that is
+originally based on the snowball project from
+http://snowball.tartarus.org[snowball.tartarus.org].
+
+Sample usage:
+
+[source,js]
+--------------------------------------------------
+{
+ "index" : {
+ "analysis" : {
+ "analyzer" : {
+ "my_analyzer" : {
+ "type" : "snowball",
+ "language" : "English"
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+
+The `language` parameter can have the same values as the
+<<analysis-snowball-tokenfilter,snowball
+filter>> and defaults to `English`. Note that not all the language
+analyzers have a default set of stopwords provided.
+
+The `stopwords` parameter can be used to provide stopwords for the
+languages that have no defaults, or to simply replace the default set
+with your custom list. Check <<analysis-stop-analyzer,Stop Analyzer>>
+for more details. A default set of stopwords for many of these
+languages is available from for instance
+https://github.com/apache/lucene-solr/tree/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/[here]
+and
+https://github.com/apache/lucene-solr/tree/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball[here.]
+
+A sample configuration (in YAML format) specifying Swedish with
+stopwords:
+
+[source,js]
+--------------------------------------------------
+index :
+ analysis :
+ analyzer :
+ my_analyzer:
+ type: snowball
+ language: Swedish
+ stopwords: "och,det,att,i,en,jag,hon,som,han,på,den,med,var,sig,för,så,till,är,men,ett,om,hade,de,av,icke,mig,du,henne,då,sin,nu,har,inte,hans,honom,skulle,hennes,där,min,man,ej,vid,kunde,något,från,ut,när,efter,upp,vi,dem,vara,vad,över,än,dig,kan,sina,här,ha,mot,alla,under,någon,allt,mycket,sedan,ju,denna,själv,detta,åt,utan,varit,hur,ingen,mitt,ni,bli,blev,oss,din,dessa,några,deras,blir,mina,samma,vilken,er,sådan,vår,blivit,dess,inom,mellan,sådant,varför,varje,vilka,ditt,vem,vilket,sitta,sådana,vart,dina,vars,vårt,våra,ert,era,vilkas"
+--------------------------------------------------