class Ferret::Analysis::StopFilter
Summary¶ ↑
A StopFilter filters stop-words from a TokenStream. Stop-words are words that you don't wish to be indexed. Usually they will be common words like “the” and “and”, although you can specify whichever words you want.
Example¶ ↑
["the", "pig", "and", "whistle"] => ["pig", "whistle"]
Public Class Methods
new(token_stream) → token_stream
click to toggle source
new(token_stream, ["the", "and", "it"]) → token_stream
Create a StopFilter which removes stop-words from a TokenStream. You can optionally specify the stop-words you wish to have removed.
- token_stream
-
TokenStream to be filtered
- stop_words
-
Array of stop-words you wish to be filtered out. This defaults to a list of English stop-words. Ferret::Analysis contains a number of stop-word lists.
/*
 * Initializer behind StopFilter.new(token_stream [, stop_words]).
 *
 * argc/argv carry the Ruby arguments: a mandatory TokenStream object and an
 * optional stop-word list. The C TokenStream behind the Ruby stream is
 * wrapped in a stop filter (using the supplied words when given, the
 * library default otherwise), the resulting filter is attached to `self`,
 * and `self` is returned.
 */
static VALUE
frb_stop_filter_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rinner, rwords;
    TokenStream *inner;
    TokenStream *filter;

    /* "11" = one required argument followed by one optional argument;
     * rwords is left as Qnil when the caller omits the word list. */
    rb_scan_args(argc, argv, "11", &rinner, &rwords);
    inner = frb_get_cwrapped_rts(rinner);

    if (rwords == Qnil) {
        /* No list supplied: build the filter with its default stop-words. */
        filter = stop_filter_new(inner);
    } else {
        char **words = get_stopwords(rwords);

        filter = stop_filter_new_with_words(inner, (const char **)words);
        /* Only the pointer array is released here.
         * NOTE(review): presumably the filter keeps its own copy of the
         * words -- confirm against stop_filter_new_with_words. */
        free(words);
    }

    /* Associate the filter's sub-stream slot with the Ruby stream object.
     * NOTE(review): presumably this keeps rinner reachable for the GC mark
     * function -- confirm against object_add / frb_tf_mark. */
    object_add(&(TkFilt(filter)->sub_ts), rinner);

    /* Install the C filter as self's data, with its mark/free callbacks. */
    Frt_Wrap_Struct(self, &frb_tf_mark, &frb_tf_free, filter);
    object_add(filter, self);

    return self;
}