class Ferret::Analysis::TokenStream
Summary
A TokenStream enumerates the sequence of tokens, either from fields of a document or from query text.
This is an abstract class. Concrete subclasses are:
- Tokenizer: a TokenStream whose input is a string
- TokenFilter: a TokenStream whose input is another TokenStream
Public Instance Methods
next → token
click to toggle source
Return the next token from the TokenStream or nil if there are no more tokens.
/*
 * Ruby method TokenStream#next.
 *
 * Fetches the TokenStream struct behind +self+ with GET_TS and invokes the
 * stream's own next() callback. A NULL result is reported to Ruby as nil;
 * any other token is handed to get_token() and that result is returned.
 */
static VALUE
frb_ts_next(VALUE self)
{
    TokenStream *stream;
    Token *tok;

    GET_TS(stream, self);
    tok = stream->next(stream);

    /* NULL from the stream means there are no more tokens. */
    return (tok != NULL) ? get_token(tok) : Qnil;
}
text → text
click to toggle source
Return the text that the TokenStream is tokenizing.
/*
 * Ruby method TokenStream#text (reader).
 *
 * Asks object_get() for a Ruby value keyed on the address of ts->text and
 * returns it when it is not nil. Otherwise, if the stream has a text
 * pointer, a new Ruby string is built from it with rb_str_new2(), passed to
 * object_set() under the same key, and returned. When the stream has no
 * text, nil is returned.
 */
static VALUE
frb_ts_get_text(VALUE self)
{
    TokenStream *ts;
    VALUE cached;
    VALUE fresh;

    Data_Get_Struct(self, TokenStream, ts);

    cached = object_get(&ts->text);
    if (cached != Qnil) {
        return cached;
    }

    /* Nothing registered for this stream and no text to wrap either. */
    if (!ts->text) {
        return Qnil;
    }

    fresh = rb_str_new2(ts->text);
    object_set(&ts->text, fresh);
    return fresh;
}
text = text → text
click to toggle source
Set the text attribute of the TokenStream to the text you wish to be tokenized. For example, you may do this:
token_stream.text = File.read(file_name)
/*
 * Ruby method TokenStream#text= (writer).
 *
 * Coerces +rtext+ to a String with StringValue, then resets the stream on
 * the C string obtained from rs2s(rtext). The Ruby string is also stored in
 * an instance variable on +self+ (id_text) so it is not garbage collected.
 * NOTE(review): presumably the stream keeps pointing into the string's
 * buffer after reset(); confirm against the reset implementations.
 *
 * Returns the (possibly coerced) string.
 */
static VALUE
frb_ts_set_text(VALUE self, VALUE rtext)
{
    TokenStream *stream;

    Data_Get_Struct(self, TokenStream, stream);

    /* Must run before rs2s(): it may replace rtext with its String form. */
    StringValue(rtext);
    stream->reset(stream, rs2s(rtext));

    /* Hold a reference on self to prevent garbage collection of rtext. */
    rb_ivar_set(self, id_text, rtext);

    return rtext;
}