Interface INoriTokenizer
Tokenizer that ships with the analysis-nori plugin
Namespace: OpenSearch.Client
Assembly: OpenSearch.Client.dll
Syntax
public interface INoriTokenizer : ITokenizer
Properties
DecompoundMode
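Determines how the tokenizer handles compound tokens: none (no decomposition), discard (decompose and drop the original form), or mixed (decompose and keep the original form). Defaults to discard.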
Declaration
[DataMember(Name = "decompound_mode")]
NoriDecompoundMode? DecompoundMode { get; set; }
Property Value
Type | Description |
---|---|
NoriDecompoundMode? |
DiscardPunctuation
Whether punctuation should be discarded from the output. Defaults to true.
Declaration
[DataMember(Name = "discard_punctuation")]
bool? DiscardPunctuation { get; set; }
Property Value
Type | Description |
---|---|
bool? |
UserDictionary
The Nori tokenizer uses the mecab-ko-dic dictionary by default. A user_dictionary with custom nouns (NNG) may be appended to the default dictionary. This property allows you to specify the path to that dictionary file on disk.
Declaration
[DataMember(Name = "user_dictionary")]
string UserDictionary { get; set; }
Property Value
Type | Description |
---|---|
string |
UserDictionaryRules
The Nori tokenizer uses the mecab-ko-dic dictionary by default. A user_dictionary with custom nouns (NNG) can be specified inline with this property.
Declaration
[DataMember(Name = "user_dictionary_rules")]
IEnumerable<string> UserDictionaryRules { get; set; }
Property Value
Type | Description |
---|---|
IEnumerable<string> |