# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import Any, Optional

from opensearchpy.connection.connections import get_connection

from .utils import AttrDict, DslBase, merge


class AnalysisBase:
    @classmethod
    def _type_shortcut(
        cls: Any, name_or_instance: Any, type: Any = None, **kwargs: Any
    ) -> Any:
        if isinstance(name_or_instance, cls):
            if type or kwargs:
                raise ValueError(f"{cls.__name__}() cannot accept parameters.")
            return name_or_instance

        if not (type or kwargs):
            return cls.get_dsl_class("builtin")(name_or_instance)

        return cls.get_dsl_class(type, "custom")(
            name_or_instance, type or "custom", **kwargs
        )


class CustomAnalysis:
    name: Optional[str] = "custom"

    def __init__(
        self, filter_name: str, builtin_type: str = "custom", **kwargs: Any
    ) -> None:
        self._builtin_type = builtin_type
        self._name = filter_name
        super().__init__(**kwargs)

    def to_dict(self) -> Any:
        # only the name is rendered when this object appears in a list
        return self._name

    def get_definition(self) -> Any:
        d = super().to_dict()  # type: ignore
        d = d.pop(self.name)
        d["type"] = self._builtin_type
        return d


class CustomAnalysisDefinition(CustomAnalysis):
    def get_analysis_definition(self: Any) -> Any:
        out = {self._type_name: {self._name: self.get_definition()}}

        t: Any = getattr(self, "tokenizer", None)
        if "tokenizer" in self._param_defs and hasattr(t, "get_definition"):
            out["tokenizer"] = {t._name: t.get_definition()}

        filters = {
            f._name: f.get_definition()
            for f in self.filter
            if hasattr(f, "get_definition")
        }
        if filters:
            out["filter"] = filters

        # any sub filter definitions like multiplexers etc?
        for f in self.filter:
            if hasattr(f, "get_analysis_definition"):
                d = f.get_analysis_definition()
                if d:
                    merge(out, d, True)

        char_filters = {
            f._name: f.get_definition()
            for f in self.char_filter
            if hasattr(f, "get_definition")
        }
        if char_filters:
            out["char_filter"] = char_filters

        return out


class BuiltinAnalysis:
    name: Optional[str] = "builtin"

    def __init__(self, name: Any) -> None:
        self._name = name
        super().__init__()

    def to_dict(self) -> Any:
        # only the name is rendered when this object appears in a list
        return self._name
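# A minimal sketch of how the shortcut machinery above is typically used. The
# module-level helpers bound at the bottom of this file (``analyzer``,
# ``tokenizer``, ...) dispatch through AnalysisBase._type_shortcut: a bare
# name yields a builtin wrapper, while a name plus a type and/or parameters
# yields a custom definition. The names used here are illustrative, not part
# of this module:
#
#   analyzer("standard")              # builtin analyzer, serialized as "standard"
#   analyzer(
#       "my_analyzer",                # hypothetical custom analyzer name
#       tokenizer="keyword",
#       filter=["lowercase"],
#   )                                 # custom analyzer definition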
class Analyzer(AnalysisBase, DslBase):
    _type_name: str = "analyzer"
    name: Optional[str] = None


class BuiltinAnalyzer(BuiltinAnalysis, Analyzer):
    def get_analysis_definition(self) -> Any:
        return {}


class CustomAnalyzer(CustomAnalysisDefinition, Analyzer):
    _param_defs = {
        "filter": {"type": "token_filter", "multi": True},
        "char_filter": {"type": "char_filter", "multi": True},
        "tokenizer": {"type": "tokenizer"},
    }

    def simulate(
        self,
        text: Any,
        using: str = "default",
        explain: bool = False,
        attributes: Any = None,
    ) -> Any:
        """
        Use the Analyze API of opensearch to test the outcome of this analyzer.

        :arg text: Text to be analyzed
        :arg using: connection alias to use, defaults to ``'default'``
        :arg explain: will output all token attributes for each token. You can
            filter the token attributes you want to output by setting the
            ``attributes`` option.
        :arg attributes: if ``explain`` is specified, filter the token
            attributes to return.
        """
        opensearch = get_connection(using)

        body = {"text": text, "explain": explain}
        if attributes:
            body["attributes"] = attributes

        definition = self.get_analysis_definition()
        analyzer_def = self.get_definition()

        for section in ("tokenizer", "char_filter", "filter"):
            if section not in analyzer_def:
                continue
            sec_def = definition.get(section, {})
            sec_names = analyzer_def[section]

            if isinstance(sec_names, str):
                body[section] = sec_def.get(sec_names, sec_names)
            else:
                body[section] = [
                    sec_def.get(sec_name, sec_name) for sec_name in sec_names
                ]

        if self._builtin_type != "custom":
            body["analyzer"] = self._builtin_type

        return AttrDict(opensearch.indices.analyze(body=body))


class Normalizer(AnalysisBase, DslBase):
    _type_name: str = "normalizer"
    name: Optional[str] = None


class BuiltinNormalizer(BuiltinAnalysis, Normalizer):
    def get_analysis_definition(self) -> Any:
        return {}


class CustomNormalizer(CustomAnalysisDefinition, Normalizer):
    _param_defs = {
        "filter": {"type": "token_filter", "multi": True},
        "char_filter": {"type": "char_filter", "multi": True},
    }


class Tokenizer(AnalysisBase, DslBase):
    _type_name: str = "tokenizer"
    name: Optional[str] = None


class BuiltinTokenizer(BuiltinAnalysis, Tokenizer):
    pass


class CustomTokenizer(CustomAnalysis, Tokenizer):
    pass


class TokenFilter(AnalysisBase, DslBase):
    _type_name: str = "token_filter"
    name: Optional[str] = None


class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter):
    pass


class CustomTokenFilter(CustomAnalysis, TokenFilter):
    pass


class MultiplexerTokenFilter(CustomTokenFilter):
    name = "multiplexer"

    def get_definition(self) -> Any:
        d = super(CustomTokenFilter, self).get_definition()

        if "filters" in d:
            d["filters"] = [
                # comma delimited string given by user
                (
                    fs
                    if isinstance(fs, str)
                    # list of strings or TokenFilter objects
                    else ", ".join(
                        f.to_dict() if hasattr(f, "to_dict") else f for f in fs
                    )
                )
                for fs in self.filters
            ]
        return d

    def get_analysis_definition(self) -> Any:
        if not hasattr(self, "filters"):
            return {}

        fs: Any = {}
        d = {"filter": fs}
        for filters in self.filters:
            if isinstance(filters, str):
                continue
            fs.update(
                {
                    f._name: f.get_definition()
                    for f in filters
                    if hasattr(f, "get_definition")
                }
            )
        return d


class ConditionalTokenFilter(CustomTokenFilter):
    name = "condition"

    def get_definition(self) -> Any:
        d = super(CustomTokenFilter, self).get_definition()
        if "filter" in d:
            d["filter"] = [
                f.to_dict() if hasattr(f, "to_dict") else f for f in self.filter
            ]
        return d

    def get_analysis_definition(self) -> Any:
        if not hasattr(self, "filter"):
            return {}

        return {
            "filter": {
                f._name: f.get_definition()
                for f in self.filter
                if hasattr(f, "get_definition")
            }
        }


class CharFilter(AnalysisBase, DslBase):
    _type_name: str = "char_filter"
    name: Optional[str] = None


class BuiltinCharFilter(BuiltinAnalysis, CharFilter):
    pass


class CustomCharFilter(CustomAnalysis, CharFilter):
    pass
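# A hedged sketch of how the specialized token filters above serialize; the
# filter name "my_multiplexer" is an assumption for illustration, not a
# definition shipped with this module. A multiplexer's ``filters`` entries may
# be pre-joined comma-delimited strings (kept as-is) or lists (joined):
#
#   tf = token_filter(
#       "my_multiplexer",                         # hypothetical filter name
#       type="multiplexer",
#       filters=["lowercase", "lowercase, porter_stem"],
#   )
#   tf.get_definition()
#   # -> {"type": "multiplexer",
#   #     "filters": ["lowercase", "lowercase, porter_stem"]}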
# shortcuts for direct use
analyzer = Analyzer._type_shortcut
tokenizer = Tokenizer._type_shortcut
token_filter = TokenFilter._type_shortcut
char_filter = CharFilter._type_shortcut
normalizer = Normalizer._type_shortcut

__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"]
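# A minimal end-to-end sketch under assumed, illustrative names: build a
# custom analyzer from the shortcuts above and ask it for the index-level
# analysis settings it implies. Note that ``simulate()`` additionally
# requires a live connection registered under the given alias.
#
#   my_analyzer = analyzer(
#       "my_analyzer",                            # hypothetical analyzer name
#       tokenizer=tokenizer("trigram", "ngram", min_gram=3, max_gram=3),
#       filter=["lowercase"],
#   )
#   my_analyzer.get_analysis_definition()
#   # -> {"analyzer": {"my_analyzer": {"type": "custom",
#   #         "tokenizer": "trigram", "filter": ["lowercase"]}},
#   #     "tokenizer": {"trigram": {"type": "ngram",
#   #         "min_gram": 3, "max_gram": 3}}}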