您的位置:首页 > 其它

solr7.1.0学习笔记(9)---配置文件managed-schema(schema.xml)-样例

2018-01-02 16:56 603 查看
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements.  See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License.  You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0 
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<schema name="example" version="1.6">
<!-- Field names should consist of alphanumeric and underscore characters and must
     not start with a digit. Names that both begin and end with an underscore
     (e.g. _version_) are reserved. -->

<!-- If this field is removed, the update log in solrconfig.xml must be removed as
     well. _version_ and the update log are required for SolrCloud. The field plays
     a role similar to the version column in Hibernate: optimistic locking. -->
<field name="_version_" type="long"   indexed="true" stored="false"/>

<!-- Needed when documents contain nested child documents: a nested document uses
     this field to point to its parent document. -->
<field name="_root_"    type="string" indexed="true" stored="false" docValues="false"/>

<!-- Do not remove the "id" field unless there is a very good reason to. Do not
     change its type, and do not apply index-time analysis to the field named in
     <uniqueKey>. -->
<field name="id"        type="string" indexed="true" stored="true" required="true" multiValued="false"/>

<!-- Sample fields. -->
<field name="pre"        type="preanalyzed"             indexed="true" stored="true"/>
<field name="sku"        type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
<field name="name"       type="text_general"            indexed="true" stored="true"/>
<field name="manu"       type="text_general"            indexed="true" stored="true" omitNorms="true"/>
<field name="cat"        type="string"                  indexed="true" stored="true" multiValued="true"/>
<field name="features"   type="text_general"            indexed="true" stored="true" multiValued="true"/>
<field name="includes"   type="text_general"            indexed="true" stored="true"
       termVectors="true"
       termPositions="true"
       termOffsets="true"/>

<field name="weight"     type="float"                   indexed="true" stored="true"/>
<field name="price"      type="float"                   indexed="true" stored="true"/>
<field name="popularity" type="int"                     indexed="true" stored="true"/>
<field name="inStock"    type="boolean"                 indexed="true" stored="true"/>

<field name="store"      type="location"                indexed="true" stored="true"/>

<!-- Fields needed when extracting rich documents such as Word or PDF files.
     Some are multiValued="true" because Tika returns several values for them.
     Some come from the client context:
       "content_type": from the HTTP headers of the incoming stream
       "resourcename": from the SolrCell request param resource.name
     If the application does not parse this kind of document, none of these field
     definitions are needed. -->
<field name="title"         type="text_general" indexed="true"  stored="true" multiValued="true"/>
<field name="subject"       type="text_general" indexed="true"  stored="true"/>
<field name="description"   type="text_general" indexed="true"  stored="true"/>
<field name="comments"      type="text_general" indexed="true"  stored="true"/>
<field name="author"        type="text_general" indexed="true"  stored="true"/>
<field name="keywords"      type="text_general" indexed="true"  stored="true"/>
<field name="category"      type="text_general" indexed="true"  stored="true"/>
<field name="resourcename"  type="text_general" indexed="true"  stored="true"/>
<field name="url"           type="text_general" indexed="true"  stored="true"/>
<field name="content_type"  type="string"       indexed="true"  stored="true" multiValued="true"/>
<field name="last_modified" type="date"         indexed="true"  stored="true"/>
<field name="links"         type="string"       indexed="true"  stored="true" multiValued="true"/>
<field name="_src_"         type="string"       indexed="false" stored="true"/>

<!-- Main body of the Word, PDF, etc. document as extracted by SolrCell.
     Defined as stored but not indexed, so it can be returned and used for
     highlighting the matched content. -->
<field name="content"    type="text_general"     indexed="false" stored="true"  multiValued="true"/>

<!-- Catch-all field that gathers every searchable field (populated through
     copyField). -->
<field name="text"       type="text_general"     indexed="true"  stored="false" multiValued="true"/>

<!-- Catch-all field whose type also indexes reversed tokens, which helps
     wildcard searches. -->
<field name="text_rev"   type="text_general_rev" indexed="true"  stored="false" multiValued="true"/>

<!-- Non-tokenized version of manufacturer, to make sorting or grouping by
     manufacturer easier. -->
<field name="manu_exact" type="string"           indexed="true"  stored="false" docValues="false"/>

<!-- Field whose tokens carry payloads. Its type is declared immediately below. -->
<field name="payloads" type="payloads" indexed="true" stored="true"/>

<!-- Field type backing the "payloads" field. The field references type="payloads",
     so a fieldType with that name has to exist in this schema; without it the
     schema fails to load with an unknown-fieldType error.
     Input is split on whitespace, then DelimitedPayloadTokenFilterFactory strips
     the delimited suffix from each token and attaches it as the token payload.
     With encoder="float" and the default "|" delimiter, the token "foo|1.4" is
     indexed as "foo" with a float payload of 1.4. -->
<fieldType name="payloads" class="solr.TextField" indexed="true" stored="false">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
  </analyzer>
</fieldType>

<!-- Dynamic field definitions: a convenient way to configure every field whose
     name matches a pattern.
     Example: name="*_i" matches all fields ending in _i (such as myid_i, z_i).
     Restriction: "*" may only appear at the beginning or the end of the name. -->

<dynamicField name="*_i"   type="int"          indexed="true" stored="true"/>
<dynamicField name="*_is"  type="int"          indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s"   type="string"       indexed="true" stored="true"/>
<dynamicField name="*_ss"  type="string"       indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l"   type="long"         indexed="true" stored="true"/>
<dynamicField name="*_ls"  type="long"         indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_t"   type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_en"  type="text_en"      indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_b"   type="boolean"      indexed="true" stored="true"/>
<dynamicField name="*_bs"  type="boolean"      indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_f"   type="float"        indexed="true" stored="true"/>
<dynamicField name="*_fs"  type="float"        indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_d"   type="double"       indexed="true" stored="true"/>
<dynamicField name="*_ds"  type="double"       indexed="true" stored="true" multiValued="true"/>

<!-- Used to index the latitude and longitude of the "location" field type. -->
<dynamicField name="*_coordinate" type="tdouble"  indexed="true" stored="false" useDocValuesAsStored="false"/>

<dynamicField name="*_dt"         type="date"     indexed="true" stored="true"/>
<dynamicField name="*_dts"        type="date"     indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_p"          type="location" indexed="true" stored="true"/>

<!-- Variants intended to speed up range queries. -->
<dynamicField name="*_ti"         type="tint"     indexed="true" stored="true"/>
<dynamicField name="*_tl"         type="tlong"    indexed="true" stored="true"/>
<dynamicField name="*_tf"         type="tfloat"   indexed="true" stored="true"/>
<dynamicField name="*_td"         type="tdouble"  indexed="true" stored="true"/>
<dynamicField name="*_tdt"        type="tdate"    indexed="true" stored="true"/>

<dynamicField name="*_c"          type="currency" indexed="true" stored="true"/>

<!-- Prefix patterns: ignored_* uses the "ignored" type, attr_* is indexed and
     stored as general text. -->
<dynamicField name="ignored_*" type="ignored"      multiValued="true"/>
<dynamicField name="attr_*"    type="text_general" multiValued="true" indexed="true" stored="true"/>

<!-- Dynamic field for sorting in a pseudo-random order. Its type is declared
     immediately below. -->
<dynamicField name="random_*" type="random"/>

<!-- Field type backing the random_* dynamic field. The dynamic field references
     type="random", so a fieldType with that name has to exist in this schema;
     without it the schema fails to load with an unknown-fieldType error.
     solr.RandomSortField stores nothing itself: sorting on a field of this type
     yields a pseudo-random order, and a different field name (e.g. random_1234
     versus random_5678) gives a different ordering. -->
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>

<!-- Uncomment the definition below to ignore every field that is neither declared
     explicitly nor matched by a dynamic field; otherwise such fields raise an
     error. To index and/or store unknown fields automatically instead, change
     type="ignored" to some other type (e.g. "text"). -->
<!--dynamicField name="*" type="ignored" multiValued="true" /-->

<!-- The field that uniquely identifies a document. -->
<uniqueKey>id</uniqueKey>

<!-- copyField copies the content of one field into another at the time a document
     is added to the index. This allows the same content to be indexed in different
     ways, or several fields to be merged into one field to simplify or speed up
     searching. -->

<!-- Product fields gathered into the catch-all "text" field. -->
<copyField dest="text"       source="cat"/>
<copyField dest="text"       source="name"/>
<copyField dest="text"       source="manu"/>
<copyField dest="text"       source="features"/>
<copyField dest="text"       source="includes"/>

<!-- Non-tokenized copy of the manufacturer. -->
<copyField dest="manu_exact" source="manu"/>

<!-- Copy the price into a currency-enabled field (default USD). -->
<copyField dest="price_c"    source="price"/>

<!-- Text fields from SolrCell that are searched by default through the catch-all
     field. -->
<copyField dest="text"       source="title"/>
<copyField dest="text"       source="author"/>
<copyField dest="text"       source="description"/>
<copyField dest="text"       source="keywords"/>
<copyField dest="text"       source="content"/>
<copyField dest="text"       source="content_type"/>
<copyField dest="text"       source="resourcename"/>
<copyField dest="text"       source="url"/>

<!-- Create a string version of author for faceting. Example: for the author
     "Mike Jackson", the analyzed field indexes Mike and Jackson separately, but a
     facet must treat "Mike Jackson" as one value. That requires indexing the value
     as a whole, which is what a type="string" field does. -->
<copyField dest="author_s"   source="author"/>

<!-- "name" 属性值用于 field 定义里的 type 属性值。
"class" 属性值决定了 fieldType 的真正行为。
class 以 "solr" 开头,表示的是 java 类所在包的包名缩写(比如: org.apache.solr.analysis)
-->

<!-- StrField: the value is stored and indexed as a single, unanalyzed unit.
     NOTE(review): the original note says that when docValues is set to true the
     field must be single-valued and non-empty; confirm against the Solr version
     in use. -->
<fieldType name="string"
           class="solr.StrField"
           sortMissingLast="true"/>

<!-- Boolean type: "true" or "false". -->
<fieldType name="boolean"
           class="solr.BoolField"
           sortMissingLast="true"/>

<!-- Default numeric types. For faster range queries consider
     tint/tfloat/tlong/tdouble instead. DocValues are enabled on these types.
     NOTE(review): the original note says docValues requires the field to be
     single-valued and non-empty; confirm against the Solr version in use. -->
<fieldType name="int"     class="solr.TrieIntField"    precisionStep="0" positionIncrementGap="0" docValues="true"/>
<fieldType name="float"   class="solr.TrieFloatField"  precisionStep="0" positionIncrementGap="0" docValues="true"/>
<fieldType name="long"    class="solr.TrieLongField"   precisionStep="0" positionIncrementGap="0" docValues="true"/>
<fieldType name="double"  class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" docValues="true"/>

<!-- The numeric types below index each value at several levels of precision to
     speed up range queries; see the NumericRangeQuery javadoc for details.

     A smaller precisionStep means finer-grained precision levels: the index grows
     slightly, but range queries get faster. A precisionStep of 0 generates no
     extra precision levels. -->
<fieldType name="tint"    class="solr.TrieIntField"    precisionStep="8" positionIncrementGap="0" docValues="true"/>
<fieldType name="tfloat"  class="solr.TrieFloatField"  precisionStep="8" positionIncrementGap="0" docValues="true"/>
<fieldType name="tlong"   class="solr.TrieLongField"   precisionStep="8" positionIncrementGap="0" docValues="true"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" docValues="true"/>

<!-- Date fields use the format 1995-12-31T23:59:59Z, a stricter, canonical
     representation of a date-time.

     The trailing "Z" designates UTC time and is mandatory.
     Seconds may carry a fractional part: 1995-12-31T23:59:59.999Z

     Expressions can be used to compute values relative to "NOW", for example:

       NOW/HOUR               rounded down to the hour
       NOW-1DAY               one day before now
       NOW/DAY+6MONTHS+3DAYS  six months and three days after the start of today
-->
<fieldType name="date"
           class="solr.TrieDateField"
           precisionStep="0"
           positionIncrementGap="0"
           docValues="true"/>

<!-- Date type with precisionStep="6"; the precisionStep trade-off is the same as
     for the trie numeric types. -->
<fieldType name="tdate"
           class="solr.TrieDateField"
           precisionStep="6"
           positionIncrementGap="0"
           docValues="true"/>

<!-- Binary data; values must be sent and received Base64-encoded. -->
<fieldType name="binary" class="solr.BinaryField"/>

<!-- solr.TextField allows custom tokenizers and filters, and the index-time and
     query-time analyzers may differ.
     See http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters -->

<!-- Field type that only splits on whitespace. -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>

<!-- Standard tokenizer combined with managed stop-word and synonym filters; both
     filters use the managed resource named "english". -->
<fieldType name="managed_en" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.ManagedStopFilterFactory"    managed="english"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
  </analyzer>
</fieldType>

<!-- General-purpose text type: StandardTokenizer, case-insensitive removal of the
     words listed in "stopwords.txt", then lower-casing. Synonyms are applied at
     query time only. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            words="stopwords.txt"
            ignoreCase="true"/>
    <!-- In this example synonyms are only applied at query time:
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            words="stopwords.txt"
            ignoreCase="true"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Text type tailored to English: StandardTokenizer, removal of English stop
     words (lang/stopwords_en.txt), lower-casing and stemming (e.g. "stopping" is
     indexed as "stop"). Words listed in protwords.txt are protected from
     stemming. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- In this example synonyms are only used at query time:
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <!-- Case-insensitive stop word removal. -->
    <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <!-- A less aggressive stemmer can be used instead of PorterStemFilterFactory:
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    -->
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <!-- A less aggressive stemmer can be used instead of PorterStemFilterFactory:
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    -->
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>

<!-- English text type that adds WordDelimiterFilter on top of a whitespace
     tokenizer. WordDelimiterFilter splits words on case changes and on
     letter/digit boundaries so the parts are indexed separately; a search for
     "wi fi" then matches both "WiFi" and "wi-fi". -->
<fieldType name="text_en_splitting"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- In this example synonyms are only used at query time:
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <!-- Case-insensitive stop word removal. -->
    <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
    <!-- Index time: word parts and number parts are also catenated. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
    <!-- Query time: parts are generated but not catenated. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>

<!-- A stricter variant that matches less easily and therefore produces fewer false
     matches. It suits values such as the SKU in the Solr example data. -->
<fieldType name="text_en_splitting_tight"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
    <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="0"
            generateNumberParts="0"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <!-- Removes duplicate tokens that appear at the same position, which is
         sometimes possible with WordDelimiterFilter in conjunction with
         stemming. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Similar to text_general, with one addition: each token is also reversed at
     index time, which makes queries with a leading wildcard faster. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ReversedWildcardFilterFactory"
            withOriginal="true"
            maxPosAsterisk="3"
            maxPosQuestion="2"
            maxFractionAsterisk="0.33"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Field type built from KeywordTokenizer plus several token filters, meant for
     sortable fields where part of the content should be dropped before sorting.
     Example: for the value 123test, the leading digits are removed before the
     value is sorted. -->
<fieldType name="alphaOnlySort"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer>
    <!-- KeywordTokenizer does no real tokenizing: the whole input string is kept
         as a single token. -->
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- Lower-case the token, which is useful when sorting should be case
         insensitive. -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Remove any leading or trailing whitespace. -->
    <filter class="solr.TrimFilterFactory"/>
    <!-- PatternReplaceFilter replaces text matched by a regular expression; here
         every character other than a-z is replaced with the empty string.
         See http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^a-z])"
            replacement=""
            replace="all"/>
  </analyzer>
</fieldType>

<!-- Example of using PathHierarchyTokenizerFactory at index time, so that a query
     for a path matches documents at that path or in any descendent path. The
     query side keeps the whole input as a single token. -->
<fieldType name="descendent_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>

<!-- Type for fields that should be ignored: values are neither indexed nor
     stored. -->
<fieldType name="ignored"
           class="solr.StrField"
           indexed="false"
           stored="false"
           multiValued="true"/>

<!-- Point type that indexes its coordinates as separate fields (subFields).
     If subFieldType is defined, it references a type, and a dynamic field
     definition matching *___<typename> is created. Alternatively, if
     subFieldSuffix is defined, that suffix is used to create the subFields.
       Example: with subFieldType="double" the coordinates are indexed in the
                fields myloc_0___double,myloc_1___double.
       Example: with subFieldSuffix="_d" the coordinates are indexed in the
                fields myloc_0_d,myloc_1_d
     The subFields are an implementation detail of the fieldType; end users
     normally do not need to know about them. -->
<fieldType name="point"
           class="solr.PointType"
           dimension="2"
           subFieldSuffix="_d"/>

<!-- Specialized field for geospatial search. If indexed, this fieldType must not
     be multivalued. -->
<fieldType name="location"
           class="solr.LatLonType"
           subFieldSuffix="_coordinate"/>

<!-- Geospatial field type introduced in Solr 4. It supports multiValued fields and
     polygon shapes. For more information about this and the other spatial fields
     new to Solr 4, see: http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 -->
<fieldType name="location_rpt"
           class="solr.SpatialRecursivePrefixTreeFieldType"
           geo="true"
           distErrPct="0.025"
           maxDistErr="0.001"
           distanceUnits="kilometers"/>

<!-- Spatial rectangle (bounding box) field. It supports most spatial predicates
     and has special relevancy modes: score=overlapRatio|area|area2D (a local-param
     to the query). DocValues is recommended for relevancy. -->
<fieldType name="bbox"
           class="solr.BBoxField"
           geo="true"
           distanceUnits="kilometers"
           numberType="_bbox_coord"/>

<!-- Numeric type referenced by the numberType attribute of the bbox type. -->
<fieldType name="_bbox_coord"
           class="solr.TrieDoubleField"
           precisionStep="8"
           docValues="true"
           useDocValuesAsStored="false"
           stored="false"/>

<!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
     Parameters:
       defaultCurrency: the default currency. Defaults to "USD"
       precisionStep:   precision step used for range splitting
       providerClass:   lets you plug in another exchange-rate provider backend:
         solr.FileExchangeRateProvider is the default and takes one parameter:
           currencyConfig: name of an xml file holding exchange rates
         solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
           ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
           refreshInterval:   number of minutes between each rates fetch (default: 1440, min: 60)
-->
<fieldType name="currency"
           class="solr.CurrencyField"
           precisionStep="8"
           defaultCurrency="USD"
           currencyConfig="currency.xml"/>

<!-- Some examples for different languages (generally ordered by ISO code). -->

<!-- CJK bigram (see text_ja for a Japanese configuration using morphological
     analysis). -->
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Normalize width before forming bigrams, as e.g. half-width dakuten
         combine. -->
    <filter class="solr.CJKWidthFilterFactory"/>
    <!-- Lower-casing, for any non-CJK text. -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.CJKBigramFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Pre-analyzed field type: allows inserting arbitrary token streams and stored
     values. -->
<fieldType name="preanalyzed" class="solr.PreAnalyzedField">
  <!-- Only a query analyzer is configured here; PreAnalyzedField's builtin index
       analyzer just decodes the pre-analyzed token stream. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>

</schema>


此文是复制另一篇博客,讲解很全面很细致,点击“传送门”,直接传到对方基地。

上一节:solr7.1.0学习笔记(8)---配置文件managed-schema(schema.xml)-analyzer,tokenizer
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: