您的位置:首页 > 大数据 > 人工智能

Solr5 DataImport 处理1对多关系

2015-06-18 12:02 656 查看
“问题” 和“派发处理” 是2个对象。 每个问题可以被派发N次, 最后结束问题。

实际查询中数据量可能很大,同时既可以从问题查派发部门,也可以从派发情况查询问题(如本部门处理的问题)。

使用的技巧就是:增加 doc_id、doc_type 字段,实现多 Entity 查询。

schema.xml



<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema for the "uum" core.

  Two kinds of documents share this single index and are told apart by
  doc_type:
    PE  : a petition, flattened together with the offices it was dispatched
          to and its participants (the multi-valued fields below)
    DIS : one dispatch, flattened together with selected fields of the
          petition it belongs to
  doc_id is the uniqueKey. The import configuration builds it from a type
  prefix plus the row id (PE_<id> / DIS_<id>) so keys never collide across
  the two document types.
-->
<schema name="uum" version="1.2">
  <types>
    <fieldType name="boolean" class="solr.BoolField"/>
    <fieldType name="date" class="solr.TrieDateField"/>
    <fieldType name="float" class="solr.TrieFloatField"/>
    <fieldType name="int" class="solr.TrieIntField"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="string" class="solr.StrField"/>
    <!-- URLs are only returned with results, never searched -->
    <fieldType name="url" class="solr.StrField" indexed="false" stored="true"/>

    <!-- Free text: standard tokenization plus lower-casing, nothing else -->
    <fieldType name="simpletext"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Neither indexed nor stored: values of this type are dropped -->
    <fieldType name="ignored" class="solr.StrField"
               indexed="false" stored="false"/>

    <!-- Long-based type; no field in this schema uses it at present -->
    <fieldType name="date_l" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  </types>

  <fields>

    <!--
       FIELDS THAT ARE IN DOCS OF MULTIPLE TYPES
      -->

    <!-- This is our uniqueKey, so it has to be distinct across all types
         of documents. The attributes are spelled out because a uniqueKey
         field must be indexed, single-valued and present on every document.
      -->
    <field name="doc_id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>

    <!-- the type (or domain) of our document: PE or DIS -->
    <field name="doc_type" type="string"/>

    <!-- external URLs -->
    <dynamicField name="*_url" type="url" multiValued="false"/>
    <dynamicField name="*_urls" type="url" multiValued="true"/>

    <!-- dates -->
    <dynamicField name="*_dt" type="date"/>

    <!-- numeric values that might come in handy for relevancy biasing
         (they all relate to popularity)
      -->
    <dynamicField name="*_count" type="int" multiValued="false"/>

    <!-- Solr-internal bookkeeping fields -->
    <field name="_version_" type="long" indexed="true" stored="true"/>
    <field name="_root_" type="string" indexed="true" stored="false"/>

    <!-- Field used by the Suggester for autocompletion -->
    <field name="autocomplete"
           type="simpletext"
           stored="false"
           multiValued="true"/>

    <!-- Quick search field; filled by the copyField rules at the bottom -->
    <field name="catchall"
           type="simpletext"
           stored="false"
           omitNorms="true"
           multiValued="true"/>
    <field name="ID" type="string" multiValued="false"/>

    <!--
       PETITION
      -->
    <field name="TenantId" type="string" multiValued="false"/>
    <field name="PetitionId" type="string" multiValued="false"/>
    <field name="PetitionNumber" type="string" multiValued="false"/>
    <field name="Title" type="simpletext" multiValued="false"/>
    <field name="Content" type="simpletext" multiValued="false"/>
    <field name="Tel" type="string" multiValued="false"/>
    <field name="EventAddress" type="simpletext" multiValued="false"/>
    <field name="DutyGridName" type="string" multiValued="false"/>
    <field name="ComplaintType" type="string" multiValued="false"/>
    <field name="IsVoid" type="boolean" multiValued="false"/>
    <field name="IsEnd" type="boolean" multiValued="false"/>
    <field name="GridAddress" type="simpletext" multiValued="false"/>
    <field name="CategoryName" type="string" multiValued="false"/>
    <field name="Category" type="string" multiValued="false"/>

    <field name="Status" type="string" multiValued="false"/>
    <field name="RegisterOn" type="date" multiValued="false"/>
    <field name="DeadLine" type="date" multiValued="false"/>
    <field name="ReportOn" type="date" multiValued="false"/>
    <field name="EndCaseOn" type="date" multiValued="false"/>
    <field name="CreatedBy" type="string" multiValued="false"/>
    <field name="SourceWay" type="string" multiValued="false"/>
    <field name="ISWGXTSB" type="string" multiValued="false"/>
    <field name="RegisterOffice" type="string" multiValued="false"/>
    <field name="EventLevel" type="string" multiValued="false"/>
    <field name="ImportantLevel" type="string" multiValued="false"/>

    <!--
       PETITION/DISPATCH
       Multi-valued: one petition document collects a value per dispatch.
      -->
    <field name="DispatchOffices" type="string" multiValued="true"/>
    <!--<field name="DispatchOfficeNames" />-->
    <field name="ReceiveOffices" type="string" multiValued="true"/>
    <field name="ReceiveOfficeNames" type="string" multiValued="true"/>

    <!--
       PETITION/PARTICIPANT
       Multi-valued: one petition document collects a value per participant.
      -->
    <field name="OrgUnits" type="string" multiValued="true"/>
    <field name="Participants" type="string" multiValued="true"/>

    <!--
       DISPATCH
       Single-valued counterparts used by documents of type DIS.
      -->
    <field name="Dispatcher" type="string" multiValued="false"/>
    <field name="DispatchOn" type="date" multiValued="false"/>
    <field name="DispatchOffice" type="string" multiValued="false"/>
    <field name="DispatchOfficeName" type="string" multiValued="false"/>
    <field name="ReceiveOffice" type="string" multiValued="false"/>
    <field name="ReceiveOfficeName" type="string" multiValued="false"/>
    <field name="StartOn" type="date" multiValued="false"/>
    <field name="DealWay" type="string" multiValued="false"/>
    <field name="FeedBackType" type="string" multiValued="false"/>
    <field name="FeedBackPeople" type="string" multiValued="false"/>
    <field name="FeedBackOn" type="date" multiValued="false"/>
    <field name="FeedBackMsg" type="simpletext" multiValued="false"/>
    <field name="NoPublicOpinion" type="simpletext" multiValued="false"/>
    <field name="IsPublic" type="boolean" multiValued="false"/>
    <field name="IsAlreadyReply" type="boolean" multiValued="false"/>
    <field name="IsAlreadyContact" type="boolean" multiValued="false"/>

  </fields>

  <!-- Autocomplete sources, left disabled: neither source field exists in
       this schema.
  <copyField source="canonical_name" dest="autocomplete"/>
  <copyField source="title" dest="autocomplete"/> -->

  <!-- copy the searchable petition text into one big field for easy searching -->
  <copyField source="PetitionNumber" dest="catchall"/>
  <copyField source="Title" dest="catchall"/>
  <copyField source="Content" dest="catchall"/>
  <copyField source="Tel" dest="catchall"/>
  <copyField source="EventAddress" dest="catchall"/>

  <!-- A unique key field isn't necessary, but it's the only way Solr -->
  <!-- can automatically replace docs when they change. -->
  <!-- DataImportHandler is also very unhappy if you don't have one. -->
  <uniqueKey>doc_id</uniqueKey>

  <!-- Default search field. This element is deprecated in current Solr
       releases; it is kept so that queries sent without a "df" parameter
       keep working. Prefer setting "df" in the request handler defaults. -->
  <defaultSearchField>catchall</defaultSearchField>

</schema>


db-data-config.xml



<!--
  DataImportHandler configuration producing two kinds of Solr documents from
  the same Oracle views:

    petition : one document per VW_HIS_EventInfo row (doc_type PE), with the
               offices of its dispatches and its participants folded in as
               multi-valued fields
    dispatch : one document per VW_HIS_DispatchInfo row (doc_type DIS), with
               selected fields of the parent petition folded in

  Delta imports: each root entity has a deltaQuery that returns the keys of
  changed rows and a deltaImportQuery that reloads one row by key. Each child
  entity has its own deltaQuery plus a parentDeltaQuery that maps a changed
  child row back to the root row to re-index. A child deltaQuery therefore
  has to return every column its parentDeltaQuery reads.

  NOTE(review): every delta filter converts ${dataimporter.last_index_time}
  with NEW_TIME(..., 'PST', 'GMT'). That presumes the Solr host clock is PST
  and the view timestamps are GMT; confirm for the target deployment.
-->
<dataConfig>
    <!-- Placeholder connection settings.
         NOTE(review): do not commit real credentials in this file. -->
    <dataSource type="JdbcDataSource"
                driver="oracle.jdbc.driver.OracleDriver"
                url="jdbc:oracle:thin:@192.168.0.0:1521:test"
                user="user"
                password="pwd"/>
    <document>

        <!-- Root entity: petitions. The deltaQuery only needs to return the
             primary key, which deltaImportQuery reads as
             ${dataimporter.delta.ID}. -->
        <entity name="petition"
                pk="ID"
                transformer="TemplateTransformer"
                query="select * from VW_HIS_EventInfo"
                deltaImportQuery="select t.* from VW_HIS_EventInfo t where t.id='${dataimporter.delta.ID}'"
                deltaQuery="select t.ID from VW_HIS_EventInfo t where
                               RegistorOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                            or ReportOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                            or endcaseon &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">

            <!-- Synthetic key and type, built by TemplateTransformer -->
            <field column="doc_id" template="PE_${petition.ID}"/>
            <field column="doc_type" template="PE"/>

            <!--<field column="LATLON" name="LatLon_p"/>-->
            <field column="TENANTID" name="TenantId"/>
            <field column="ID" name="PetitionId"/>
            <field column="PETITIONNUMBER" name="PetitionNumber"/>
            <field column="TITLE" name="Title"/>
            <field column="CONTENT" name="Content"/>
            <field column="TEL" name="Tel"/>
            <field column="EVENTADDRESS" name="EventAddress"/>
            <!--<field column="AREANAME" name="AreaName"/>-->
            <field column="DUTYGRIDNAME" name="DutyGridName"/>
            <field column="GRIDADDRESS" name="GridAddress"/>
            <field column="COMPLAINTQUALITYNAME" name="ComplaintType"/>
            <field column="ISVOID" name="IsVoid"/>
            <field column="ISEND" name="IsEnd"/>
            <field column="CATEGORYNAME" name="CategoryName"/>
            <field column="CATEGORYCODE" name="Category"/>
            <field column="STATUS" name="Status"/>
            <field column="REGISTORON" name="RegisterOn"/>
            <field column="DEADLINE" name="DeadLine"/>
            <field column="CREATEDBY" name="CreatedBy"/>
            <field column="REPORTON" name="ReportOn"/>
            <field column="SOURCEWAY" name="SourceWay"/>
            <field column="ISWGXTSB" name="ISWGXTSB"/>
            <field column="REGISTOROFFICE" name="RegisterOffice"/>
            <!--<field column="TOOFFICENAME" name="ToOfficeName"/>-->
            <field column="EVENTLEVEL" name="EventLevel"/>
            <field column="IMPORTANTLEVEL" name="ImportantLevel"/>
            <field column="ENDCASEON" name="EndCaseOn"/>
            <!--<field column="ENDOPINION" name="EndOpinion"/>-->

            <!-- 1:N dispatches of this petition, folded into multi-valued
                 fields. PETITIONID is selected in the deltaQuery because the
                 parentDeltaQuery reads ${petition_dispatch.PETITIONID}. -->
            <entity name="petition_dispatch"
                    pk="ID"
                    query="select * from VW_HIS_DispatchInfo where petitionid='${petition.ID}'"
                    deltaQuery="select ID, PETITIONID from VW_HIS_DispatchInfo where
                                   DISPATCHON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                                or FEEDBACKON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
                    parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petition_dispatch.PETITIONID}'">
                <!--<field column="DISPATCHER" name="Dispatchers"/>-->
                <field column="DISPATCHOFFICE" name="DispatchOffices"/>
                <!--<field column="DISPATCHOFFICENAME" name="DispatchOfficeNames"/>-->
                <field column="RECEIVEOFFICE" name="ReceiveOffices"/>
                <field column="RECEIVEOFFICENAME" name="ReceiveOfficeNames"/>
            </entity>

            <!-- 1:N participants of this petition (entity name spelling kept
                 as in the original configuration). PETITIONID is selected in
                 the deltaQuery because the parentDeltaQuery reads
                 ${petiton_participant.PETITIONID}. -->
            <entity name="petiton_participant"
                    pk="PARTICIPANT"
                    query="select distinct ORGUNIT,PARTICIPANT from vw_his_participant where petitionid='${petition.ID}'"
                    deltaQuery="select PARTICIPANT, PETITIONID from vw_his_participant where HandleOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
                    parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petiton_participant.PETITIONID}'">
                <!--<field column="HANDLEON" name="HandleOns"/>-->
                <field column="ORGUNIT" name="OrgUnits"/>
                <!--<field column="PARTICIPANTNAME" name="ParticipantNames"/>-->
                <field column="PARTICIPANT" name="Participants"/>
                <!--<field column="PARTICIPANTTYPE" name="ParticipantTypes"/>-->
            </entity>

        </entity>

        <!-- Root entity: dispatches, indexed as their own documents so that
             searches can start from the dispatch side. -->
        <entity name="dispatch"
                pk="ID"
                transformer="TemplateTransformer"
                query="select * from VW_HIS_DispatchInfo"
                deltaImportQuery="select t.* from VW_HIS_DispatchInfo t where t.id='${dataimporter.delta.ID}'"
                deltaQuery="select ID from VW_HIS_DispatchInfo where
                               DISPATCHON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                            or FEEDBACKON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">

            <!-- Synthetic key and type, built by TemplateTransformer -->
            <field column="doc_id" template="DIS_${dispatch.ID}"/>
            <field column="doc_type" template="DIS"/>

            <field column="TENANTID" name="TenantId"/>
            <field column="PETITIONID" name="PetitionId"/>
            <field column="DISPATCHER" name="Dispatcher"/>
            <field column="DISPATCHON" name="DispatchOn"/>
            <field column="DISPATCHOFFICE" name="DispatchOffice"/>
            <field column="DISPATCHOFFICENAME" name="DispatchOfficeName"/>
            <field column="RECEIVEOFFICE" name="ReceiveOffice"/>
            <field column="RECEIVEOFFICENAME" name="ReceiveOfficeName"/>
            <field column="STARTON" name="StartOn"/>
            <field column="DEADLINE" name="DeadLine"/>
            <field column="DEALWAY" name="DealWay"/>
            <field column="STATUS" name="Status"/>
            <field column="FEEDBACKTYPE" name="FeedBackType"/>
            <field column="FEEDBACKPEOPLE" name="FeedBackPeople"/>
            <field column="FEEDBACKON" name="FeedBackOn"/>
            <field column="FEEDBACKMSG" name="FeedBackMsg"/>
            <field column="NOPUBLICOPINION" name="NoPublicOpinion"/>
            <field column="ISPUBLIC" name="IsPublic"/>
            <field column="ISALREADYREPLY" name="IsAlreadyReply"/>
            <!-- NOTE(review): the column is spelled ISALREADYCONCAT while the
                 Solr field is IsAlreadyContact; confirm the view's column name. -->
            <field column="ISALREADYCONCAT" name="IsAlreadyContact"/>

            <!-- N:1 parent petition of this dispatch. Its delta only watches
                 endcaseon; the parentDeltaQuery reads ${dispatch_petition.ID},
                 which the deltaQuery returns. -->
            <entity name="dispatch_petition"
                    pk="ID"
                    query="select * from VW_HIS_EventInfo where id='${dispatch.PETITIONID}'"
                    deltaQuery="select ID from VW_HIS_EventInfo where endcaseon &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
                    parentDeltaQuery="select ID from VW_HIS_DispatchInfo where PETITIONID='${dispatch_petition.ID}'">
                <field column="PETITIONNUMBER" name="PetitionNumber"/>
                <field column="TITLE" name="Title"/>
                <field column="CONTENT" name="Content"/>
                <field column="TEL" name="Tel"/>
                <field column="EVENTADDRESS" name="EventAddress"/>
                <!--<field column="AREANAME" name="AreaName"/>-->
                <field column="DUTYGRIDNAME" name="DutyGridName"/>
                <field column="GRIDADDRESS" name="GridAddress"/>
                <field column="COMPLAINTQUALITYNAME" name="ComplaintType"/>
                <field column="CATEGORYNAME" name="CategoryName"/>
                <field column="CATEGORYCODE" name="Category"/>
            </entity>
        </entity>

    </document>
</dataConfig>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: