Overview

Namespaces

  • elasticsearch

Classes

  • Config
  • Defaults
  • Faceting
  • Indexer
  • Searcher
  • Overview
  • Namespace
  • Class
  • Tree
  1: <?php
  2: namespace elasticsearch;
  3: 
  4: /**
  5: * This class handles the magic of building documents and sending them to ElasticSearch for indexing.
  6: *
  7: * @license http://opensource.org/licenses/MIT
  8: * @author Paris Holley <[email protected]>
  9: * @version 2.0.0
 10: **/
 11: class Indexer{
 12:     /**
 13:     * The number of posts to index per page when re-indexing
 14:     *
 15:     * @return integer posts per page
 16:     **/
 17:     static function per_page(){
 18:         return Config::apply_filters('indexer_per_page', 10);
 19:     }
 20: 
 21:     /**
 22:     * Retrieve the posts for the page provided
 23:     *
 24:     * @param integer $page The page of results to retrieve for indexing
 25:     *
 26:     * @return WP_Post[] posts
 27:     **/
 28:     static function get_posts($page = 1){
 29:         $args = Config::apply_filters('indexer_get_posts', array(
 30:             'posts_per_page' => self::per_page(),
 31:             'post_type' => Config::types(),
 32:             'paged' => $page,
 33:             'post_status' => 'publish'
 34:         ));
 35: 
 36:         return get_posts($args);
 37:     }
 38: 
 39:     /**
 40:     * Retrieve count of the number of posts available for indexing
 41:     *
 42:     * @return integer number of posts
 43:     **/
 44:     static function get_count(){
 45:         $query = new \WP_Query(array(
 46:             'post_type' => Config::types(),
 47:             'post_status' => 'publish'
 48:         ));
 49: 
 50:         return $query->found_posts; //performance risk?
 51:     }
 52: 
 53:     /**
 54:     * Removes all data in the ElasticSearch index
 55:     **/
 56:     static function clear(){
 57:         foreach(Config::types() as $type){
 58:             $index = self::_index(true);
 59:             $mapping = $index->getMapping();
 60: 
 61:             if(isset($mapping[Config::option('server_index')])){
 62:                 foreach($mapping[Config::option('server_index')] as $type => $props){
 63:                     $index->getType($type)->delete();
 64:                 }
 65:             }
 66:         }
 67: 
 68:         self::_map();
 69:     }
 70: 
 71:     /**
 72:     * Re-index the posts on the given page in the ElasticSearch index
 73:     *
 74:     * @param integer $page The page to re-index
 75:     **/
 76:     static function reindex($page = 1){
 77:         $index = self::_index(true);
 78: 
 79:         $posts = self::get_posts($page);
 80: 
 81:         foreach($posts as $post){
 82:             self::addOrUpdate($post);
 83:         }
 84: 
 85:         return count($posts);
 86:     }
 87: 
 88:     /**
 89:     * Removes a post from the ElasticSearch index
 90:     *
 91:     * @param WP_Post $post The wordpress post to remove
 92:     **/
 93:     static function delete($post){
 94:         $index = self::_index(true);
 95: 
 96:         $type = $index->getType($post->post_type);
 97: 
 98:         try{
 99:             $type->deleteById($post->ID);
100:         }catch(\Elastica\Exception\NotFoundException $ex){
101:             // ignore
102:         }
103:     }
104: 
105:     /**
106:     * Updates an existing document in the ElasticSearch index (or creates it if it doesn't exist)
107:     *
108:     * @param WP_Post $post The wordpress post to remove
109:     **/
110:     static function addOrUpdate($post){
111:         $type = self::_index(true)->getType($post->post_type);
112: 
113:         $data = self::_build_document($post);
114: 
115:         $type->addDocument(new \Elastica\Document($post->ID, $data));       
116:     }
117: 
118:     /**
119:     * Reads F.E.S configuration and updates ElasticSearch field mapping information (this can corrupt existing data).
120:     * @internal
121:     **/
122:     static function _map(){
123:         $numeric = Config::option('numeric');
124:         $notanalyzed = Config::option('not_analyzed');
125: 
126:         $index = self::_index(false);
127: 
128:         foreach(Config::taxonomies() as $tax){
129:             $props = array(
130:                 'type' => 'string',
131:                 'index' => 'not_analyzed'
132:             );
133: 
134:             $props = Config::apply_filters('indexer_map_taxonomy', $props, $tax);
135: 
136:             $propsname = array(
137:                 'type' => 'string'
138:             );
139: 
140:             $propsname = Config::apply_filters('indexer_map_taxonomy_name', $propsname, $tax);
141: 
142:             foreach(Config::types() as $type){
143:                 $type = $index->getType($type);
144: 
145:                 $mapping = new \Elastica\Type\Mapping($type);
146:                 $mapping->setProperties(array($tax => $props));
147:                 $mapping->send();
148: 
149:                 $mapping = new \Elastica\Type\Mapping($type);
150:                 $mapping->setProperties(array($tax . '_name' => $propsname));
151:                 $mapping->send();
152:             }           
153:         }
154: 
155:         foreach(Config::fields() as $field){
156:             $props = array(
157:                 'type' => 'string'
158:             );
159: 
160:             if(isset($numeric[$field])){
161:                 $props['type'] = 'float';
162:             }elseif($field == 'post_date'){
163:                 $props['type'] = 'date';
164:                 $props['format'] = 'date_time_no_millis';
165:             }elseif(isset($notanalyzed[$field])){
166:                 $props['index'] = 'not_analyzed';
167:             }else{
168:                 $props['index'] = 'analyzed';
169:             }
170: 
171:             $props = Config::apply_filters('indexer_map_field', $props, $field);
172: 
173:             foreach(Config::types() as $type){
174:                 $type = $index->getType($type);
175: 
176:                 $mapping = new \Elastica\Type\Mapping($type);
177:                 $mapping->setProperties(array($field => $props));
178: 
179:                 $mapping->send();
180:             }
181:         }
182:     }
183: 
184:     /**
185:     * Takes a wordpress post object and converts it into an associative array that can be sent to ElasticSearch
186:     *
187:     * @param WP_Post $post wordpress post object
188:     * @return array document data
189:     * @internal
190:     **/
191:     static function _build_document($post){
192:         global $blog_id;
193:         
194:         $document = array(
195:             'blog_id' => $blog_id
196:         );
197: 
198:         foreach(Config::fields() as $field){
199:             if(isset($post->$field)){
200:                 if($field == 'post_date'){
201:                     $document[$field] = date('c',strtotime($post->$field));
202:                 }else if($field == 'post_content'){
203:                     $document[$field] = strip_tags($post->$field);
204:                 }else{
205:                     $document[$field] = $post->$field;
206:                 }
207:             }
208:         }
209: 
210:         if(isset($post->post_type)){
211:             $taxes = array_intersect(Config::taxonomies(), get_object_taxonomies($post->post_type));
212: 
213:             foreach($taxes as $tax){
214:                 $document[$tax] = array();
215: 
216:                 foreach(wp_get_object_terms($post->ID, $tax) as $term){
217:                     if(!in_array($term->slug, $document[$tax])){
218:                         $document[$tax][] = $term->slug;
219:                         $document[$tax . '_name'][] = $term->name;
220:                     }
221: 
222:                     if(isset($term->parent) && $term->parent){
223:                         $parent = get_term($term->parent, $tax);
224:                         
225:                         while($parent != null){
226:                             if(!in_array($parent->slug, $document[$tax])){
227:                                 $document[$tax][] = $parent->slug;
228:                                 $document[$tax . '_name'][] = $parent->name;
229:                             }
230: 
231:                             if(isset($parent->parent) && $parent->parent){
232:                                 $parent = get_term($parent->parent, $tax);
233:                             }else{
234:                                 $parent = null;
235:                             }
236:                         }
237:                     }
238:                 }
239:             }
240:         }
241:         
242:         return Config::apply_filters('indexer_build_document', $document, $post);
243:     }
244: 
245:     /**
246:     * The Elastica\Client object used by F.E.S
247:     *
248:     * @param boolean $write Specifiy whether you are making read-only or write transactions (currently just adjusts timeout values)
249:     *
250:     * @return Elastica\Client
251:     * @internal
252:     **/
253:     static function _client($write = false){
254:         $settings = array(
255:             'url' => Config::option('server_url')
256:         );
257:         
258:         if($write){
259:             $settings['timeout'] = Config::option('server_timeout_write') ?: 300;
260:         }else{
261:             $settings['timeout'] = Config::option('server_timeout_read') ?: 1;
262:         }
263: 
264:         return new \Elastica\Client($settings);
265:     }
266: 
267:     /**
268:     * The Elastica\Index object used by F.E.S
269:     *
270:     * @param boolean $write Specifiy whether you are making read-only or write transactions (currently just adjusts timeout values)
271:     *
272:     * @return Elastica\Index
273:     * @internal
274:     **/
275:     static function _index($write = false){
276:         return self::_client($write)->getIndex(Config::option('server_index'));
277:     }
278: }
279: ?>
280: 
API documentation generated by ApiGen 2.8.0