PHP全文搜索引擎与索引优化
PHP全文搜索引擎与索引优化

全文搜索是应用的核心功能。从数据库全文索引到Elasticsearch，PHP有多种搜索方案。今天说说PHP中全文搜索引擎的构建和索引优化。

先看MySQL全文索引的使用和优化。

<?php

/**
 * Full-text search over the `articles` table using a MySQL FULLTEXT index
 * on (title, content).
 *
 * All queries go through prepared statements on the injected PDO handle.
 */
class FulltextSearch
{
    /** Number of characters of `content` used for the highlighted snippet. */
    private const SNIPPET_LENGTH = 300;

    /** Escape character used in LIKE patterns (valid in MySQL and SQLite). */
    private const LIKE_ESCAPE = '!';

    public function __construct(private PDO $pdo)
    {
    }

    /**
     * Run a paginated BOOLEAN MODE full-text query ordered by relevance.
     *
     * @param string $keyword Search expression (may contain boolean-mode operators).
     * @param int    $page    1-based page number; values below 1 are treated as 1.
     * @param int    $perPage Page size; values below 1 are treated as 1.
     *
     * @return array{data: array, total: int, page: int, per_page: int, total_pages: int}
     */
    public function search(string $keyword, int $page = 1, int $perPage = 20): array
    {
        // Clamp so the offset can never go negative and the page count never divides by zero.
        $page = max(1, $page);
        $perPage = max(1, $perPage);

        if (trim($keyword) === '') {
            return ['data' => [], 'total' => 0, 'page' => $page, 'per_page' => $perPage, 'total_pages' => 0];
        }

        // Distinct placeholder names: a named placeholder may not be reused
        // within one statement when prepares are not emulated.
        $stmt = $this->pdo->prepare(
            'SELECT *, MATCH(title, content) AGAINST(:keyword_score IN BOOLEAN MODE) AS relevance
             FROM articles
             WHERE MATCH(title, content) AGAINST(:keyword_filter IN BOOLEAN MODE)
             ORDER BY relevance DESC
             LIMIT :limit OFFSET :offset'
        );
        $stmt->bindValue(':keyword_score', $keyword);
        $stmt->bindValue(':keyword_filter', $keyword);
        // LIMIT/OFFSET must be bound as integers; execute([...]) would bind them as strings.
        $stmt->bindValue(':limit', $perPage, PDO::PARAM_INT);
        $stmt->bindValue(':offset', ($page - 1) * $perPage, PDO::PARAM_INT);
        $stmt->execute();
        $results = $stmt->fetchAll();

        $countStmt = $this->pdo->prepare(
            'SELECT COUNT(*) FROM articles
             WHERE MATCH(title, content) AGAINST(:keyword IN BOOLEAN MODE)'
        );
        $countStmt->execute(['keyword' => $keyword]);
        $total = (int) $countStmt->fetchColumn();

        return [
            'data' => $results,
            'total' => $total,
            'page' => $page,
            'per_page' => $perPage,
            'total_pages' => (int) ceil($total / $perPage),
        ];
    }

    /**
     * Return the 20 most relevant rows, each extended with `title_highlight`
     * and `content_highlight` (HTML-escaped, matches wrapped in <mark>).
     *
     * Rows are accessed by column name, so the connection's default fetch mode
     * must yield arrays.
     *
     * @return array<int, array<string, mixed>>
     */
    public function searchWithHighlight(string $keyword): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT *, MATCH(title, content) AGAINST(:keyword_score IN BOOLEAN MODE) AS relevance
             FROM articles
             WHERE MATCH(title, content) AGAINST(:keyword_filter IN BOOLEAN MODE)
             ORDER BY relevance DESC
             LIMIT 20'
        );
        $stmt->execute(['keyword_score' => $keyword, 'keyword_filter' => $keyword]);
        $results = $stmt->fetchAll();

        // Write back by index: iterating by value would only modify a copy of each row.
        foreach ($results as $i => $row) {
            $results[$i]['title_highlight'] = $this->highlight((string) ($row['title'] ?? ''), $keyword);
            // mb_substr keeps multi-byte characters intact; a byte-wise cut could
            // produce invalid UTF-8, which the /u regex in highlight() rejects.
            $results[$i]['content_highlight'] = $this->highlight(
                mb_substr((string) ($row['content'] ?? ''), 0, self::SNIPPET_LENGTH),
                $keyword
            );
        }

        return $results;
    }

    /**
     * HTML-escape $text and wrap every case-insensitive occurrence of a search
     * term in <mark>…</mark>.
     *
     * Terms are the whitespace-separated words of $keyword with leading/trailing
     * boolean-mode operators removed. All terms are matched in a single pass so
     * that markup inserted for one term is never re-matched by another.
     */
    private function highlight(string $text, string $keyword): string
    {
        $escape = static fn (string $s): string => htmlspecialchars($s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        $terms = [];
        foreach (preg_split('/\s+/', $keyword, -1, PREG_SPLIT_NO_EMPTY) ?: [] as $word) {
            // Drop BOOLEAN MODE operators around the word (+php, -java, php*, "phrase").
            $word = rtrim(ltrim($word, '+-<>~("'), '*)"');
            if ($word !== '') {
                $terms[] = preg_quote($word, '/');
            }
        }
        if ($terms === []) {
            return $escape($text);
        }

        // Longest first, so "phpunit" wins over "php" inside the alternation.
        usort($terms, static fn (string $a, string $b): int => strlen($b) <=> strlen($a));

        $parts = preg_split('/(' . implode('|', $terms) . ')/iu', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            // Pattern or subject was not valid UTF-8: fall back to plain escaped text.
            return $escape($text);
        }

        $html = '';
        foreach ($parts as $i => $part) {
            // With one capture group, odd positions hold the matched terms.
            $html .= $i % 2 === 1 ? '<mark>' . $escape($part) . '</mark>' : $escape($part);
        }

        return $html;
    }

    /**
     * Suggest distinct article titles that start with $prefix.
     *
     * `%` and `_` in $prefix are matched literally.
     *
     * @return array<int, string> At most $limit titles; empty when $limit < 1.
     */
    public function suggest(string $prefix, int $limit = 10): array
    {
        if ($limit < 1) {
            return [];
        }

        $e = self::LIKE_ESCAPE;
        $pattern = strtr($prefix, [$e => $e . $e, '%' => $e . '%', '_' => $e . '_']) . '%';

        $stmt = $this->pdo->prepare(
            "SELECT DISTINCT title FROM articles
             WHERE title LIKE :prefix ESCAPE '!'
             LIMIT :limit"
        );
        $stmt->bindValue(':prefix', $pattern);
        $stmt->bindValue(':limit', $limit, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_COLUMN);
    }

    /**
     * Run REPAIR TABLE and OPTIMIZE TABLE on `articles`.
     *
     * Both statements return a status result set, so they are run with query()
     * and drained before the next statement is sent.
     * NOTE(review): REPAIR TABLE is only meaningful for some storage engines —
     * confirm it is wanted for the engine `articles` uses.
     */
    public function rebuildIndex(): void
    {
        foreach (['REPAIR TABLE articles', 'OPTIMIZE TABLE articles'] as $sql) {
            $stmt = $this->pdo->query($sql);
            if ($stmt !== false) {
                $stmt->fetchAll();
                $stmt->closeCursor();
            }
        }
    }
}
?>

Elasticsearch PHP客户端的实现：

<?php

/**
 * Minimal Elasticsearch REST client built on cURL, bound to a single index.
 */
class ElasticsearchClient
{
    private string $host;
    private string $index;

    public function __construct(string $host = 'http://localhost:9200', string $index = 'app')
    {
        $this->host = rtrim($host, '/');
        $this->index = $index;
    }

    /**
     * Create or replace the document stored under $id.
     *
     * @throws RuntimeException|JsonException when the request fails or $document cannot be encoded
     */
    public function indexDocument(string $id, array $document): void
    {
        // Encode the id so characters such as "/", "?" or "#" cannot alter the URL.
        $this->request('PUT', "/{$this->index}/_doc/" . rawurlencode($id), $document);
    }

    /**
     * Index many documents with one _bulk request.
     *
     * @param array<int|string, array> $documents Map of document id => document body.
     *
     * @throws RuntimeException|JsonException when the request fails or a document cannot be encoded
     */
    public function bulkIndex(array $documents): void
    {
        if ($documents === []) {
            return; // nothing to send; an empty bulk body is rejected by the server
        }

        $body = '';
        foreach ($documents as $id => $doc) {
            // NDJSON: one action line followed by one source line, each newline-terminated.
            $body .= json_encode(['index' => ['_id' => (string) $id]], JSON_THROW_ON_ERROR) . "\n";
            $body .= json_encode($doc, JSON_THROW_ON_ERROR) . "\n";
        }

        $this->request('POST', "/{$this->index}/_bulk", $body, 'application/x-ndjson');
    }

    /**
     * Match $keyword against `title` (boost 3) and `content` (boost 1),
     * sorted by score.
     *
     * @param int $page    1-based page number; values below 1 are treated as 1.
     * @param int $perPage Page size; negative values are treated as 0.
     *
     * @return array{data: array, total: int, page: int, per_page: int}
     *
     * @throws RuntimeException when the request fails
     */
    public function search(string $keyword, int $page = 1, int $perPage = 20): array
    {
        $page = max(1, $page);
        $perPage = max(0, $perPage);

        $query = [
            'query' => [
                'bool' => [
                    'should' => [
                        ['match' => ['title' => ['query' => $keyword, 'boost' => 3]]],
                        ['match' => ['content' => ['query' => $keyword, 'boost' => 1]]],
                    ],
                ],
            ],
            'from' => ($page - 1) * $perPage,
            'size' => $perPage,
            'sort' => ['_score' => 'desc'],
        ];

        $result = $this->request('GET', "/{$this->index}/_search", $query);

        // hits.total is either an object with a "value" key or a plain integer,
        // depending on the server version.
        $total = $result['hits']['total'] ?? 0;
        if (is_array($total)) {
            $total = $total['value'] ?? 0;
        }

        return [
            'data' => $result['hits']['hits'] ?? [],
            'total' => (int) $total,
            'page' => $page,
            'per_page' => $perPage,
        ];
    }

    /**
     * Delete the document stored under $id.
     *
     * @throws RuntimeException when the request fails (including a 404 for an unknown id)
     */
    public function deleteDocument(string $id): void
    {
        $this->request('DELETE', "/{$this->index}/_doc/" . rawurlencode($id));
    }

    /**
     * Create the index; $mapping overrides the defaults key by key.
     *
     * @param array $mapping Partial index definition (`settings` / `mappings`).
     *
     * @throws RuntimeException when the request fails
     */
    public function createIndex(array $mapping = []): void
    {
        $defaultMapping = [
            'settings' => [
                'number_of_shards' => 1,
                'number_of_replicas' => 1,
            ],
            'mappings' => [
                'properties' => [
                    'title' => ['type' => 'text', 'analyzer' => 'standard'],
                    'content' => ['type' => 'text', 'analyzer' => 'standard'],
                ],
            ],
        ];

        // array_replace_recursive overrides scalars in place; array_merge_recursive
        // would turn an overridden value into a list such as [1, 3].
        $this->request('PUT', "/{$this->index}", array_replace_recursive($defaultMapping, $mapping));
    }

    /**
     * Send one HTTP request and return the decoded JSON response.
     *
     * @param mixed $body String bodies are sent verbatim; anything else is JSON-encoded.
     *
     * @return mixed Decoded response, or [] when the body is empty or not JSON.
     *
     * @throws RuntimeException on transport failure or an HTTP status >= 400
     * @throws JsonException    when $body cannot be JSON-encoded
     */
    private function request(
        string $method,
        string $path,
        mixed $body = null,
        string $contentType = 'application/json'
    ): mixed {
        $ch = curl_init($this->host . $path);
        if ($ch === false) {
            throw new RuntimeException('Unable to initialise cURL for ' . $this->host . $path);
        }

        $options = [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CUSTOMREQUEST => $method,
            CURLOPT_TIMEOUT => 10,
        ];
        if ($body !== null) {
            $options[CURLOPT_POSTFIELDS] = is_string($body) ? $body : json_encode($body, JSON_THROW_ON_ERROR);
            $options[CURLOPT_HTTPHEADER] = ['Content-Type: ' . $contentType];
        }
        curl_setopt_array($ch, $options);

        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        unset($ch); // releases the handle

        if ($response === false) {
            throw new RuntimeException("Elasticsearch request {$method} {$path} failed: {$error}");
        }
        if ($httpCode >= 400) {
            throw new RuntimeException(
                "Elasticsearch request {$method} {$path} returned HTTP {$httpCode}: {$response}",
                $httpCode
            );
        }

        return json_decode($response, true) ?: [];
    }
}
?>

搜索是用户体验的关键环节。MySQL全文索引适合中小规模数据，Elasticsearch适合海量数据的搜索。索引优化可以提高搜索速度，分词器影响搜索的准确性。搜索结果的相关性排序比简单的LIKE查询好得多，高亮显示匹配词可以提示用户为什么看到这个结果。