以下内容来自公众号逆锋起笔,关注每日干货及时送达
inverted file index:{单词,单词所在文档的id}
full inverted index:{单词,(单词所在文档的id,再具体文档中的位置)}
CREATE TABLE table_name ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, author VARCHAR(200),
title VARCHAR(200), content TEXT(500), FULLTEXT full_index_name (col_name) ) ENGINE=InnoDB;
SELECT table_id, name, space from INFORMATION_SCHEMA.INNODB_TABLESWHERE name LIKE 'test/%';
CREATE FULLTEXT INDEX full_index_name ON table_name(col_name);
MATCH(col1,col2,...) AGAINST(expr[search_modifier])
search_modifier:
{
IN NATURAL LANGUAGE MODE
| IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION
| IN BOOLEAN MODE
| WITH QUERY EXPANSION
}
SELECT
count(*) AS count
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( 'MySQL' );
SELECT count(IF(MATCH ( title, body ) against ( 'MySQL' ), 1, NULL )) AS count FROM `fts_articles`;
SELECT
*,
MATCH ( title, body ) against ( 'MySQL' ) AS Relevance
FROM
fts_articles;
word 是否在文档中出现 word 在文档中出现的次数 word 在索引列中的数量 多少个文档包含该 word
查询的 word 在 stopword 列中,忽略该字符串的查询
查询的 word 的字符长度是否在区间 [innodb_ft_min_token_size,innodb_ft_max_token_size] 内
SELECT *, MATCH ( title, body ) against ( 'for' ) AS Relevance FROM fts_articles;
select * from fts_test where MATCH(content) AGAINST('+Pease -hot' IN BOOLEAN MODE);
+:表示该 word 必须存在
-:表示该 word 必须不存在
(no operator)表示该 word 是可选的,但是如果出现,其相关性会更高
@distance表示查询的多个单词之间的距离是否在 distance 之内,distance 的单位是字节,这种全文检索的查询也称为 Proximity Search,如 MATCH(context) AGAINST('"Pease hot"@30' IN BOOLEAN MODE)语句表示字符串 Pease 和 hot 之间的距离需在30字节内
>:表示出现该单词时增加相关性
<:表示出现该单词时降低相关性
~:表示允许出现该单词,但出现时相关性为负
* :表示以该单词开头的单词,如 lik*,表示可以是 lik,like,likes
" :表示短语
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( '+MySQL -YourSQL' IN BOOLEAN MODE );
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( 'MySQL IBM' IN BOOLEAN MODE );
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( '"DB2 IBM"@3' IN BOOLEAN MODE );
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( '+MySQL +(>database <DBMS)' IN BOOLEAN MODE );
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( 'MySQL ~database' IN BOOLEAN MODE );
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( 'My*' IN BOOLEAN MODE );
SELECT
*
FROM
`fts_articles`
WHERE
MATCH ( title, body ) AGAINST ( '"MySQL Security"' IN BOOLEAN MODE );
第一阶段:根据搜索的单词进行全文索引查询
第二阶段:根据第一阶段产生的分词再进行一次全文检索的查询
-- 创建索引
create FULLTEXT INDEX title_body_index on fts_articles(title,body);
-- 使用 Natural Language 模式查询
SELECT
*
FROM
`fts_articles`
WHERE
MATCH(title,body) AGAINST('database');
-- 当使用 Query Expansion 模式查询
SELECT
*
FROM
`fts_articles`
WHERE
MATCH(title,body) AGAINST('database' WITH QUERY expansion);
DROP INDEX full_idx_name ON db_name.table_name;
ALTER TABLE db_name.table_name DROP INDEX full_idx_name;