Elasticsearch 是一个实时的分布式搜索分析引擎,它能让你以前所未有的速度和规模去探索你的数据。它被用作全文检索、结构化搜索、分析,以及这三个功能的组合。
# mac直接brew
brew install elasticsearch
# 其它
https://www.elastic.co/downloads/elasticsearch
概念:
hits:中文意思是“击中”,这里相当于匹配到的数据
total:匹配的总个数
max_score:最大分值
_index:索引,相当于SQL中的database
_type:类型,相当于SQL中的table
_id:唯一标识,相当于SQL中的id
_score:分值,代表匹配的程度
_source:元数据(文档的原始 JSON 内容),相当于SQL中table中的字段合集
took:搜索耗时(单位:毫秒)
_shards:查询中参与分片的总数
timed_out:查询是否超时
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json'
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 1.0,
"hits" : [
{
"_index" : "website",
"_type" : "blog",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"title" : "标题2",
"text" : "内容2",
"views" : 10,
"tags" : [
"标签2",
"标签3"
]
}
}
]
}
}
"_version": 1 —— 版本号,每次操作都会递增
"result": "created" —— 操作结果
# 新增
curl -X PUT "http://localhost:9200/website/blog/123?pretty" -H 'Content-Type: application/json' -d'
{
"title" : "标题123",
"text" : "内容123",
"views" : 123,
"tags" : [
"标签122",
"标签133"
]
}
'
{
"_index": "website",
"_type": "blog",
"_id": "123",
"_version": 1,
"result": "created",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 0,
"_primary_term": 9
}
# 更新
curl -X PUT "http://localhost:9200/website/blog/123?pretty" -H 'Content-Type: application/json' -d'
{
"title" : "标题123",
"text" : "内容123",
"views" : 1230,
"tags" : [
"标签122",
"标签133"
]
}
'
{
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 2,
"_primary_term" : 9
}
curl -X GET "http://localhost:9200/website/blog/123?pretty" -H 'Content-Type: application/json'
{
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_version" : 5,
"found" : true,
"_source" : {
"title" : "标题123",
"text" : "内容123",
"views" : 123,
"tags" : [
"标签122",
"标签133"
]
}
}
curl -X DELETE "http://localhost:9200/website/blog/123?pretty" -H 'Content-Type: application/json'
{
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_version" : 3,
"result" : "deleted",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 3,
"_primary_term" : 9
}
上面我们用过了基本的搜索:
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json'
接下来进一步介绍:
size:显示应该返回的结果数量,默认是 10
from:显示应该跳过的初始结果数量,默认是 0
# 分页
curl -XGET "http://localhost:9200/website/blog/_search?size=1&from=0&pretty" -H 'Content-Type:application/json'
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_score" : 1.0,
"_source" : {
"title" : "标题123",
"text" : "内容123",
"views" : 123,
"tags" : [
"标签122",
"标签133"
]
}
}
]
}
}
_mapping
映射,对元数据字段的猜测,并不一定准确
curl -XGET "http://localhost:9200/website/_mapping/blog?pretty" -H 'Content-Type:application/json'
{
"website" : {
"mappings" : {
"blog" : {
"properties" : {
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"text" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"views" : {
"type" : "long"
}
}
}
}
}
}
_analyze:分析,对数据进行分析,比如分词
token:是实际存储到索引中的词条
position:指明词条在原始文本中出现的位置
start_offset 和 end_offset:指明字符在原始字符串中的位置
ik_max_word:是中文分词插件 elasticsearch-analysis-ik 提供的分词器
curl -XPOST "http://localhost:9200/_analyze?pretty" -H 'Content-Type:application/json' -d '
{
"analyzer": "standard",
"text": "Text to analyze"
}
'
{
"tokens" : [
{
"token" : "text",
"start_offset" : 0,
"end_offset" : 4,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "to",
"start_offset" : 5,
"end_offset" : 7,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "analyze",
"start_offset" : 8,
"end_offset" : 15,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
# 新增映射
curl -XPUT "http://localhost:9200/gb?pretty" -H 'Content-Type:application/json' -d '
{
"mappings": {
"tweet" : {
"properties" : {
"tweet" : {
"type" : "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
},
"date" : {
"type" : "date"
},
"name" : {
"type" : "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
},
"user_id" : {
"type" : "long"
}
}
}
}
}
'
{
"acknowledged" : true,
"shards_acknowledged" : true,
"index" : "gb"
}
# 测试映射
curl -XGET "http://localhost:9200/gb/_mapping/tweet?pretty" -H 'Content-Type:application/json'
{
"gb" : {
"mappings" : {
"tweet" : {
"properties" : {
"date" : {
"type" : "date"
},
"name" : {
"type" : "text",
"analyzer" : "ik_max_word"
},
"tweet" : {
"type" : "text",
"analyzer" : "ik_max_word"
},
"user_id" : {
"type" : "long"
}
}
}
}
}
}
constant_score:非评分模式,_score 都是1
filter:过滤
term:精确搜索单个
terms:精确搜索多个
select * from blog where views=123
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json' -d '
{
"query": {
"constant_score": {
"filter": {
"term": {
"views": 123
}
}
}
}
}
'
{
"took" : 30,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [
{
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_score" : 1.0,
"_source" : {
"title" : "标题123",
"text" : "内容123",
"views" : 123,
"tags" : [
"标签122",
"标签133"
]
}
}
]
}
}
must:所有的语句都 必须(must) 匹配,与 AND 等价。
must_not:所有的语句都 不能(must not) 匹配,与 NOT 等价。
should:至少有一个语句要匹配,与 OR 等价。
select * from blog where views=123 and text="内容123"
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json' -d '
{
"query": {
"bool": {
"must": [
{"term": {"views": 123}},
{"match": {"text": "内容"}}
]
}
}
}
'
结果同上
range:范围
gt:大于
gte:大于等于
lt:小于
lte:小于等于
select * from blog where views between 100 and 200
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json' -d '
{
"query": {
"range": {
"views": {
"gte": 100,
"lte": 200
}
}
}
}
'
结果同上
exists:存在值,相当于 is not null
missing:缺失值,相当于 is null(注意:missing 查询自 Elasticsearch 5.0 起已被移除,需改用 bool 查询的 must_not 中嵌套 exists 查询)
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json' -d '
{
"query": {
"constant_score": {
"filter": {
"exists": {
"field": "tags"
}
},
"boost": 1.2
}
}
}
'
match:匹配,可以多词
"operator": "and":提高精度
"minimum_should_match": "75%":控制精度
boost:权重
select * from blog where title like '%标题%'
curl -XGET "http://localhost:9200/website/blog/_search?pretty" -H 'Content-Type:application/json' -d '
{
"query": {
"match": {
"title": {
"query": "标题 2",
"operator": "and",
"minimum_should_match": "75%"
}
}
}
}
'
Elasticsearch 执行上面这个 match 查询的步骤是:
1. 检查字段类型
标题 title 字段是一个 text 类型( analyzed )已分析的全文字段,这意味着查询字符串本身也应该被分析。
2. 分析查询字符串
将查询的字符串 标题 传入标准分析器中,输出的结果是单个项 标题。因为只有一个单词项,所以 match 查询执行的是单个底层 term 查询。
3. 查找匹配文档
用 term 查询在倒排索引中查找 标题,然后获取一组包含该项的文档。
4. 为每个文档评分
用 term 查询计算每个文档相关度评分 _score,这是种将词频(term frequency,即词 标题 在相关文档的 title 字段中出现的频率)、反向文档频率(inverse document frequency,即词 标题 在所有文档的 title 字段中出现的频率),以及字段的长度(即字段越短相关度越高)相结合的计算方式。