📍 导航:返回目录 | 上一节:ClickHouse | 下一节:Nginx
- Index(索引):类似关系型数据库中的表(7.x 移除 Type 之前常被类比为数据库)
- Document(文档):一条记录(JSON格式)
- Field(字段):文档的属性
- Mapping(映射):字段类型定义(类似表结构)
- Shard(分片):数据分片,提高并行处理能力
- Replica(副本):数据备份,提高可用性和查询性能
原理:建立词项到文档的映射
Term → Document IDs
---------------------------------------
elastic → [1, 3, 5]
search → [1, 2, 5]
engine → [3, 4, 5]
查询 "elastic search":
- 查找 "elastic" → [1, 3, 5]
- 查找 "search" → [1, 2, 5]
- 取交集(AND 语义;match 查询默认为 OR,即取并集 [1, 2, 3, 5])→ [1, 5]
PUT /products
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "ik_max_word"
},
"price": {
"type": "double"
},
"category": {
"type": "keyword"
},
"created_at": {
"type": "date"
}
}
}
}
# 指定 ID
PUT /products/_doc/1
{
"name": "iPhone 15 Pro",
"price": 7999,
"category": "手机",
"created_at": "2024-01-01"
}
# 自动生成 ID
POST /products/_doc
{
"name": "MacBook Pro",
"price": 12999,
"category": "电脑"
}
# 根据 ID 查询
GET /products/_doc/1
# 查询所有
GET /products/_search
{
"query": {
"match_all": {}
}
}
# match 查询(分词)
GET /products/_search
{
"query": {
"match": {
"name": "iPhone Pro"
}
}
}
# multi_match(多字段)
GET /products/_search
{
"query": {
"multi_match": {
"query": "苹果手机",
"fields": ["name", "description"]
}
}
}
# match_phrase(短语匹配)
GET /products/_search
{
"query": {
"match_phrase": {
"name": "iPhone 15"
}
}
}
# term 查询(不分词)
GET /products/_search
{
"query": {
"term": {
"category": "手机"
}
}
}
# terms 查询(多个值)
GET /products/_search
{
"query": {
"terms": {
"category": ["手机", "电脑"]
}
}
}
GET /products/_search
{
"query": {
"range": {
"price": {
"gte": 5000,
"lte": 10000
}
}
}
}
GET /products/_search
{
"query": {
"bool": {
"must": [
{ "match": { "name": "iPhone" } }
],
"filter": [
{ "range": { "price": { "gte": 5000 } } }
],
"should": [
{ "term": { "category": "手机" } }
],
"must_not": [
{ "term": { "status": "下架" } }
]
}
}
}
GET /products/_search
{
"size": 0,
"aggs": {
"group_by_category": {
"terms": {
"field": "category",
"size": 10
}
}
}
}
GET /products/_search
{
"size": 0,
"aggs": {
"avg_price": {
"avg": { "field": "price" }
},
"max_price": {
"max": { "field": "price" }
},
"stats_price": {
"stats": { "field": "price" }
}
}
}
GET /products/_search
{
"size": 0,
"aggs": {
"group_by_category": {
"terms": { "field": "category" },
"aggs": {
"avg_price": {
"avg": { "field": "price" }
}
}
}
}
}
PUT /logs
{
"mappings": {
"properties": {
"message": {
"type": "text",
"index": false # 不需要搜索的字段,禁用索引
},
"status": {
"type": "keyword" # 精确匹配用 keyword
},
"timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
经验公式:
- 单分片大小:20-50GB
- 分片数 = 数据总量 / 单分片大小
- 副本数 = 1-2(根据可用性要求)
# Bulk API(批量插入)
POST _bulk
{ "index": { "_index": "products", "_id": "1" } }
{ "name": "iPhone 15", "price": 5999 }
{ "index": { "_index": "products", "_id": "2" } }
{ "name": "iPad Pro", "price": 6799 }
建议:每批次 1000-5000 条
# 指定路由(相同路由的文档在同一分片)
PUT /products/_doc/1?routing=user123
{
"user_id": "user123",
"name": "iPhone 15"
}
# 查询时也指定路由
GET /products/_search?routing=user123
{
"query": {
"term": { "user_id": "user123" }
}
}
import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/elastic/go-elasticsearch/v8"
)
func main() {
// 创建客户端
es, err := elasticsearch.NewDefaultClient()
if err != nil {
panic(err)
}
// 索引文档
doc := map[string]interface{}{
"name": "iPhone 15 Pro",
"price": 7999,
}
body, _ := json.Marshal(doc)
res, err := es.Index(
"products",
strings.NewReader(string(body)),
es.Index.WithDocumentID("1"),
)
// 搜索
query := `{
"query": {
"match": {
"name": "iPhone"
}
}
}`
res, err = es.Search(
es.Search.WithIndex("products"),
es.Search.WithBody(strings.NewReader(query)),
)
// 解析结果
var result map[string]interface{}
json.NewDecoder(res.Body).Decode(&result)
hits := result["hits"].(map[string]interface{})["hits"].([]interface{})
for _, hit := range hits {
source := hit.(map[string]interface{})["_source"]
fmt.Println(source)
}
}Logstash → Elasticsearch → Kibana
Logstash 配置:
input {
file {
path => "/var/log/app.log"
start_position => "beginning"
}
}
filter {
grok {
match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:msg}" }
}
date {
match => [ "timestamp", "ISO8601" ]
target => "@timestamp"
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "app-logs-%{+YYYY.MM.dd}"
}
}
Elasticsearch 是强大的分布式搜索引擎,适用于全文检索、日志分析、实时数据分析等场景。
关键要点:
- ✅ 倒排索引是快速搜索的基础
- ✅ Query DSL 提供灵活的查询能力
- ✅ 聚合分析支持复杂统计
- ✅ 合理设计 Mapping 和分片是性能关键
- 《Elasticsearch 权威指南》
- 《深入理解 Elasticsearch》
- ES 官方文档
💡 思考题:
- 倒排索引的原理是什么?
- text 和 keyword 类型有什么区别?
- 如何设计合理的分片数量?
⏮️ 上一节:ClickHouse | ⏭️ 下一节:Nginx