-
Notifications
You must be signed in to change notification settings - Fork 263
/
db.c
1656 lines (1479 loc) · 63.8 KB
/
db.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "server.h"
#include "cluster.h"
#include <signal.h>
#include <ctype.h>
/* Forward declarations of the slot-to-key helpers. The functions in this
 * file call them only when server.cluster_enabled is set. */
void slotToKeyAdd(robj *key);
void slotToKeyDel(robj *key);
void slotToKeyFlush(void);
/*-----------------------------------------------------------------------------
* C-level DB API
*----------------------------------------------------------------------------*/
/* Low level key lookup. Command implementations are not expected to call
 * this directly: they should go through lookupKeyRead(), lookupKeyWrite()
 * or lookupKeyReadWithFlags(), which are built on top of it.
 *
 * Returns the value object stored at 'key' in 'db', or NULL when the key is
 * not in the main dictionary. Unless LOOKUP_NOTOUCH is set in 'flags', and
 * as long as no RDB/AOF child process is running, the value's 'lru' field
 * is refreshed with the current LRU clock. */
robj *lookupKey(redisDb *db, robj *key, int flags) {
    dictEntry *entry = dictFind(db->dict,key->ptr);
    robj *found;

    if (entry == NULL) return NULL;
    found = dictGetVal(entry);

    /* The caller asked us not to alter the last access time. */
    if (flags & LOOKUP_NOTOUCH) return found;

    /* Don't update the access time while a saving child exists: writing to
     * the object would trigger copy-on-write of the page it lives in. */
    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1)
        return found;

    found->lru = LRU_CLOCK();
    return found;
}
/* Lookup a key for read operations, or return NULL if the key is not found
 * in the specified DB.
 *
 * As a side effect of calling this function:
 * 1. A key gets expired if it reached its TTL.
 * 2. The key last access time is updated.
 * 3. The global keys hits/misses stats are updated (reported in INFO).
 *    This includes the early-return paths taken for expired keys, which
 *    are accounted as misses.
 *
 * This API should not be used when we write to the key after obtaining
 * the object linked to the key, but only for read only operations.
 *
 * Flags change the behavior of this command:
 *
 * LOOKUP_NONE (or zero): no special flags are passed.
 * LOOKUP_NOTOUCH: don't alter the last access time of the key.
 *
 * Note: this function also returns NULL if the key is logically expired
 * but still existing, in case this is a slave, since this API is called only
 * for read operations. Even if the key expiry is master-driven, we can
 * correctly report a key as expired on slaves even if the master is lagging
 * in expiring our key via DELs in the replication link. */
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
    robj *val;

    if (expireIfNeeded(db,key) == 1) {
        /* Key expired. When we are a master (no masterhost configured)
         * there is nothing left to look up, so report the miss and return
         * NULL as soon as possible. */
        if (server.masterhost == NULL) {
            server.stat_keyspace_misses++;
            return NULL;
        }

        /* However if we are in the context of a slave, expireIfNeeded() will
         * not really try to expire the key, it only returns information
         * about the "logical" status of the key: key expiring is up to the
         * master in order to have a consistent view of master's data set.
         *
         * If the command caller is not the master, and as an additional
         * safety measure the command invoked is a read-only command, we can
         * safely return NULL here, and provide a more consistent behavior
         * to clients accessing expired values in a read-only fashion, which
         * will see the key as non existing.
         *
         * Notably this covers GETs when slaves are used to scale reads. */
        if (server.current_client &&
            server.current_client != server.master &&
            server.current_client->cmd &&
            server.current_client->cmd->flags & CMD_READONLY)
        {
            server.stat_keyspace_misses++;
            return NULL;
        }
    }

    /* Regular path: fetch the value and account for the hit or the miss. */
    val = lookupKey(db,key,flags);
    if (val == NULL)
        server.stat_keyspace_misses++;
    else
        server.stat_keyspace_hits++;
    return val;
}
/* Like lookupKeyReadWithFlags(), but does not use any flag (LOOKUP_NONE is
 * passed), which is the common case. Being a thin wrapper, it has the same
 * side effects, including the update of the keyspace hits/misses stats. */
robj *lookupKeyRead(redisDb *db, robj *key) {
return lookupKeyReadWithFlags(db,key,LOOKUP_NONE);
}
/* Lookup a key for write operations. As a side effect expireIfNeeded() is
 * called on the key first, so a key whose TTL was reached gets expired.
 *
 * Returns the linked value object if the key exists, or NULL if the key
 * does not exist in the specified DB. Unlike the read variants, this
 * function does not touch the keyspace hits/misses counters. */
robj *lookupKeyWrite(redisDb *db, robj *key) {
    robj *val;

    expireIfNeeded(db,key);
    val = lookupKey(db,key,LOOKUP_NONE);
    return val;
}
/* Lookup 'key' for reading in the database selected by client 'c'.
 * When the key is missing, 'reply' is sent to the client and NULL is
 * returned; otherwise the value object is returned. */
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
    robj *val = lookupKeyRead(c->db, key);

    if (val == NULL) {
        addReply(c,reply);
    }
    return val;
}
/* Lookup 'key' for writing in the database selected by client 'c'.
 * When the key is missing, 'reply' is sent to the client and NULL is
 * returned; otherwise the value object is returned. */
robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
    robj *val = lookupKeyWrite(c->db, key);

    if (val == NULL) {
        addReply(c,reply);
    }
    return val;
}
/* Add the key/value pair to the DB. The dictionary gets its own copy of the
 * key string, while the value object is stored as is: it's up to the caller
 * to increment the reference counter of the value if needed.
 *
 * The program is aborted (assertion) if the key already exists. */
void dbAdd(redisDb *db, robj *key, robj *val) {
    sds keycopy = sdsdup(key->ptr);
    int retval;

    retval = dictAdd(db->dict, keycopy, val);
    serverAssertWithInfo(NULL,key,retval == DICT_OK);

    /* A new list key is signaled as ready, presumably so that clients
     * blocked waiting for it can be served. */
    if (val->type == OBJ_LIST) {
        signalListAsReady(db, key);
    }

    /* In cluster mode also track the key in the slot-to-key index. */
    if (server.cluster_enabled) {
        slotToKeyAdd(key);
    }
}
/* Replace the value of an already existing key with 'val'. Incrementing the
 * reference count of the new value is up to the caller, and the expire time
 * of the key is left untouched.
 *
 * The program is aborted (assertion) if the key was not already present. */
void dbOverwrite(redisDb *db, robj *key, robj *val) {
    dictEntry *de;

    /* The key must be there: this API is only for overwriting. */
    de = dictFind(db->dict,key->ptr);
    serverAssertWithInfo(NULL,key,de != NULL);

    dictReplace(db->dict, key->ptr, val);
}
/* High level Set operation. Associates 'key' with 'val' whether or not the
 * key already exists:
 *
 * 1) The ref count of the value object is incremented.
 * 2) Clients WATCHing the destination key are notified.
 * 3) The expire time of the key is reset (the key is made persistent). */
void setKey(redisDb *db, robj *key, robj *val) {
    int exists = (lookupKeyWrite(db,key) != NULL);

    if (exists) {
        dbOverwrite(db,key,val);    /* Replace the old value. */
    } else {
        dbAdd(db,key,val);          /* Brand new key. */
    }

    incrRefCount(val);
    removeExpire(db,key);
    signalModifiedKey(db,key);
}
/* Return 1 if 'key' is present in the main dictionary of 'db', 0 otherwise.
 * No expire check is performed here. */
int dbExists(redisDb *db, robj *key) {
    dictEntry *entry = dictFind(db->dict,key->ptr);

    return entry ? 1 : 0;
}
/* Return a random key, in form of a newly created string object that the
 * caller must release with decrRefCount().
 * If there are no keys, NULL is returned.
 *
 * The function tries to return keys that are not already expired: a sampled
 * key that expireIfNeeded() reports as expired is discarded and another one
 * is picked. The number of such retries is bounded when running as a slave
 * with a fully volatile keyspace (see the comment in the loop). */
robj *dbRandomKey(redisDb *db) {
    dictEntry *de;
    int maxtries = 100;
    /* True when every key of the DB has an associated expire. */
    int allvolatile = dictSize(db->dict) == dictSize(db->expires);

    while(1) {
        sds key;
        robj *keyobj;

        /* Sample a random entry; an empty dictionary yields NULL. */
        de = dictGetRandomKey(db->dict);
        if (de == NULL) return NULL;

        key = dictGetKey(de);
        keyobj = createStringObject(key,sdslen(key));

        /* Only keys with an expire set need the expiration check. */
        if (dictFind(db->expires,key)) {
            /* NOTE(review): as documented in lookupKeyReadWithFlags(), on a
             * slave expireIfNeeded() only reports the logical status of the
             * key without deleting it. If all the keys are volatile and
             * already logically expired, the loop below would never find a
             * valid key and would spin forever. To avoid that, after a
             * bounded number of attempts return whatever key was sampled,
             * even if it may be expired. */
            if (allvolatile && server.masterhost && --maxtries == 0) {
                return keyobj;
            }
            if (expireIfNeeded(db,keyobj)) {
                decrRefCount(keyobj);
                continue; /* search for another key. This expired. */
            }
        }
        return keyobj;
    }
}
/* Delete a key, its value, and the associated expiration entry if any, from
 * the DB. Returns 1 if the key was found and removed, 0 otherwise. */
int dbDelete(redisDb *db, robj *key) {
    /* Deleting an entry from the expires dict will not free the sds of
     * the key, because it is shared with the main dictionary. */
    if (dictSize(db->expires) > 0) {
        dictDelete(db->expires,key->ptr);
    }

    /* Nothing else to do if the key was not in the main dictionary. */
    if (dictDelete(db->dict,key->ptr) != DICT_OK) return 0;

    /* In cluster mode the slot-to-key index must forget the key too. */
    if (server.cluster_enabled) slotToKeyDel(key);
    return 1;
}
/* Prepare the string object stored at 'key' to be modified destructively
 * to implement commands like SETBIT or APPEND.
 *
 * An object can be modified in place unless at least one of the following
 * is true:
 *
 * 1) The object 'o' is shared (refcount != 1): we don't want to affect
 *    other users.
 * 2) The object encoding is not "RAW".
 *
 * In either case an unshared, RAW encoded copy of the string is created,
 * stored at 'key' in 'db' in place of the old value, and returned.
 * Otherwise 'o' itself is returned.
 *
 * USAGE:
 *
 * The object 'o' is what the caller already obtained by looking up 'key'
 * in 'db', so the usage pattern looks like this:
 *
 * o = lookupKeyWrite(db,key);
 * if (checkType(c,o,OBJ_STRING)) return;
 * o = dbUnshareStringValue(db,key,o);
 *
 * At this point the caller is ready to modify the object, for example
 * using an sdscat() call to append some data, or anything else. */
robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
    robj *decoded;

    serverAssert(o->type == OBJ_STRING);

    /* Fast path: already private and RAW encoded, nothing to do. */
    if (o->refcount == 1 && o->encoding == OBJ_ENCODING_RAW) return o;

    /* Build a RAW copy from the decoded representation of the string, then
     * drop the reference obtained from getDecodedObject(). */
    decoded = getDecodedObject(o);
    o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
    decrRefCount(decoded);

    /* Store the private copy as the new value of the key. */
    dbOverwrite(db,key,o);
    return o;
}
// 清空所有数据库,返回删除键的个数
long long emptyDb(void(callback)(void*)) {
int j;
long long removed = 0;
// 遍历所有的数据库
for (j = 0; j < server.dbnum; j++) {
// 记录被删除键的数量
removed += dictSize(server.db[j].dict);
// 删除当前数据库的键值对字典
dictEmpty(server.db[j].dict,callback);
// 删除当前数据库的过期字典
dictEmpty(server.db[j].expires,callback);
}
//如果开启了集群模式,那么移除槽记录
if (server.cluster_enabled) slotToKeyFlush();
return removed;
}
/* Make the database with index 'id' the current database of client 'c'.
 * Returns C_OK on success, or C_ERR (leaving the client untouched) when
 * 'id' is outside the range [0, server.dbnum). */
int selectDb(client *c, int id) {
    if (id >= 0 && id < server.dbnum) {
        c->db = &server.db[id];
        return C_OK;
    }
    return C_ERR;
}
/*-----------------------------------------------------------------------------
* Hooks for key space changes.
*
* Every time a key in the database is modified the function
* signalModifiedKey() is called.
*
* Every time a DB is flushed the function signalFlushedDb() is called.
*----------------------------------------------------------------------------*/
/* Hook to call whenever 'key' in 'db' is modified. It currently only
 * forwards the event to touchWatchedKey(). */
void signalModifiedKey(redisDb *db, robj *key) {
touchWatchedKey(db,key);
}
/* Hook to call when a database is flushed. 'dbid' is forwarded as is to
 * touchWatchedKeysOnFlush(); flushallCommand() passes -1 when all the
 * databases are being flushed. */
void signalFlushedDb(int dbid) {
touchWatchedKeysOnFlush(dbid);
}
/*-----------------------------------------------------------------------------
* Type agnostic commands operating on the key space
* (keyspace operations that work regardless of the type of the value)
*----------------------------------------------------------------------------*/
/* FLUSHDB
 *
 * Remove all the keys of the database currently selected by the client.
 * The dirty counter is increased by the number of keys removed, the flush
 * is signaled before the dictionaries are emptied, and in cluster mode the
 * slot-to-key index is flushed too. Replies with OK. */
void flushdbCommand(client *c) {
    redisDb *db = c->db;

    server.dirty += dictSize(db->dict);
    signalFlushedDb(db->id);

    /* Drop both the keyspace and the expires of this DB. */
    dictEmpty(db->dict,NULL);
    dictEmpty(db->expires,NULL);

    if (server.cluster_enabled) {
        slotToKeyFlush();
    }
    addReply(c,shared.ok);
}
/* FLUSHALL
 *
 * Remove all the keys from all the databases. The flush is signaled with a
 * DB id of -1, the dirty counter is increased by the number of keys removed,
 * and the client receives OK. A running RDB child, if any, is killed and its
 * temp file removed; when save points are configured a new RDB is written
 * synchronously. */
void flushallCommand(client *c) {
    signalFlushedDb(-1);
    server.dirty += emptyDb(NULL);
    addReply(c,shared.ok);

    /* A background save in progress would dump the old dataset: stop it and
     * remove the temp file it was writing. */
    if (server.rdb_child_pid != -1) {
        kill(server.rdb_child_pid,SIGUSR1);
        rdbRemoveTempFile(server.rdb_child_pid);
    }

    if (server.saveparamslen > 0) {
        /* Normally rdbSave() will reset dirty, but we don't want this here
         * as otherwise FLUSHALL will not be replicated nor put into the AOF.
         *
         * The backup is a long long: the counter is accumulated from
         * long long values (see emptyDb() above), so storing it into a
         * plain int could truncate it. */
        long long saved_dirty = server.dirty;

        rdbSave(server.rdb_filename);
        server.dirty = saved_dirty;
    }
    server.dirty++;
}
/* DEL key [key ...]
 *
 * Delete every key given as argument and reply with the number of keys
 * actually removed. For each removed key the modification is signaled, a
 * "del" keyspace event is emitted and the dirty counter is incremented. */
void delCommand(client *c) {
    int j, deleted = 0;

    for (j = 1; j < c->argc; j++) {
        robj *key = c->argv[j];

        /* Give the key a chance to expire before trying to delete it. */
        expireIfNeeded(c->db,key);
        if (!dbDelete(c->db,key)) continue;

        signalModifiedKey(c->db,key);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
        server.dirty++;
        deleted++;
    }
    addReplyLongLong(c,deleted);
}
/* EXISTS key1 key2 ... key_N.
 *
 * Reply with the number of the specified keys that exist. Each key is first
 * passed to expireIfNeeded(), then looked up; a key named multiple times is
 * counted multiple times. */
void existsCommand(client *c) {
    long long count = 0;
    int j = 1;

    while (j < c->argc) {
        robj *key = c->argv[j++];

        expireIfNeeded(c->db,key);
        if (dbExists(c->db,key)) {
            count++;
        }
    }
    addReplyLongLong(c,count);
}
/* SELECT index
 *
 * Switch the client to the database with the given index. An error is sent
 * when the index is not an integer, when it does not identify a valid
 * database, or when a non-zero index is requested in cluster mode (only
 * DB 0 may be selected there). */
void selectCommand(client *c) {
    long id;

    if (getLongFromObjectOrReply(c, c->argv[1], &id,
        "invalid DB index") != C_OK)
        return;

    if (server.cluster_enabled && id != 0) {
        addReplyError(c,"SELECT is not allowed in cluster mode");
        return;
    }

    /* selectDb() takes an int: reject values that do not fit before the
     * conversion, otherwise a huge index could be silently truncated into a
     * valid one and select the wrong database. */
    if (id < INT_MIN || id > INT_MAX || selectDb(c,(int)id) == C_ERR) {
        addReplyError(c,"invalid DB index");
    } else {
        addReply(c,shared.ok);
    }
}
/* RANDOMKEY
 *
 * Reply with a random key of the current database (the key is not removed),
 * or with a null bulk reply when dbRandomKey() finds no key. */
void randomkeyCommand(client *c) {
    robj *key = dbRandomKey(c->db);

    if (key != NULL) {
        addReplyBulk(c,key);
        decrRefCount(key);  /* Release the temporary key object. */
    } else {
        addReply(c,shared.nullbulk);
    }
}
/* KEYS pattern
 *
 * Reply with all the keys of the current database matching the glob-style
 * 'pattern'. Keys that expireIfNeeded() reports as expired are skipped.
 * Since the number of matches is not known in advance, a deferred multi
 * bulk length is used and filled at the end. */
void keysCommand(client *c) {
    dictIterator *di;
    dictEntry *de;
    sds pattern = c->argv[1]->ptr;
    int plen = sdslen(pattern), allkeys;
    unsigned long numkeys = 0;
    void *replylen = addDeferredMultiBulkLength(c);

    /* A safe iterator is used, presumably because expireIfNeeded() may
     * remove entries while we are iterating. */
    di = dictGetSafeIterator(c->db->dict);

    /* The pattern matches everything only if it is exactly "*". The length
     * is checked instead of looking for a terminator at pattern[1], so that
     * a pattern such as "*\0foo" is not mistaken for "*" (sds strings may
     * contain null bytes); this is the same test scanGenericCommand() uses
     * for MATCH. */
    allkeys = (pattern[0] == '*' && plen == 1);

    while((de = dictNext(di)) != NULL) {
        sds key = dictGetKey(de);
        robj *keyobj;

        if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) {
            /* A temporary object is needed for expireIfNeeded() and for
             * the reply. */
            keyobj = createStringObject(key,sdslen(key));
            if (expireIfNeeded(c->db,keyobj) == 0) {
                addReplyBulk(c,keyobj);
                numkeys++;
            }
            decrRefCount(keyobj);
        }
    }
    dictReleaseIterator(di);
    setDeferredMultiBulkLength(c,replylen,numkeys);
}
/* This callback is used by scanGenericCommand in order to collect elements
 * returned by the dictionary iterator into a list.
 *
 * 'privdata' is a two elements array: [0] is the list to append to, [1] is
 * the object being scanned (NULL when scanning the keyspace). What is
 * appended depends on the type of that object:
 *
 * - NULL (keyspace): a new string object created from the sds key.
 * - Set: the key object, with its reference count incremented.
 * - Hash: the key and the value objects, both with their reference count
 *   incremented.
 * - Sorted set: the key object (reference count incremented) followed by a
 *   new string object created from the double stored as dictionary value. */
void scanCallback(void *privdata, const dictEntry *de) {
    void **pd = (void**) privdata;
    list *keys = pd[0];
    robj *o = pd[1];
    robj *key, *val = NULL;

    if (o == NULL) {
        sds sdskey = dictGetKey(de);
        key = createStringObject(sdskey, sdslen(sdskey));
    } else {
        switch (o->type) {
        case OBJ_SET:
            key = dictGetKey(de);
            incrRefCount(key);
            break;
        case OBJ_HASH:
            key = dictGetKey(de);
            val = dictGetVal(de);
            incrRefCount(key);
            incrRefCount(val);
            break;
        case OBJ_ZSET:
            key = dictGetKey(de);
            incrRefCount(key);
            val = createStringObjectFromLongDouble(*(double*)dictGetVal(de),0);
            break;
        default:
            serverPanic("Type not handled in SCAN callback.");
        }
    }

    /* The element first, then its value when there is one. */
    listAddNodeTail(keys, key);
    if (val) listAddNodeTail(keys, val);
}
/* Try to parse a SCAN cursor stored at object 'o':
 * if the cursor is valid, store it as unsigned integer into *cursor and
 * return C_OK. Otherwise return C_ERR and send an "invalid cursor" error to
 * the client.
 *
 * The cursor is rejected when it starts with a whitespace character, when
 * it is followed by trailing characters, or when it is out of range for an
 * unsigned long. Note that *cursor is written even when C_ERR is returned. */
int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) {
    char *eptr;

    /* Use strtoul() because we need an *unsigned* long, so
     * getLongLongFromObject() does not cover the whole cursor space. */
    errno = 0;
    *cursor = strtoul(o->ptr, &eptr, 10);

    /* strtoul() silently skips leading spaces, so they are checked for
     * explicitly. The character is converted to unsigned char first:
     * passing a negative char value (any byte >= 0x80 where char is signed)
     * to isspace() is undefined behavior. */
    if (isspace((unsigned char)((char*)o->ptr)[0]) ||
        eptr[0] != '\0' ||
        errno == ERANGE)
    {
        addReplyError(c, "invalid cursor");
        return C_ERR;
    }
    return C_OK;
}
/* This function implements the SCAN, HSCAN, SSCAN and ZSCAN family:
 *
 * SCAN cursor [MATCH pattern] [COUNT count]
 *
 * If object 'o' is passed, then it must be a Set, Sorted Set or Hash object
 * (this is asserted), otherwise if 'o' is NULL the command operates on the
 * dictionary associated with the current database.
 *
 * When 'o' is not NULL the function assumes that the first argument in
 * the client arguments vector is a key, so it skips it before iterating
 * in order to parse options.
 *
 * In the case of a Hash or Sorted Set object the collected list holds both
 * the element and its value (field/value or member/score), so two entries
 * are replied for every element.
 *
 * 'cursor' is the already parsed cursor; the reply is a two elements multi
 * bulk: the next cursor, and the list of collected elements. */
void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
int i, j;
list *keys = listCreate(); // flat list that collects the elements to reply
listNode *node, *nextnode;
long count = 10;
sds pat = NULL;
int patlen = 0, use_pattern = 0;
dict *ht;
/* Object must be NULL (to iterate keys names), or the type of the object
 * must be Set, Sorted Set, or Hash. */
serverAssert(o == NULL || o->type == OBJ_SET || o->type == OBJ_HASH ||
o->type == OBJ_ZSET);
/* Set i to the first option argument. The previous one is the cursor.
 * When 'o' is given there is one more argument to skip (the key). */
i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */
/* Step 1: Parse options. */
while (i < c->argc) {
j = c->argc - i;
/* COUNT <count>: hint about how many elements to return per call.
 * j is the number of arguments left, the option needs two of them. */
if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) {
/* On a non integer count getLongFromObjectOrReply() already replied
 * to the client. */
if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL)
!= C_OK)
{
goto cleanup;
}
/* A count smaller than 1 is reported as a syntax error. */
if (count < 1) {
addReply(c,shared.syntaxerr);
goto cleanup;
}
i += 2; // skip the option name and its value
/* MATCH <pattern>: only return the elements matching the pattern. */
} else if (!strcasecmp(c->argv[i]->ptr, "match") && j >= 2) {
pat = c->argv[i+1]->ptr; // the pattern string
patlen = sdslen(pat); // and its length
/* The pattern always matches if it is exactly "*", so it is
 * equivalent to disabling it. */
use_pattern = !(pat[0] == '*' && patlen == 1);
i += 2;
} else {
addReply(c,shared.syntaxerr);
goto cleanup;
}
}
/* Step 2: Iterate the collection.
 *
 * Note that if the object is encoded with a ziplist, intset, or any other
 * representation that is not a hash table, we are sure that it is also
 * composed of a small number of elements. So to avoid taking state we
 * just return everything inside the object in a single call, setting the
 * cursor to zero to signal the end of the iteration. */
/* Handle the case of a hash table: 'ht' stays NULL when the target is not
 * backed by one. */
ht = NULL;
/* Target: the keyspace of the current database. */
if (o == NULL) {
ht = c->db->dict;
/* Target: a hash table encoded Set. */
} else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
/* Target: a hash table encoded Hash. */
} else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
count *= 2; /* We return key / value for this type. */
/* Target: a skiplist encoded Sorted Set, scanned via its dictionary. */
} else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
ht = zs->dict;
count *= 2; /* We return key / value for this type. */
}
if (ht) {
void *privdata[2];
/* We set the max number of iterations to ten times the specified
 * COUNT, so if the hash table is in a pathological state (very
 * sparsely populated) we avoid to block too much time at the cost
 * of returning no or very few elements. */
long maxiterations = count*10;
/* We pass two pointers to the callback: the list to which it will
 * add new elements, and the object containing the dictionary so that
 * it is possible to fetch more data in a type-dependent way. */
privdata[0] = keys;
privdata[1] = o;
/* Scan from 'cursor', letting scanCallback() append to 'keys'. */
do {
cursor = dictScan(ht, cursor, scanCallback, privdata);
} while (cursor &&
maxiterations-- &&
listLength(keys) < (unsigned long)count);// stop when the scan ends, the budget is over, or enough was collected
/* A Set that is not hash table encoded: read it as an intset. */
} else if (o->type == OBJ_SET) {
int pos = 0;
int64_t ll;
/* Add every integer as a string object; cursor 0 signals that the
 * iteration is complete. */
while(intsetGet(o->ptr,pos++,&ll))
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
cursor = 0;
/* A Hash or Sorted Set that is not hash table / skiplist encoded: read it
 * as a ziplist. */
} else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
unsigned char *p = ziplistIndex(o->ptr,0);
unsigned char *vstr;
unsigned int vlen;
long long vll;
while(p) {
/* Every entry becomes a string object, built from the string when
 * vstr is set, from the integer vll otherwise. */
ziplistGet(p,&vstr,&vlen,&vll);
listAddNodeTail(keys,
(vstr != NULL) ? createStringObject((char*)vstr,vlen) :
createStringObjectFromLongLong(vll));
p = ziplistNext(o->ptr,p);
}
cursor = 0;
} else {
serverPanic("Not handled encoding in SCAN.");
}
/* Step 3: Filter elements. */
node = listFirst(keys); // first node of the list
while (node) {
robj *kobj = listNodeValue(node); // the element (key) object
nextnode = listNextNode(node); // saved now: 'node' may be deleted below
int filter = 0; // by default the element is kept
/* Filter element if it does not match the pattern. */
if (!filter && use_pattern) {
/* String encoded element: match against its sds directly. */
if (sdsEncodedObject(kobj)) {
if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0))
filter = 1;
/* Otherwise it must be integer encoded. */
} else {
char buf[LONG_STR_SIZE];
int len;
serverAssert(kobj->encoding == OBJ_ENCODING_INT);
/* Match against the decimal representation of the integer. */
len = ll2string(buf,sizeof(buf),(long)kobj->ptr);
if (!stringmatchlen(pat, patlen, buf, len, 0)) filter = 1;
}
}
/* Filter element if it is an expired key (keyspace scan only). */
if (!filter && o == NULL && expireIfNeeded(c->db, kobj)) filter = 1;
/* Remove the element and its associated value if needed. */
if (filter) {
decrRefCount(kobj);
listDelNode(keys, node);
}
/* If this is a hash or a sorted set, we have a flat list of
 * key-value elements, so if this element was filtered, remove the
 * value, or skip it if it was not filtered: we only match keys. */
if (o && (o->type == OBJ_ZSET || o->type == OBJ_HASH)) {
node = nextnode;
nextnode = listNextNode(node); // node following the value
if (filter) {
kobj = listNodeValue(node); // the value object
decrRefCount(kobj);
listDelNode(keys, node); // drop the value together with its key
}
}
node = nextnode;
}
/* Step 4: Reply to the client. */
addReplyMultiBulkLen(c, 2); // two parts: the cursor and the elements
addReplyBulkLongLong(c,cursor); // next cursor
addReplyMultiBulkLen(c, listLength(keys)); // number of elements that follow
/* Emit the elements, consuming the list while doing so. */
while ((node = listFirst(keys)) != NULL) {
robj *kobj = listNodeValue(node);
addReplyBulk(c, kobj);
decrRefCount(kobj);
listDelNode(keys, node);
}
/* Common exit path, also reached on option parsing errors. */
cleanup:
listSetFreeMethod(keys,decrRefCountVoid); // any element still in the list is released via decrRefCountVoid
listRelease(keys); // free the list itself
}
/* SCAN cursor [MATCH pattern] [COUNT count]
 *
 * The SCAN command completely relies on scanGenericCommand(): here we only
 * parse the cursor. If it is invalid, parseScanCursorOrReply() has already
 * sent the error to the client and there is nothing else to do. */
void scanCommand(client *c) {
    unsigned long cursor;

    if (parseScanCursorOrReply(c,c->argv[1],&cursor) != C_ERR) {
        scanGenericCommand(c,NULL,cursor);
    }
}
/* DBSIZE
 *
 * Reply with the number of keys in the main dictionary of the currently
 * selected database. */
void dbsizeCommand(client *c) {
    dict *keyspace = c->db->dict;

    addReplyLongLong(c,dictSize(keyspace));
}
/* LASTSAVE
 *
 * Reply with server.lastsave as an integer. Presumably this is the UNIX
 * time of the last successful save of the dataset on disk. */
void lastsaveCommand(client *c) {
addReplyLongLong(c,server.lastsave);
}
/* TYPE key
 *
 * Reply with a status string naming the type of the value stored at key:
 * "string", "list", "set", "zset", "hash", "unknown" for any other type, or
 * "none" when the key does not exist. The lookup is done with
 * LOOKUP_NOTOUCH, so the last access time of the key is not altered. */
void typeCommand(client *c) {
    char *type = "none";
    robj *o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH);

    if (o != NULL) {
        if (o->type == OBJ_STRING) {
            type = "string";
        } else if (o->type == OBJ_LIST) {
            type = "list";
        } else if (o->type == OBJ_SET) {
            type = "set";
        } else if (o->type == OBJ_ZSET) {
            type = "zset";
        } else if (o->type == OBJ_HASH) {
            type = "hash";
        } else {
            type = "unknown";
        }
    }
    addReplyStatus(c,type);
}
/* SHUTDOWN [SAVE|NOSAVE]
 *
 * SHUTDOWN SAVE sets the SHUTDOWN_SAVE flag, asking for a save on exit even
 * when no save point is configured. SHUTDOWN NOSAVE sets SHUTDOWN_NOSAVE,
 * asking not to save even if save points are configured. Any other argument,
 * or more than one argument, is a syntax error.
 *
 * If prepareForShutdown() succeeds the process exits with status 0 and the
 * client gets no reply; otherwise an error is sent to the client. */
void shutdownCommand(client *c) {
    int flags = 0;

    if (c->argc > 2) {
        addReply(c,shared.syntaxerr);
        return;
    }

    if (c->argc == 2) {
        char *opt = c->argv[1]->ptr;

        if (strcasecmp(opt,"nosave") == 0) {
            flags |= SHUTDOWN_NOSAVE;
        } else if (strcasecmp(opt,"save") == 0) {
            flags |= SHUTDOWN_SAVE;
        } else {
            addReply(c,shared.syntaxerr);
            return;
        }
    }

    /* When SHUTDOWN is called while the server is loading a dataset in
     * memory we need to make sure no attempt is performed to save
     * the dataset on shutdown (otherwise it could overwrite the current DB
     * with half-read data).
     *
     * Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
    if (server.loading || server.sentinel_mode) {
        flags &= ~SHUTDOWN_SAVE;
        flags |= SHUTDOWN_NOSAVE;
    }

    if (prepareForShutdown(flags) == C_OK) exit(0);
    addReplyError(c,"Errors trying to SHUTDOWN. Check logs.");
}
/* Generic implementation of:
 *
 * RENAME key newkey
 * RENAMENX key newkey
 *
 * 'nx' is non-zero for RENAMENX, in which case the rename is only performed
 * when newkey does not exist, and the reply is an integer (1 if the key was
 * renamed, 0 otherwise) instead of OK. The expire of the source key, if
 * any, is carried over to the new key. */
void renameGenericCommand(client *c, int nx) {
    robj *src = c->argv[1], *dst = c->argv[2];
    robj *o;
    long long expire;
    /* When source and dest key is the same, no operation is performed,
     * if the key exists, however we still return an error on unexisting
     * key. */
    int samekey = (sdscmp(src->ptr,dst->ptr) == 0);

    /* The source key must exist, otherwise the "no such key" error is sent
     * by the lookup helper. */
    o = lookupKeyWriteOrReply(c,src,shared.nokeyerr);
    if (o == NULL) return;

    if (samekey) {
        addReply(c,nx ? shared.czero : shared.ok);
        return;
    }

    /* Take a reference to the value so that it survives the deletion of the
     * source key, and remember the source expire for the new key. */
    incrRefCount(o);
    expire = getExpire(c->db,src);

    if (lookupKeyWrite(c->db,dst) != NULL) {
        if (nx) {
            /* RENAMENX refuses to overwrite: undo the reference taken. */
            decrRefCount(o);
            addReply(c,shared.czero);
            return;
        }
        /* Overwrite: delete the old key before creating the new one
         * with the same name. */
        dbDelete(c->db,dst);
    }

    dbAdd(c->db,dst,o);
    if (expire != -1) setExpire(c->db,dst,expire);
    dbDelete(c->db,src);

    /* Both keys were touched: signal and notify. */
    signalModifiedKey(c->db,src);
    signalModifiedKey(c->db,dst);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from",src,c->db->id);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to",dst,c->db->id);
    server.dirty++;
    addReply(c,nx ? shared.cone : shared.ok);
}
/* RENAME key newkey
 *
 * Implemented by renameGenericCommand() with nx = 0. */
void renameCommand(client *c) {
renameGenericCommand(c,0);
}
/* RENAMENX key newkey
 *
 * Implemented by renameGenericCommand() with nx = 1. */
void renamenxCommand(client *c) {
renameGenericCommand(c,1);
}
// MOVE key db 将当前数据库的 key 移动到给定的数据库 db 当中。
// MOVE 命令实现
void moveCommand(client *c) {
robj *o;
redisDb *src, *dst;
int srcid;
long long dbid, expire;
// 服务器处于集群模式,不支持多数据库
if (server.cluster_enabled) {
addReplyError(c,"MOVE is not allowed in cluster mode");
return;
}
/* Obtain source and target DB pointers */
// 获得源数据库和源数据库的id
src = c->db;
srcid = c->db->id;
// 将参数db的值保存到dbid,并且切换到该数据库中
if (getLongLongFromObject(c->argv[2],&dbid) == C_ERR ||
dbid < INT_MIN || dbid > INT_MAX ||
selectDb(c,dbid) == C_ERR)
{
addReply(c,shared.outofrangeerr);
return;
}
// 目标数据库
dst = c->db;
// 切换回源数据库
selectDb(c,srcid); /* Back to the source DB */
/* If the user is moving using as target the same
* DB as the source DB it is probably an error. */
// 如果前后切换的数据库相同,则返回有关错误
if (src == dst) {