1/*
2 * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include "server.h"
31#include "monotonic.h"
32#include "cluster.h"
33#include "slowlog.h"
34#include "bio.h"
35#include "latency.h"
36#include "atomicvar.h"
37#include "mt19937-64.h"
38#include "functions.h"
39#include "syscheck.h"
40
41#include <time.h>
42#include <signal.h>
43#include <sys/wait.h>
44#include <errno.h>
45#include <assert.h>
46#include <ctype.h>
47#include <stdarg.h>
48#include <arpa/inet.h>
49#include <sys/stat.h>
50#include <fcntl.h>
51#include <sys/file.h>
52#include <sys/time.h>
53#include <sys/resource.h>
54#include <sys/uio.h>
55#include <sys/un.h>
56#include <limits.h>
57#include <float.h>
58#include <math.h>
59#include <sys/resource.h>
60#include <sys/utsname.h>
61#include <locale.h>
62#include <sys/socket.h>
63#include <sys/resource.h>
64
65#ifdef __linux__
66#include <sys/mman.h>
67#endif
68
69#if defined(HAVE_SYSCTL_KIPC_SOMAXCONN) || defined(HAVE_SYSCTL_KERN_SOMAXCONN)
70#include <sys/sysctl.h>
71#endif
72
73/* Our shared "common" objects */
74
75struct sharedObjectsStruct shared;
76
77/* Global vars that are actually used as constants. The following double
78 * values are used for double on-disk serialization, and are initialized
79 * at runtime to avoid strange compiler optimizations. */
80
81double R_Zero, R_PosInf, R_NegInf, R_Nan;
82
83/*================================= Globals ================================= */
84
85/* Global vars */
86struct redisServer server; /* Server global state */
87
88/*============================ Internal prototypes ========================== */
89
90static inline int isShutdownInitiated(void);
91int isReadyToShutdown(void);
92int finishShutdown(void);
93const char *replstateToString(int replstate);
94
95/*============================ Utility functions ============================ */
96
97/* We use a private localtime implementation which is fork-safe. The logging
98 * function of Redis may be called from other threads. */
99void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst);
100
101/* Low level logging. To use only for very big messages, otherwise
102 * serverLog() is to prefer. */
103void serverLogRaw(int level, const char *msg) {
104 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
105 const char *c = ".-*#";
106 FILE *fp;
107 char buf[64];
108 int rawmode = (level & LL_RAW);
109 int log_to_stdout = server.logfile[0] == '\0';
110
111 level &= 0xff; /* clear flags */
112 if (level < server.verbosity) return;
113
114 fp = log_to_stdout ? stdout : fopen(server.logfile,"a");
115 if (!fp) return;
116
117 if (rawmode) {
118 fprintf(fp,"%s",msg);
119 } else {
120 int off;
121 struct timeval tv;
122 int role_char;
123 pid_t pid = getpid();
124
125 gettimeofday(&tv,NULL);
126 struct tm tm;
127 nolocks_localtime(&tm,tv.tv_sec,server.timezone,server.daylight_active);
128 off = strftime(buf,sizeof(buf),"%d %b %Y %H:%M:%S.",&tm);
129 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
130 if (server.sentinel_mode) {
131 role_char = 'X'; /* Sentinel. */
132 } else if (pid != server.pid) {
133 role_char = 'C'; /* RDB / AOF writing child. */
134 } else {
135 role_char = (server.masterhost ? 'S':'M'); /* Slave or Master. */
136 }
137 fprintf(fp,"%d:%c %s %c %s\n",
138 (int)getpid(),role_char, buf,c[level],msg);
139 }
140 fflush(fp);
141
142 if (!log_to_stdout) fclose(fp);
143 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
144}
145
146/* Like serverLogRaw() but with printf-alike support. This is the function that
147 * is used across the code. The raw version is only used in order to dump
148 * the INFO output on crash. */
149void _serverLog(int level, const char *fmt, ...) {
150 va_list ap;
151 char msg[LOG_MAX_LEN];
152
153 va_start(ap, fmt);
154 vsnprintf(msg, sizeof(msg), fmt, ap);
155 va_end(ap);
156
157 serverLogRaw(level,msg);
158}
159
160/* Log a fixed message without printf-alike capabilities, in a way that is
161 * safe to call from a signal handler.
162 *
163 * We actually use this only for signals that are not fatal from the point
164 * of view of Redis. Signals that are going to kill the server anyway and
165 * where we need printf-alike features are served by serverLog(). */
166void serverLogFromHandler(int level, const char *msg) {
167 int fd;
168 int log_to_stdout = server.logfile[0] == '\0';
169 char buf[64];
170
171 if ((level&0xff) < server.verbosity || (log_to_stdout && server.daemonize))
172 return;
173 fd = log_to_stdout ? STDOUT_FILENO :
174 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
175 if (fd == -1) return;
176 ll2string(buf,sizeof(buf),getpid());
177 if (write(fd,buf,strlen(buf)) == -1) goto err;
178 if (write(fd,":signal-handler (",17) == -1) goto err;
179 ll2string(buf,sizeof(buf),time(NULL));
180 if (write(fd,buf,strlen(buf)) == -1) goto err;
181 if (write(fd,") ",2) == -1) goto err;
182 if (write(fd,msg,strlen(msg)) == -1) goto err;
183 if (write(fd,"\n",1) == -1) goto err;
184err:
185 if (!log_to_stdout) close(fd);
186}
187
/* Return the current UNIX time in microseconds, as reported by
 * gettimeofday(). */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
198
199/* Return the UNIX time in milliseconds */
200mstime_t mstime(void) {
201 return ustime()/1000;
202}
203
/* Terminate a child process (RDB dump / AOF rewrite) with status `retcode`.
 *
 * _exit() is used rather than exit(), because the latter may interact with
 * the same file objects used by the parent process. Builds made with
 * COVERAGE_TEST defined use the regular exit() instead, so that the coverage
 * information is collected correctly. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
215
216/*====================== Hash table type implementation ==================== */
217
218/* This is a hash table type that uses the SDS dynamic strings library as
219 * keys and redis objects as values (objects can hold SDS strings,
220 * lists, sets). */
221
222void dictVanillaFree(dict *d, void *val)
223{
224 UNUSED(d);
225 zfree(val);
226}
227
228void dictListDestructor(dict *d, void *val)
229{
230 UNUSED(d);
231 listRelease((list*)val);
232}
233
234int dictSdsKeyCompare(dict *d, const void *key1,
235 const void *key2)
236{
237 int l1,l2;
238 UNUSED(d);
239
240 l1 = sdslen((sds)key1);
241 l2 = sdslen((sds)key2);
242 if (l1 != l2) return 0;
243 return memcmp(key1, key2, l1) == 0;
244}
245
246/* A case insensitive version used for the command lookup table and other
247 * places where case insensitive non binary-safe comparison is needed. */
248int dictSdsKeyCaseCompare(dict *d, const void *key1,
249 const void *key2)
250{
251 UNUSED(d);
252 return strcasecmp(key1, key2) == 0;
253}
254
255void dictObjectDestructor(dict *d, void *val)
256{
257 UNUSED(d);
258 if (val == NULL) return; /* Lazy freeing will set value to NULL. */
259 decrRefCount(val);
260}
261
262void dictSdsDestructor(dict *d, void *val)
263{
264 UNUSED(d);
265 sdsfree(val);
266}
267
268void *dictSdsDup(dict *d, const void *key) {
269 UNUSED(d);
270 return sdsdup((const sds) key);
271}
272
273int dictObjKeyCompare(dict *d, const void *key1,
274 const void *key2)
275{
276 const robj *o1 = key1, *o2 = key2;
277 return dictSdsKeyCompare(d, o1->ptr,o2->ptr);
278}
279
280uint64_t dictObjHash(const void *key) {
281 const robj *o = key;
282 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
283}
284
/* Hash function for SDS keys: hashes sdslen(key) bytes, so the result is
 * binary safe and case sensitive. */
uint64_t dictSdsHash(const void *key) {
    size_t len = sdslen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
288
/* Case insensitive hash function for SDS keys: hashes sdslen(key) bytes
 * through dictGenCaseHashFunction(). */
uint64_t dictSdsCaseHash(const void *key) {
    size_t len = sdslen((char*)key);

    return dictGenCaseHashFunction((unsigned char*)key, len);
}
292
/* Dict hash function for NUL terminated C strings: the length is taken with
 * strlen() and the bytes are hashed case sensitively. */
uint64_t distCStrHash(const void *key) {
    size_t len = strlen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
297
/* Case insensitive dict hash function for NUL terminated C strings: the
 * length is taken with strlen() and the bytes are hashed through
 * dictGenCaseHashFunction(). */
uint64_t distCStrCaseHash(const void *key) {
    size_t len = strlen((char*)key);

    return dictGenCaseHashFunction((unsigned char*)key, len);
}
302
303/* Dict compare function for null terminated string */
304int distCStrKeyCompare(dict *d, const void *key1, const void *key2) {
305 int l1,l2;
306 UNUSED(d);
307
308 l1 = strlen((char*)key1);
309 l2 = strlen((char*)key2);
310 if (l1 != l2) return 0;
311 return memcmp(key1, key2, l1) == 0;
312}
313
314/* Dict case insensitive compare function for null terminated string */
315int distCStrKeyCaseCompare(dict *d, const void *key1, const void *key2) {
316 UNUSED(d);
317 return strcasecmp(key1, key2) == 0;
318}
319
320int dictEncObjKeyCompare(dict *d, const void *key1, const void *key2)
321{
322 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
323 int cmp;
324
325 if (o1->encoding == OBJ_ENCODING_INT &&
326 o2->encoding == OBJ_ENCODING_INT)
327 return o1->ptr == o2->ptr;
328
329 /* Due to OBJ_STATIC_REFCOUNT, we avoid calling getDecodedObject() without
330 * good reasons, because it would incrRefCount() the object, which
331 * is invalid. So we check to make sure dictFind() works with static
332 * objects as well. */
333 if (o1->refcount != OBJ_STATIC_REFCOUNT) o1 = getDecodedObject(o1);
334 if (o2->refcount != OBJ_STATIC_REFCOUNT) o2 = getDecodedObject(o2);
335 cmp = dictSdsKeyCompare(d,o1->ptr,o2->ptr);
336 if (o1->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o1);
337 if (o2->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o2);
338 return cmp;
339}
340
341uint64_t dictEncObjHash(const void *key) {
342 robj *o = (robj*) key;
343
344 if (sdsEncodedObject(o)) {
345 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
346 } else if (o->encoding == OBJ_ENCODING_INT) {
347 char buf[32];
348 int len;
349
350 len = ll2string(buf,32,(long)o->ptr);
351 return dictGenHashFunction((unsigned char*)buf, len);
352 } else {
353 serverPanic("Unknown string encoding");
354 }
355}
356
357/* Return 1 if currently we allow dict to expand. Dict may allocate huge
358 * memory to contain hash buckets when dict expands, that may lead redis
359 * rejects user's requests or evicts some keys, we can stop dict to expand
360 * provisionally if used memory will be over maxmemory after dict expands,
361 * but to guarantee the performance of redis, we still allow dict to expand
362 * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
363int dictExpandAllowed(size_t moreMem, double usedRatio) {
364 if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
365 return !overMaxmemoryAfterAlloc(moreMem);
366 } else {
367 return 1;
368 }
369}
370
371/* Returns the size of the DB dict entry metadata in bytes. In cluster mode, the
372 * metadata is used for constructing a doubly linked list of the dict entries
373 * belonging to the same cluster slot. See the Slot to Key API in cluster.c. */
374size_t dictEntryMetadataSize(dict *d) {
375 UNUSED(d);
376 /* NOTICE: this also affects overhead_ht_slot_to_keys in getMemoryOverheadData.
377 * If we ever add non-cluster related data here, that code must be modified too. */
378 return server.cluster_enabled ? sizeof(clusterDictEntryMetadata) : 0;
379}
380
381/* Generic hash table type where keys are Redis Objects, Values
382 * dummy pointers. */
383dictType objectKeyPointerValueDictType = {
384 dictEncObjHash, /* hash function */
385 NULL, /* key dup */
386 NULL, /* val dup */
387 dictEncObjKeyCompare, /* key compare */
388 dictObjectDestructor, /* key destructor */
389 NULL, /* val destructor */
390 NULL /* allow to expand */
391};
392
393/* Like objectKeyPointerValueDictType(), but values can be destroyed, if
394 * not NULL, calling zfree(). */
395dictType objectKeyHeapPointerValueDictType = {
396 dictEncObjHash, /* hash function */
397 NULL, /* key dup */
398 NULL, /* val dup */
399 dictEncObjKeyCompare, /* key compare */
400 dictObjectDestructor, /* key destructor */
401 dictVanillaFree, /* val destructor */
402 NULL /* allow to expand */
403};
404
405/* Set dictionary type. Keys are SDS strings, values are not used. */
406dictType setDictType = {
407 dictSdsHash, /* hash function */
408 NULL, /* key dup */
409 NULL, /* val dup */
410 dictSdsKeyCompare, /* key compare */
411 dictSdsDestructor, /* key destructor */
412 NULL /* val destructor */
413};
414
415/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
416dictType zsetDictType = {
417 dictSdsHash, /* hash function */
418 NULL, /* key dup */
419 NULL, /* val dup */
420 dictSdsKeyCompare, /* key compare */
421 NULL, /* Note: SDS string shared & freed by skiplist */
422 NULL, /* val destructor */
423 NULL /* allow to expand */
424};
425
426/* Db->dict, keys are sds strings, vals are Redis objects. */
427dictType dbDictType = {
428 dictSdsHash, /* hash function */
429 NULL, /* key dup */
430 NULL, /* val dup */
431 dictSdsKeyCompare, /* key compare */
432 dictSdsDestructor, /* key destructor */
433 dictObjectDestructor, /* val destructor */
434 dictExpandAllowed, /* allow to expand */
435 dictEntryMetadataSize /* size of entry metadata in bytes */
436};
437
438/* Db->expires */
439dictType dbExpiresDictType = {
440 dictSdsHash, /* hash function */
441 NULL, /* key dup */
442 NULL, /* val dup */
443 dictSdsKeyCompare, /* key compare */
444 NULL, /* key destructor */
445 NULL, /* val destructor */
446 dictExpandAllowed /* allow to expand */
447};
448
449/* Command table. sds string -> command struct pointer. */
450dictType commandTableDictType = {
451 dictSdsCaseHash, /* hash function */
452 NULL, /* key dup */
453 NULL, /* val dup */
454 dictSdsKeyCaseCompare, /* key compare */
455 dictSdsDestructor, /* key destructor */
456 NULL, /* val destructor */
457 NULL /* allow to expand */
458};
459
460/* Hash type hash table (note that small hashes are represented with listpacks) */
461dictType hashDictType = {
462 dictSdsHash, /* hash function */
463 NULL, /* key dup */
464 NULL, /* val dup */
465 dictSdsKeyCompare, /* key compare */
466 dictSdsDestructor, /* key destructor */
467 dictSdsDestructor, /* val destructor */
468 NULL /* allow to expand */
469};
470
471/* Dict type without destructor */
472dictType sdsReplyDictType = {
473 dictSdsHash, /* hash function */
474 NULL, /* key dup */
475 NULL, /* val dup */
476 dictSdsKeyCompare, /* key compare */
477 NULL, /* key destructor */
478 NULL, /* val destructor */
479 NULL /* allow to expand */
480};
481
482/* Keylist hash table type has unencoded redis objects as keys and
483 * lists as values. It's used for blocking operations (BLPOP) and to
484 * map swapped keys to a list of clients waiting for this keys to be loaded. */
485dictType keylistDictType = {
486 dictObjHash, /* hash function */
487 NULL, /* key dup */
488 NULL, /* val dup */
489 dictObjKeyCompare, /* key compare */
490 dictObjectDestructor, /* key destructor */
491 dictListDestructor, /* val destructor */
492 NULL /* allow to expand */
493};
494
495/* Modules system dictionary type. Keys are module name,
496 * values are pointer to RedisModule struct. */
497dictType modulesDictType = {
498 dictSdsCaseHash, /* hash function */
499 NULL, /* key dup */
500 NULL, /* val dup */
501 dictSdsKeyCaseCompare, /* key compare */
502 dictSdsDestructor, /* key destructor */
503 NULL, /* val destructor */
504 NULL /* allow to expand */
505};
506
507/* Migrate cache dict type. */
508dictType migrateCacheDictType = {
509 dictSdsHash, /* hash function */
510 NULL, /* key dup */
511 NULL, /* val dup */
512 dictSdsKeyCompare, /* key compare */
513 dictSdsDestructor, /* key destructor */
514 NULL, /* val destructor */
515 NULL /* allow to expand */
516};
517
518/* Dict for for case-insensitive search using null terminated C strings.
519 * The keys stored in dict are sds though. */
520dictType stringSetDictType = {
521 distCStrCaseHash, /* hash function */
522 NULL, /* key dup */
523 NULL, /* val dup */
524 distCStrKeyCaseCompare, /* key compare */
525 dictSdsDestructor, /* key destructor */
526 NULL, /* val destructor */
527 NULL /* allow to expand */
528};
529
530/* Dict for for case-insensitive search using null terminated C strings.
531 * The key and value do not have a destructor. */
532dictType externalStringType = {
533 distCStrCaseHash, /* hash function */
534 NULL, /* key dup */
535 NULL, /* val dup */
536 distCStrKeyCaseCompare, /* key compare */
537 NULL, /* key destructor */
538 NULL, /* val destructor */
539 NULL /* allow to expand */
540};
541
542/* Dict for case-insensitive search using sds objects with a zmalloc
543 * allocated object as the value. */
544dictType sdsHashDictType = {
545 dictSdsCaseHash, /* hash function */
546 NULL, /* key dup */
547 NULL, /* val dup */
548 dictSdsKeyCaseCompare, /* key compare */
549 dictSdsDestructor, /* key destructor */
550 dictVanillaFree, /* val destructor */
551 NULL /* allow to expand */
552};
553
554int htNeedsResize(dict *dict) {
555 long long size, used;
556
557 size = dictSlots(dict);
558 used = dictSize(dict);
559 return (size > DICT_HT_INITIAL_SIZE &&
560 (used*100/size < HASHTABLE_MIN_FILL));
561}
562
563/* If the percentage of used slots in the HT reaches HASHTABLE_MIN_FILL
564 * we resize the hash table to save memory */
565void tryResizeHashTables(int dbid) {
566 if (htNeedsResize(server.db[dbid].dict))
567 dictResize(server.db[dbid].dict);
568 if (htNeedsResize(server.db[dbid].expires))
569 dictResize(server.db[dbid].expires);
570}
571
572/* Our hash table implementation performs rehashing incrementally while
573 * we write/read from the hash table. Still if the server is idle, the hash
574 * table will use two tables for a long time. So we try to use 1 millisecond
575 * of CPU time at every call of this function to perform some rehashing.
576 *
577 * The function returns 1 if some rehashing was performed, otherwise 0
578 * is returned. */
579int incrementallyRehash(int dbid) {
580 /* Keys dictionary */
581 if (dictIsRehashing(server.db[dbid].dict)) {
582 dictRehashMilliseconds(server.db[dbid].dict,1);
583 return 1; /* already used our millisecond for this loop... */
584 }
585 /* Expires */
586 if (dictIsRehashing(server.db[dbid].expires)) {
587 dictRehashMilliseconds(server.db[dbid].expires,1);
588 return 1; /* already used our millisecond for this loop... */
589 }
590 return 0;
591}
592
/* Align the dict.c resize policy with the presence of a fork child.
 *
 * Resizing hash tables while a child exists copies lots of memory pages
 * because of copy-on-write, so resizing is disabled while
 * hasActiveChildProcess() is true and enabled otherwise. This function is
 * called once a background process of some kind terminates. */
void updateDictResizePolicy(void) {
    if (hasActiveChildProcess()) {
        dictDisableResize();
    } else {
        dictEnableResize();
    }
}
605
606const char *strChildType(int type) {
607 switch(type) {
608 case CHILD_TYPE_RDB: return "RDB";
609 case CHILD_TYPE_AOF: return "AOF";
610 case CHILD_TYPE_LDB: return "LDB";
611 case CHILD_TYPE_MODULE: return "MODULE";
612 default: return "Unknown";
613 }
614}
615
616/* Return true if there are active children processes doing RDB saving,
617 * AOF rewriting, or some side process spawned by a loaded module. */
618int hasActiveChildProcess() {
619 return server.child_pid != -1;
620}
621
622void resetChildState() {
623 server.child_type = CHILD_TYPE_NONE;
624 server.child_pid = -1;
625 server.stat_current_cow_peak = 0;
626 server.stat_current_cow_bytes = 0;
627 server.stat_current_cow_updated = 0;
628 server.stat_current_save_keys_processed = 0;
629 server.stat_module_progress = 0;
630 server.stat_current_save_keys_total = 0;
631 updateDictResizePolicy();
632 closeChildInfoPipe();
633 moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
634 REDISMODULE_SUBEVENT_FORK_CHILD_DIED,
635 NULL);
636}
637
638/* Return if child type is mutually exclusive with other fork children */
639int isMutuallyExclusiveChildType(int type) {
640 return type == CHILD_TYPE_RDB || type == CHILD_TYPE_AOF || type == CHILD_TYPE_MODULE;
641}
642
643/* Returns true when we're inside a long command that yielded to the event loop. */
644int isInsideYieldingLongCommand() {
645 return scriptIsTimedout() || server.busy_module_yield_flags;
646}
647
648/* Return true if this instance has persistence completely turned off:
649 * both RDB and AOF are disabled. */
650int allPersistenceDisabled(void) {
651 return server.saveparamslen == 0 && server.aof_state == AOF_OFF;
652}
653
654/* ======================= Cron: called every 100 ms ======================== */
655
656/* Add a sample to the operations per second array of samples. */
657void trackInstantaneousMetric(int metric, long long current_reading) {
658 long long now = mstime();
659 long long t = now - server.inst_metric[metric].last_sample_time;
660 long long ops = current_reading -
661 server.inst_metric[metric].last_sample_count;
662 long long ops_sec;
663
664 ops_sec = t > 0 ? (ops*1000/t) : 0;
665
666 server.inst_metric[metric].samples[server.inst_metric[metric].idx] =
667 ops_sec;
668 server.inst_metric[metric].idx++;
669 server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
670 server.inst_metric[metric].last_sample_time = now;
671 server.inst_metric[metric].last_sample_count = current_reading;
672}
673
674/* Return the mean of all the samples. */
675long long getInstantaneousMetric(int metric) {
676 int j;
677 long long sum = 0;
678
679 for (j = 0; j < STATS_METRIC_SAMPLES; j++)
680 sum += server.inst_metric[metric].samples[j];
681 return sum / STATS_METRIC_SAMPLES;
682}
683
684/* The client query buffer is an sds.c string that can end with a lot of
685 * free space not used, this function reclaims space if needed.
686 *
687 * The function always returns 0 as it never terminates the client. */
688int clientsCronResizeQueryBuffer(client *c) {
689 size_t querybuf_size = sdsalloc(c->querybuf);
690 time_t idletime = server.unixtime - c->lastinteraction;
691
692 /* Only resize the query buffer if the buffer is actually wasting at least a
693 * few kbytes */
694 if (sdsavail(c->querybuf) > 1024*4) {
695 /* There are two conditions to resize the query buffer: */
696 if (idletime > 2) {
697 /* 1) Query is idle for a long time. */
698 c->querybuf = sdsRemoveFreeSpace(c->querybuf);
699 } else if (querybuf_size > PROTO_RESIZE_THRESHOLD && querybuf_size/2 > c->querybuf_peak) {
700 /* 2) Query buffer is too big for latest peak and is larger than
701 * resize threshold. Trim excess space but only up to a limit,
702 * not below the recent peak and current c->querybuf (which will
703 * be soon get used). If we're in the middle of a bulk then make
704 * sure not to resize to less than the bulk length. */
705 size_t resize = sdslen(c->querybuf);
706 if (resize < c->querybuf_peak) resize = c->querybuf_peak;
707 if (c->bulklen != -1 && resize < (size_t)c->bulklen) resize = c->bulklen;
708 c->querybuf = sdsResize(c->querybuf, resize);
709 }
710 }
711
712 /* Reset the peak again to capture the peak memory usage in the next
713 * cycle. */
714 c->querybuf_peak = sdslen(c->querybuf);
715 /* We reset to either the current used, or currently processed bulk size,
716 * which ever is bigger. */
717 if (c->bulklen != -1 && (size_t)c->bulklen > c->querybuf_peak)
718 c->querybuf_peak = c->bulklen;
719 return 0;
720}
721
722/* The client output buffer can be adjusted to better fit the memory requirements.
723 *
724 * the logic is:
725 * in case the last observed peak size of the buffer equals the buffer size - we double the size
726 * in case the last observed peak size of the buffer is less than half the buffer size - we shrink by half.
727 * The buffer peak will be reset back to the buffer position every server.reply_buffer_peak_reset_time milliseconds
728 * The function always returns 0 as it never terminates the client. */
729int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) {
730
731 size_t new_buffer_size = 0;
732 char *oldbuf = NULL;
733 const size_t buffer_target_shrink_size = c->buf_usable_size/2;
734 const size_t buffer_target_expand_size = c->buf_usable_size*2;
735
736 /* in case the resizing is disabled return immediately */
737 if(!server.reply_buffer_resizing_enabled)
738 return 0;
739
740 if (buffer_target_shrink_size >= PROTO_REPLY_MIN_BYTES &&
741 c->buf_peak < buffer_target_shrink_size )
742 {
743 new_buffer_size = max(PROTO_REPLY_MIN_BYTES,c->buf_peak+1);
744 server.stat_reply_buffer_shrinks++;
745 } else if (buffer_target_expand_size < PROTO_REPLY_CHUNK_BYTES*2 &&
746 c->buf_peak == c->buf_usable_size)
747 {
748 new_buffer_size = min(PROTO_REPLY_CHUNK_BYTES,buffer_target_expand_size);
749 server.stat_reply_buffer_expands++;
750 }
751
752 /* reset the peak value each server.reply_buffer_peak_reset_time seconds. in case the client will be idle
753 * it will start to shrink.
754 */
755 if (server.reply_buffer_peak_reset_time >=0 &&
756 now_ms - c->buf_peak_last_reset_time >= server.reply_buffer_peak_reset_time)
757 {
758 c->buf_peak = c->bufpos;
759 c->buf_peak_last_reset_time = now_ms;
760 }
761
762 if (new_buffer_size) {
763 oldbuf = c->buf;
764 c->buf = zmalloc_usable(new_buffer_size, &c->buf_usable_size);
765 memcpy(c->buf,oldbuf,c->bufpos);
766 zfree(oldbuf);
767 }
768 return 0;
769}
770
771/* This function is used in order to track clients using the biggest amount
772 * of memory in the latest few seconds. This way we can provide such information
773 * in the INFO output (clients section), without having to do an O(N) scan for
774 * all the clients.
775 *
776 * This is how it works. We have an array of CLIENTS_PEAK_MEM_USAGE_SLOTS slots
777 * where we track, for each, the biggest client output and input buffers we
778 * saw in that slot. Every slot corresponds to one of the latest seconds, since
779 * the array is indexed by doing UNIXTIME % CLIENTS_PEAK_MEM_USAGE_SLOTS.
780 *
781 * When we want to know what was recently the peak memory usage, we just scan
782 * such few slots searching for the maximum value. */
783#define CLIENTS_PEAK_MEM_USAGE_SLOTS 8
784size_t ClientsPeakMemInput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0};
785size_t ClientsPeakMemOutput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0};
786
787int clientsCronTrackExpansiveClients(client *c, int time_idx) {
788 size_t in_usage = sdsZmallocSize(c->querybuf) + c->argv_len_sum +
789 (c->argv ? zmalloc_size(c->argv) : 0);
790 size_t out_usage = getClientOutputBufferMemoryUsage(c);
791
792 /* Track the biggest values observed so far in this slot. */
793 if (in_usage > ClientsPeakMemInput[time_idx]) ClientsPeakMemInput[time_idx] = in_usage;
794 if (out_usage > ClientsPeakMemOutput[time_idx]) ClientsPeakMemOutput[time_idx] = out_usage;
795
796 return 0; /* This function never terminates the client. */
797}
798
799/* All normal clients are placed in one of the "mem usage buckets" according
800 * to how much memory they currently use. We use this function to find the
801 * appropriate bucket based on a given memory usage value. The algorithm simply
802 * does a log2(mem) to ge the bucket. This means, for examples, that if a
803 * client's memory usage doubles it's moved up to the next bucket, if it's
804 * halved we move it down a bucket.
805 * For more details see CLIENT_MEM_USAGE_BUCKETS documentation in server.h. */
806static inline clientMemUsageBucket *getMemUsageBucket(size_t mem) {
807 int size_in_bits = 8*(int)sizeof(mem);
808 int clz = mem > 0 ? __builtin_clzl(mem) : size_in_bits;
809 int bucket_idx = size_in_bits - clz;
810 if (bucket_idx > CLIENT_MEM_USAGE_BUCKET_MAX_LOG)
811 bucket_idx = CLIENT_MEM_USAGE_BUCKET_MAX_LOG;
812 else if (bucket_idx < CLIENT_MEM_USAGE_BUCKET_MIN_LOG)
813 bucket_idx = CLIENT_MEM_USAGE_BUCKET_MIN_LOG;
814 bucket_idx -= CLIENT_MEM_USAGE_BUCKET_MIN_LOG;
815 return &server.client_mem_usage_buckets[bucket_idx];
816}
817
818/* This is called both on explicit clients when something changed their buffers,
819 * so we can track clients' memory and enforce clients' maxmemory in real time,
820 * and also from the clientsCron. We call it from the cron so we have updated
821 * stats for non CLIENT_TYPE_NORMAL/PUBSUB clients and in case a configuration
822 * change requires us to evict a non-active client.
823 *
824 * This also adds the client to the correct memory usage bucket. Each bucket contains
825 * all clients with roughly the same amount of memory. This way we group
826 * together clients consuming about the same amount of memory and can quickly
827 * free them in case we reach maxmemory-clients (client eviction).
828 */
829int updateClientMemUsage(client *c) {
830 serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
831 size_t mem = getClientMemoryUsage(c, NULL);
832 int type = getClientType(c);
833
834 /* Remove the old value of the memory used by the client from the old
835 * category, and add it back. */
836 if (type != c->last_memory_type) {
837 server.stat_clients_type_memory[c->last_memory_type] -= c->last_memory_usage;
838 server.stat_clients_type_memory[type] += mem;
839 c->last_memory_type = type;
840 } else {
841 server.stat_clients_type_memory[type] += mem - c->last_memory_usage;
842 }
843
844 int allow_eviction =
845 (type == CLIENT_TYPE_NORMAL || type == CLIENT_TYPE_PUBSUB) &&
846 !(c->flags & CLIENT_NO_EVICT);
847
848 /* Update the client in the mem usage buckets */
849 if (c->mem_usage_bucket) {
850 c->mem_usage_bucket->mem_usage_sum -= c->last_memory_usage;
851 /* If this client can't be evicted then remove it from the mem usage
852 * buckets */
853 if (!allow_eviction) {
854 listDelNode(c->mem_usage_bucket->clients, c->mem_usage_bucket_node);
855 c->mem_usage_bucket = NULL;
856 c->mem_usage_bucket_node = NULL;
857 }
858 }
859 if (allow_eviction) {
860 clientMemUsageBucket *bucket = getMemUsageBucket(mem);
861 bucket->mem_usage_sum += mem;
862 if (bucket != c->mem_usage_bucket) {
863 if (c->mem_usage_bucket)
864 listDelNode(c->mem_usage_bucket->clients,
865 c->mem_usage_bucket_node);
866 c->mem_usage_bucket = bucket;
867 listAddNodeTail(bucket->clients, c);
868 c->mem_usage_bucket_node = listLast(bucket->clients);
869 }
870 }
871
872 /* Remember what we added, to remove it next time. */
873 c->last_memory_usage = mem;
874
875 return 0;
876}
877
878/* Return the max samples in the memory usage of clients tracked by
879 * the function clientsCronTrackExpansiveClients(). */
880void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
881 size_t i = 0, o = 0;
882 for (int j = 0; j < CLIENTS_PEAK_MEM_USAGE_SLOTS; j++) {
883 if (ClientsPeakMemInput[j] > i) i = ClientsPeakMemInput[j];
884 if (ClientsPeakMemOutput[j] > o) o = ClientsPeakMemOutput[j];
885 }
886 *in_usage = i;
887 *out_usage = o;
888}
889
890/* This function is called by serverCron() and is used in order to perform
891 * operations on clients that are important to perform constantly. For instance
892 * we use this function in order to disconnect clients after a timeout, including
893 * clients blocked in some blocking command with a non-zero timeout.
894 *
895 * The function makes some effort to process all the clients every second, even
896 * if this cannot be strictly guaranteed, since serverCron() may be called with
897 * an actual frequency lower than server.hz in case of latency events like slow
898 * commands.
899 *
900 * It is very important for this function, and the functions it calls, to be
901 * very fast: sometimes Redis has tens of hundreds of connected clients, and the
902 * default server.hz value is 10, so sometimes here we need to process thousands
903 * of clients per second, turning this function into a source of latency.
904 */
905#define CLIENTS_CRON_MIN_ITERATIONS 5
906void clientsCron(void) {
907 /* Try to process at least numclients/server.hz of clients
908 * per call. Since normally (if there are no big latency events) this
909 * function is called server.hz times per second, in the average case we
910 * process all the clients in 1 second. */
911 int numclients = listLength(server.clients);
912 int iterations = numclients/server.hz;
913 mstime_t now = mstime();
914
915 /* Process at least a few clients while we are at it, even if we need
916 * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
917 * of processing each client once per second. */
918 if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
919 iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
920 numclients : CLIENTS_CRON_MIN_ITERATIONS;
921
922
923 int curr_peak_mem_usage_slot = server.unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS;
924 /* Always zero the next sample, so that when we switch to that second, we'll
925 * only register samples that are greater in that second without considering
926 * the history of such slot.
927 *
928 * Note: our index may jump to any random position if serverCron() is not
929 * called for some reason with the normal frequency, for instance because
930 * some slow command is called taking multiple seconds to execute. In that
931 * case our array may end containing data which is potentially older
932 * than CLIENTS_PEAK_MEM_USAGE_SLOTS seconds: however this is not a problem
933 * since here we want just to track if "recently" there were very expansive
934 * clients from the POV of memory usage. */
935 int zeroidx = (curr_peak_mem_usage_slot+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS;
936 ClientsPeakMemInput[zeroidx] = 0;
937 ClientsPeakMemOutput[zeroidx] = 0;
938
939
940 while(listLength(server.clients) && iterations--) {
941 client *c;
942 listNode *head;
943
944 /* Rotate the list, take the current head, process.
945 * This way if the client must be removed from the list it's the
946 * first element and we don't incur into O(N) computation. */
947 listRotateTailToHead(server.clients);
948 head = listFirst(server.clients);
949 c = listNodeValue(head);
950 /* The following functions do different service checks on the client.
951 * The protocol is that they return non-zero if the client was
952 * terminated. */
953 if (clientsCronHandleTimeout(c,now)) continue;
954 if (clientsCronResizeQueryBuffer(c)) continue;
955 if (clientsCronResizeOutputBuffer(c,now)) continue;
956
957 if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) continue;
958
959 /* Iterating all the clients in getMemoryOverheadData() is too slow and
960 * in turn would make the INFO command too slow. So we perform this
961 * computation incrementally and track the (not instantaneous but updated
962 * to the second) total memory used by clients using clientsCron() in
963 * a more incremental way (depending on server.hz). */
964 if (updateClientMemUsage(c)) continue;
965 if (closeClientOnOutputBufferLimitReached(c, 0)) continue;
966 }
967}
968
969/* This function handles 'background' operations we are required to do
970 * incrementally in Redis databases, such as active key expiring, resizing,
971 * rehashing. */
972void databasesCron(void) {
973 /* Expire keys by random sampling. Not required for slaves
974 * as master will synthesize DELs for us. */
975 if (server.active_expire_enabled) {
976 if (iAmMaster()) {
977 activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
978 } else {
979 expireSlaveKeys();
980 }
981 }
982
983 /* Defrag keys gradually. */
984 activeDefragCycle();
985
986 /* Perform hash tables rehashing if needed, but only if there are no
987 * other processes saving the DB on disk. Otherwise rehashing is bad
988 * as will cause a lot of copy-on-write of memory pages. */
989 if (!hasActiveChildProcess()) {
990 /* We use global counters so if we stop the computation at a given
991 * DB we'll be able to start from the successive in the next
992 * cron loop iteration. */
993 static unsigned int resize_db = 0;
994 static unsigned int rehash_db = 0;
995 int dbs_per_call = CRON_DBS_PER_CALL;
996 int j;
997
998 /* Don't test more DBs than we have. */
999 if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;
1000
1001 /* Resize */
1002 for (j = 0; j < dbs_per_call; j++) {
1003 tryResizeHashTables(resize_db % server.dbnum);
1004 resize_db++;
1005 }
1006
1007 /* Rehash */
1008 if (server.activerehashing) {
1009 for (j = 0; j < dbs_per_call; j++) {
1010 int work_done = incrementallyRehash(rehash_db);
1011 if (work_done) {
1012 /* If the function did some work, stop here, we'll do
1013 * more at the next cron loop. */
1014 break;
1015 } else {
1016 /* If this db didn't need rehash, we'll try the next one. */
1017 rehash_db++;
1018 rehash_db %= server.dbnum;
1019 }
1020 }
1021 }
1022 }
1023}
1024
1025static inline void updateCachedTimeWithUs(int update_daylight_info, const long long ustime) {
1026 server.ustime = ustime;
1027 server.mstime = server.ustime / 1000;
1028 time_t unixtime = server.mstime / 1000;
1029 atomicSet(server.unixtime, unixtime);
1030
1031 /* To get information about daylight saving time, we need to call
1032 * localtime_r and cache the result. However calling localtime_r in this
1033 * context is safe since we will never fork() while here, in the main
1034 * thread. The logging function will call a thread safe version of
1035 * localtime that has no locks. */
1036 if (update_daylight_info) {
1037 struct tm tm;
1038 time_t ut = server.unixtime;
1039 localtime_r(&ut,&tm);
1040 server.daylight_active = tm.tm_isdst;
1041 }
1042}
1043
/* Refresh the cached time kept in the global server state using the current
 * value of ustime().
 *
 * The time is cached because the current time has to be stored in objects at
 * every object access, where accuracy is not needed: reading a global
 * variable is a lot faster than calling time(NULL).
 *
 * This function must be fast since it is called at every command execution
 * in call(). For this reason the caller decides, via 'update_daylight_info',
 * whether the daylight saving information is refreshed as well: normally
 * that is done only when calling from serverCron(), not from call(). */
void updateCachedTime(int update_daylight_info) {
    updateCachedTimeWithUs(update_daylight_info, ustime());
}
1058
1059void checkChildrenDone(void) {
1060 int statloc = 0;
1061 pid_t pid;
1062
1063 if ((pid = waitpid(-1, &statloc, WNOHANG)) != 0) {
1064 int exitcode = WIFEXITED(statloc) ? WEXITSTATUS(statloc) : -1;
1065 int bysignal = 0;
1066
1067 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
1068
1069 /* sigKillChildHandler catches the signal and calls exit(), but we
1070 * must make sure not to flag lastbgsave_status, etc incorrectly.
1071 * We could directly terminate the child process via SIGUSR1
1072 * without handling it */
1073 if (exitcode == SERVER_CHILD_NOERROR_RETVAL) {
1074 bysignal = SIGUSR1;
1075 exitcode = 1;
1076 }
1077
1078 if (pid == -1) {
1079 serverLog(LL_WARNING,"waitpid() returned an error: %s. "
1080 "child_type: %s, child_pid = %d",
1081 strerror(errno),
1082 strChildType(server.child_type),
1083 (int) server.child_pid);
1084 } else if (pid == server.child_pid) {
1085 if (server.child_type == CHILD_TYPE_RDB) {
1086 backgroundSaveDoneHandler(exitcode, bysignal);
1087 } else if (server.child_type == CHILD_TYPE_AOF) {
1088 backgroundRewriteDoneHandler(exitcode, bysignal);
1089 } else if (server.child_type == CHILD_TYPE_MODULE) {
1090 ModuleForkDoneHandler(exitcode, bysignal);
1091 } else {
1092 serverPanic("Unknown child type %d for child pid %d", server.child_type, server.child_pid);
1093 exit(1);
1094 }
1095 if (!bysignal && exitcode == 0) receiveChildInfo();
1096 resetChildState();
1097 } else {
1098 if (!ldbRemoveChild(pid)) {
1099 serverLog(LL_WARNING,
1100 "Warning, detected child with unmatched pid: %ld",
1101 (long) pid);
1102 }
1103 }
1104
1105 /* start any pending forks immediately. */
1106 replicationStartPendingFork();
1107 }
1108}
1109
1110/* Called from serverCron and cronUpdateMemoryStats to update cached memory metrics. */
1111void cronUpdateMemoryStats() {
1112 /* Record the max memory used since the server was started. */
1113 if (zmalloc_used_memory() > server.stat_peak_memory)
1114 server.stat_peak_memory = zmalloc_used_memory();
1115
1116 run_with_period(100) {
1117 /* Sample the RSS and other metrics here since this is a relatively slow call.
1118 * We must sample the zmalloc_used at the same time we take the rss, otherwise
1119 * the frag ratio calculate may be off (ratio of two samples at different times) */
1120 server.cron_malloc_stats.process_rss = zmalloc_get_rss();
1121 server.cron_malloc_stats.zmalloc_used = zmalloc_used_memory();
1122 /* Sampling the allocator info can be slow too.
1123 * The fragmentation ratio it'll show is potentially more accurate
1124 * it excludes other RSS pages such as: shared libraries, LUA and other non-zmalloc
1125 * allocations, and allocator reserved pages that can be pursed (all not actual frag) */
1126 zmalloc_get_allocator_info(&server.cron_malloc_stats.allocator_allocated,
1127 &server.cron_malloc_stats.allocator_active,
1128 &server.cron_malloc_stats.allocator_resident);
1129 /* in case the allocator isn't providing these stats, fake them so that
1130 * fragmentation info still shows some (inaccurate metrics) */
1131 if (!server.cron_malloc_stats.allocator_resident) {
1132 /* LUA memory isn't part of zmalloc_used, but it is part of the process RSS,
1133 * so we must deduct it in order to be able to calculate correct
1134 * "allocator fragmentation" ratio */
1135 size_t lua_memory = evalMemory();
1136 server.cron_malloc_stats.allocator_resident = server.cron_malloc_stats.process_rss - lua_memory;
1137 }
1138 if (!server.cron_malloc_stats.allocator_active)
1139 server.cron_malloc_stats.allocator_active = server.cron_malloc_stats.allocator_resident;
1140 if (!server.cron_malloc_stats.allocator_allocated)
1141 server.cron_malloc_stats.allocator_allocated = server.cron_malloc_stats.zmalloc_used;
1142 }
1143}
1144
/* This is our timer interrupt, called server.hz times per second.
 * Here is where we do a number of things that need to be done asynchronously.
 * For instance:
 *
 * - Active expired keys collection (it is also performed in a lazy way on
 *   lookup).
 * - Software watchdog.
 * - Update some statistic.
 * - Incremental rehashing of the DBs hash tables.
 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
 * - Clients timeout of different kinds.
 * - Replication reconnection.
 * - Many more...
 *
 * Everything directly called here will be called server.hz times per second,
 * so in order to throttle execution of things we want to do less frequently
 * a macro is used: run_with_period(milliseconds) { .... }
 *
 * The three parameters are not used (see the UNUSED() calls below). The
 * return value is always 1000/server.hz, that is, the duration in
 * milliseconds of one tick at the current frequency.
 */

int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
    int j;
    UNUSED(eventLoop);
    UNUSED(id);
    UNUSED(clientData);

    /* Software watchdog: deliver the SIGALRM that will reach the signal
     * handler if we don't return here fast enough. */
    if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);

    /* Update the time cache (daylight saving info included). */
    updateCachedTime(1);

    /* Start every tick from the configured frequency; dynamic_hz, handled
     * right below, may then raise it. */
    server.hz = server.config_hz;
    /* Adapt the server.hz value to the number of connected clients. If we have
     * many clients, we want to call serverCron() with a higher frequency.
     * The value is doubled until the clients per tick drop to
     * MAX_CLIENTS_PER_CLOCK_TICK or less, and is capped at CONFIG_MAX_HZ. */
    if (server.dynamic_hz) {
        while (listLength(server.clients) / server.hz >
               MAX_CLIENTS_PER_CLOCK_TICK)
        {
            server.hz *= 2;
            if (server.hz > CONFIG_MAX_HZ) {
                server.hz = CONFIG_MAX_HZ;
                break;
            }
        }
    }

    /* for debug purposes: skip actual cron work if pause_cron is on */
    if (server.pause_cron) return 1000/server.hz;

    /* Every 100 ms feed the instantaneous metrics with the current values of
     * the commands counter and of the network byte counters. */
    run_with_period(100) {
        long long stat_net_input_bytes, stat_net_output_bytes;
        long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
        atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
        atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
        atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
        atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);

        trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands);
        trackInstantaneousMetric(STATS_METRIC_NET_INPUT,
                stat_net_input_bytes + stat_net_repl_input_bytes);
        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT,
                stat_net_output_bytes + stat_net_repl_output_bytes);
        trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION,
                                 stat_net_repl_input_bytes);
        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION,
                                 stat_net_repl_output_bytes);
    }

    /* We have just LRU_BITS bits per object for LRU information.
     * So we use an (eventually wrapping) LRU clock.
     *
     * Note that even if the counter wraps it's not a big problem,
     * everything will still work but some object will appear younger
     * to Redis. However for this to happen a given object should never be
     * touched for all the time needed to the counter to wrap, which is
     * not likely.
     *
     * Note that you can change the resolution altering the
     * LRU_CLOCK_RESOLUTION define. */
    unsigned int lruclock = getLRUClock();
    atomicSet(server.lruclock,lruclock);

    cronUpdateMemoryStats();

    /* We received a SIGTERM or SIGINT, shutting down here in a safe way, as it is
     * not ok doing so inside the signal handler. The shutdown flags come from
     * the shutdown_on_sigint / shutdown_on_sigterm settings, depending on
     * which signal was received last. */
    if (server.shutdown_asap && !isShutdownInitiated()) {
        int shutdownFlags = SHUTDOWN_NOFLAGS;
        if (server.last_sig_received == SIGINT && server.shutdown_on_sigint)
            shutdownFlags = server.shutdown_on_sigint;
        else if (server.last_sig_received == SIGTERM && server.shutdown_on_sigterm)
            shutdownFlags = server.shutdown_on_sigterm;

        if (prepareForShutdown(shutdownFlags) == C_OK) exit(0);
    } else if (isShutdownInitiated()) {
        /* A shutdown is already in progress: finish it once its deadline
         * passed or the server reports to be ready. */
        if (server.mstime >= server.shutdown_mstime || isReadyToShutdown()) {
            if (finishShutdown() == C_OK) exit(0);
            /* Shutdown failed. Continue running. An error has been logged. */
        }
    }

    /* Show some info about non-empty databases */
    if (server.verbosity <= LL_VERBOSE) {
        run_with_period(5000) {
            for (j = 0; j < server.dbnum; j++) {
                long long size, used, vkeys;

                size = dictSlots(server.db[j].dict);
                used = dictSize(server.db[j].dict);
                vkeys = dictSize(server.db[j].expires);
                if (used || vkeys) {
                    serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
                }
            }
        }
    }

    /* Show information about connected clients */
    if (!server.sentinel_mode) {
        run_with_period(5000) {
            serverLog(LL_DEBUG,
                "%lu clients connected (%lu replicas), %zu bytes in use",
                listLength(server.clients)-listLength(server.slaves),
                listLength(server.slaves),
                zmalloc_used_memory());
        }
    }

    /* We need to do a few operations on clients asynchronously. */
    clientsCron();

    /* Handle background operations on Redis databases. */
    databasesCron();

    /* Start a scheduled AOF rewrite if this was requested by the user while
     * a BGSAVE was in progress. */
    if (!hasActiveChildProcess() &&
        server.aof_rewrite_scheduled &&
        !aofRewriteLimited())
    {
        rewriteAppendOnlyFileBackground();
    }

    /* Check if a background saving or AOF rewrite in progress terminated. */
    if (hasActiveChildProcess() || ldbPendingChildren())
    {
        run_with_period(1000) receiveChildInfo();
        checkChildrenDone();
    } else {
        /* If there is not a background saving/rewrite in progress check if
         * we have to save/rewrite now. */
        for (j = 0; j < server.saveparamslen; j++) {
            struct saveparam *sp = server.saveparams+j;

            /* Save if we reached the given amount of changes,
             * the given amount of seconds, and if the latest bgsave was
             * successful or if, in case of an error, at least
             * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
            if (server.dirty >= sp->changes &&
                server.unixtime-server.lastsave > sp->seconds &&
                (server.unixtime-server.lastbgsave_try >
                 CONFIG_BGSAVE_RETRY_DELAY ||
                 server.lastbgsave_status == C_OK))
            {
                serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
                    sp->changes, (int)sp->seconds);
                rdbSaveInfo rsi, *rsiptr;
                rsiptr = rdbPopulateSaveInfo(&rsi);
                rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr);
                /* The first matching save point is enough. */
                break;
            }
        }

        /* Trigger an AOF rewrite if needed: the AOF must be larger than
         * aof_rewrite_min_size and must have grown, compared to the base
         * size, by at least aof_rewrite_perc percent. */
        if (server.aof_state == AOF_ON &&
            !hasActiveChildProcess() &&
            server.aof_rewrite_perc &&
            server.aof_current_size > server.aof_rewrite_min_size)
        {
            /* A base size of zero is replaced by 1 to avoid dividing by zero. */
            long long base = server.aof_rewrite_base_size ?
                server.aof_rewrite_base_size : 1;
            long long growth = (server.aof_current_size*100/base) - 100;
            if (growth >= server.aof_rewrite_perc && !aofRewriteLimited()) {
                serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
                rewriteAppendOnlyFileBackground();
            }
        }
    }
    /* Just for the sake of defensive programming, to avoid forgetting to
     * call this function when needed. */
    updateDictResizePolicy();


    /* AOF postponed flush: Try at every cron cycle if the slow fsync
     * completed. */
    if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
        server.aof_flush_postponed_start)
    {
        flushAppendOnlyFile(0);
    }

    /* AOF write errors: in this case we have a buffer to flush as well and
     * clear the AOF error in case of success to make the DB writable again,
     * however to try every second is enough in case of 'hz' is set to
     * a higher frequency. */
    run_with_period(1000) {
        if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
            server.aof_last_write_status == C_ERR)
            {
                flushAppendOnlyFile(0);
            }
    }

    /* Clear the paused clients state if needed. */
    checkClientPauseTimeoutAndReturnIfPaused();

    /* Replication cron function -- used to reconnect to master,
     * detect transfer failures, start background RDB transfers and so forth.
     *
     * If Redis is trying to failover then run the replication cron faster so
     * progress on the handshake happens more quickly. */
    if (server.failover_state != NO_FAILOVER) {
        run_with_period(100) replicationCron();
    } else {
        run_with_period(1000) replicationCron();
    }

    /* Run the Redis Cluster cron. */
    run_with_period(100) {
        if (server.cluster_enabled) clusterCron();
    }

    /* Run the Sentinel timer if we are in sentinel mode. */
    if (server.sentinel_mode) sentinelTimer();

    /* Cleanup expired MIGRATE cached sockets. */
    run_with_period(1000) {
        migrateCloseTimedoutSockets();
    }

    /* Stop the I/O threads if we don't have enough pending work. */
    stopThreadedIOIfNeeded();

    /* Resize tracking keys table if needed. This is also done at every
     * command execution, but we want to be sure that if the last command
     * executed changes the value via CONFIG SET, the server will perform
     * the operation even if completely idle. */
    if (server.tracking_clients) trackingLimitUsedSlots();

    /* Start a scheduled BGSAVE if the corresponding flag is set. This is
     * useful when we are forced to postpone a BGSAVE because an AOF
     * rewrite is in progress.
     *
     * Note: this code must be after the replicationCron() call above so
     * make sure when refactoring this file to keep this order. This is useful
     * because we want to give priority to RDB savings for replication. */
    if (!hasActiveChildProcess() &&
        server.rdb_bgsave_scheduled &&
        (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
         server.lastbgsave_status == C_OK))
    {
        rdbSaveInfo rsi, *rsiptr;
        rsiptr = rdbPopulateSaveInfo(&rsi);
        /* The flag is cleared only when the background save was started. */
        if (rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr) == C_OK)
            server.rdb_bgsave_scheduled = 0;
    }

    /* Run the modules cron, if any module is loaded. */
    run_with_period(100) {
        if (moduleCount()) modulesCron();
    }

    /* Fire the cron loop modules event. */
    RedisModuleCronLoopV1 ei = {REDISMODULE_CRON_LOOP_VERSION,server.hz};
    moduleFireServerEvent(REDISMODULE_EVENT_CRON_LOOP,
                          0,
                          &ei);

    /* Count this tick (the counter is also advanced by whileBlockedCron()). */
    server.cronloops++;
    return 1000/server.hz;
}
1426
1427
1428void blockingOperationStarts() {
1429 if(!server.blocking_op_nesting++){
1430 updateCachedTime(0);
1431 server.blocked_last_cron = server.mstime;
1432 }
1433}
1434
1435void blockingOperationEnds() {
1436 if(!(--server.blocking_op_nesting)){
1437 server.blocked_last_cron = 0;
1438 }
1439}
1440
1441/* This function fills in the role of serverCron during RDB or AOF loading, and
1442 * also during blocked scripts.
1443 * It attempts to do its duties at a similar rate as the configured server.hz,
1444 * and updates cronloops variable so that similarly to serverCron, the
1445 * run_with_period can be used. */
1446void whileBlockedCron() {
1447 /* Here we may want to perform some cron jobs (normally done server.hz times
1448 * per second). */
1449
1450 /* Since this function depends on a call to blockingOperationStarts, let's
1451 * make sure it was done. */
1452 serverAssert(server.blocked_last_cron);
1453
1454 /* In case we where called too soon, leave right away. This way one time
1455 * jobs after the loop below don't need an if. and we don't bother to start
1456 * latency monitor if this function is called too often. */
1457 if (server.blocked_last_cron >= server.mstime)
1458 return;
1459
1460 mstime_t latency;
1461 latencyStartMonitor(latency);
1462
1463 /* In some cases we may be called with big intervals, so we may need to do
1464 * extra work here. This is because some of the functions in serverCron rely
1465 * on the fact that it is performed every 10 ms or so. For instance, if
1466 * activeDefragCycle needs to utilize 25% cpu, it will utilize 2.5ms, so we
1467 * need to call it multiple times. */
1468 long hz_ms = 1000/server.hz;
1469 while (server.blocked_last_cron < server.mstime) {
1470
1471 /* Defrag keys gradually. */
1472 activeDefragCycle();
1473
1474 server.blocked_last_cron += hz_ms;
1475
1476 /* Increment cronloop so that run_with_period works. */
1477 server.cronloops++;
1478 }
1479
1480 /* Other cron jobs do not need to be done in a loop. No need to check
1481 * server.blocked_last_cron since we have an early exit at the top. */
1482
1483 /* Update memory stats during loading (excluding blocked scripts) */
1484 if (server.loading) cronUpdateMemoryStats();
1485
1486 latencyEndMonitor(latency);
1487 latencyAddSampleIfNeeded("while-blocked-cron",latency);
1488
1489 /* We received a SIGTERM during loading, shutting down here in a safe way,
1490 * as it isn't ok doing so inside the signal handler. */
1491 if (server.shutdown_asap && server.loading) {
1492 if (prepareForShutdown(SHUTDOWN_NOSAVE) == C_OK) exit(0);
1493 serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1494 server.shutdown_asap = 0;
1495 server.last_sig_received = 0;
1496 }
1497}
1498
1499static void sendGetackToReplicas(void) {
1500 robj *argv[3];
1501 argv[0] = shared.replconf;
1502 argv[1] = shared.getack;
1503 argv[2] = shared.special_asterick; /* Not used argument. */
1504 replicationFeedSlaves(server.slaves, server.slaveseldb, argv, 3);
1505}
1506
/* Flag tested by beforeSleep() and afterSleep() to tell whether the event
 * loop is being re-entered from processEventsWhileBlocked(). It is only
 * declared here; its definition is not in this part of the file. */
extern int ProcessingEventsWhileBlocked;
1508
/* This function gets called every time Redis is entering the
 * main loop of the event driven library, that is, before to sleep
 * for ready file descriptors.
 *
 * Note: This function is (currently) called from two functions:
 * 1. aeMain - The main server loop
 * 2. processEventsWhileBlocked - Process clients during RDB/AOF load
 *
 * If it was called from processEventsWhileBlocked we don't want
 * to perform all actions (For example, we don't want to expire
 * keys), but we do need to perform some actions.
 *
 * The most important is freeClientsInAsyncFreeQueue but we also
 * call some other low-risk functions.
 *
 * The 'eventLoop' argument is not used. The order of the steps below is
 * significant: several of them document what they must precede or follow. */
void beforeSleep(struct aeEventLoop *eventLoop) {
    UNUSED(eventLoop);

    /* Keep track of the peak of used memory. The counter is read once so
     * that the value stored is the one that was compared. */
    size_t zmalloc_used = zmalloc_used_memory();
    if (zmalloc_used > server.stat_peak_memory)
        server.stat_peak_memory = zmalloc_used;

    /* Just call a subset of vital functions in case we are re-entering
     * the event loop from processEventsWhileBlocked(). Note that in this
     * case we keep track of the number of events we are processing, since
     * processEventsWhileBlocked() wants to stop ASAP if there are no longer
     * events to handle. */
    if (ProcessingEventsWhileBlocked) {
        uint64_t processed = 0;
        processed += handleClientsWithPendingReadsUsingThreads();
        processed += tlsProcessPendingData();
        if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
            flushAppendOnlyFile(0);
        processed += handleClientsWithPendingWrites();
        processed += freeClientsInAsyncFreeQueue();
        server.events_processed_while_blocked += processed;
        return;
    }

    /* Handle precise timeouts of blocked clients. */
    handleBlockedClientsTimeout();

    /* We should handle pending reads clients ASAP after event loop. */
    handleClientsWithPendingReadsUsingThreads();

    /* Handle TLS pending data. (must be done before flushAppendOnlyFile) */
    tlsProcessPendingData();

    /* If tls still has pending unread data don't sleep at all. */
    aeSetDontWait(server.el, tlsHasPendingData());

    /* Call the Redis Cluster before sleep function. Note that this function
     * may change the state of Redis Cluster (from ok to fail or vice versa),
     * so it's a good idea to call it before serving the unblocked clients
     * later in this function. */
    if (server.cluster_enabled) clusterBeforeSleep();

    /* Run a fast expire cycle (the called function will return
     * ASAP if a fast cycle is not needed). It is only run when no master
     * host is configured (server.masterhost is NULL). */
    if (server.active_expire_enabled && server.masterhost == NULL)
        activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);

    /* Unblock all the clients blocked for synchronous replication
     * in WAIT. */
    if (listLength(server.clients_waiting_acks))
        processClientsWaitingReplicas();

    /* Check if there are clients unblocked by modules that implement
     * blocking commands. */
    if (moduleCount()) {
        moduleFireServerEvent(REDISMODULE_EVENT_EVENTLOOP,
                              REDISMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP,
                              NULL);
        moduleHandleBlockedClients();
    }

    /* Try to process pending commands for clients that were just unblocked. */
    if (listLength(server.unblocked_clients))
        processUnblockedClients();

    /* Send all the slaves an ACK request if at least one client blocked
     * during the previous event loop iteration. Note that we do this after
     * processUnblockedClients(), so if there are multiple pipelined WAITs
     * and the just unblocked WAIT gets blocked again, we don't have to wait
     * a server cron cycle in absence of other event loop events. See #6623.
     *
     * We also don't send the ACKs while clients are paused, since it can
     * increment the replication backlog, they'll be sent after the pause
     * if we are still the master. */
    if (server.get_ack_from_slaves && !checkClientPauseTimeoutAndReturnIfPaused()) {
        sendGetackToReplicas();
        server.get_ack_from_slaves = 0;
    }

    /* We may have received updates from clients about their current offset. NOTE:
     * this can't be done where the ACK is received since failover will disconnect
     * our clients. */
    updateFailoverStatus();

    /* Since we rely on current_client to send scheduled invalidation messages
     * we have to flush them after each command, so when we get here, the list
     * must be empty. */
    serverAssert(listLength(server.tracking_pending_keys) == 0);

    /* Send the invalidation messages to clients participating to the
     * client side caching protocol in broadcasting (BCAST) mode. */
    trackingBroadcastInvalidationMessages();

    /* Try to process blocked clients every once in a while.
     *
     * Example: A module calls RM_SignalKeyAsReady from within a timer callback
     * (So we don't visit processCommand() at all).
     *
     * must be done before flushAppendOnlyFile, in case of appendfsync=always,
     * since the unblocked clients may write data. */
    handleClientsBlockedOnKeys();

    /* Write the AOF buffer on disk,
     * must be done before handleClientsWithPendingWritesUsingThreads,
     * in case of appendfsync=always. */
    if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
        flushAppendOnlyFile(0);

    /* Handle writes with pending output buffers. */
    handleClientsWithPendingWritesUsingThreads();

    /* Close clients that need to be closed asynchronous */
    freeClientsInAsyncFreeQueue();

    /* Incrementally trim replication backlog, 10 times the normal speed is
     * to free replication backlog as much as possible. */
    if (server.repl_backlog)
        incrementalTrimReplicationBacklog(10*REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);

    /* Disconnect some clients if they are consuming too much memory. */
    evictClients();

    /* Before we are going to sleep, let the threads access the dataset by
     * releasing the GIL. Redis main thread will not touch anything at this
     * time. */
    if (moduleCount()) moduleReleaseGIL();

    /* Do NOT add anything below moduleReleaseGIL !!! */
}
1652
1653/* This function is called immediately after the event loop multiplexing
1654 * API returned, and the control is going to soon return to Redis by invoking
1655 * the different events callbacks. */
1656void afterSleep(struct aeEventLoop *eventLoop) {
1657 UNUSED(eventLoop);
1658
1659 /* Do NOT add anything above moduleAcquireGIL !!! */
1660
1661 /* Acquire the modules GIL so that their threads won't touch anything. */
1662 if (!ProcessingEventsWhileBlocked) {
1663 if (moduleCount()) {
1664 mstime_t latency;
1665 latencyStartMonitor(latency);
1666
1667 moduleAcquireGIL();
1668 moduleFireServerEvent(REDISMODULE_EVENT_EVENTLOOP,
1669 REDISMODULE_SUBEVENT_EVENTLOOP_AFTER_SLEEP,
1670 NULL);
1671 latencyEndMonitor(latency);
1672 latencyAddSampleIfNeeded("module-acquire-GIL",latency);
1673 }
1674 }
1675}
1676
1677/* =========================== Server initialization ======================== */
1678
1679void createSharedObjects(void) {
1680 int j;
1681
1682 /* Shared command responses */
1683 shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
1684 shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
1685 shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
1686 shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
1687 shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
1688 shared.emptyarray = createObject(OBJ_STRING,sdsnew("*0\r\n"));
1689 shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
1690 shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
1691 shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
1692 shared.space = createObject(OBJ_STRING,sdsnew(" "));
1693 shared.plus = createObject(OBJ_STRING,sdsnew("+"));
1694
1695 /* Shared command error responses */
1696 shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
1697 "-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
1698 shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
1699 shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
1700 "-ERR no such key\r\n"));
1701 shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
1702 "-ERR syntax error\r\n"));
1703 shared.sameobjecterr = createObject(OBJ_STRING,sdsnew(
1704 "-ERR source and destination objects are the same\r\n"));
1705 shared.outofrangeerr = createObject(OBJ_STRING,sdsnew(
1706 "-ERR index out of range\r\n"));
1707 shared.noscripterr = createObject(OBJ_STRING,sdsnew(
1708 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
1709 shared.loadingerr = createObject(OBJ_STRING,sdsnew(
1710 "-LOADING Redis is loading the dataset in memory\r\n"));
1711 shared.slowevalerr = createObject(OBJ_STRING,sdsnew(
1712 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
1713 shared.slowscripterr = createObject(OBJ_STRING,sdsnew(
1714 "-BUSY Redis is busy running a script. You can only call FUNCTION KILL or SHUTDOWN NOSAVE.\r\n"));
1715 shared.slowmoduleerr = createObject(OBJ_STRING,sdsnew(
1716 "-BUSY Redis is busy running a module command.\r\n"));
1717 shared.masterdownerr = createObject(OBJ_STRING,sdsnew(
1718 "-MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'.\r\n"));
1719 shared.bgsaveerr = createObject(OBJ_STRING,sdsnew(
1720 "-MISCONF Redis is configured to save RDB snapshots, but it's currently unable to persist to disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). Please check the Redis logs for details about the RDB error.\r\n"));
1721 shared.roslaveerr = createObject(OBJ_STRING,sdsnew(
1722 "-READONLY You can't write against a read only replica.\r\n"));
1723 shared.noautherr = createObject(OBJ_STRING,sdsnew(
1724 "-NOAUTH Authentication required.\r\n"));
1725 shared.oomerr = createObject(OBJ_STRING,sdsnew(
1726 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
1727 shared.execaborterr = createObject(OBJ_STRING,sdsnew(
1728 "-EXECABORT Transaction discarded because of previous errors.\r\n"));
1729 shared.noreplicaserr = createObject(OBJ_STRING,sdsnew(
1730 "-NOREPLICAS Not enough good replicas to write.\r\n"));
1731 shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
1732 "-BUSYKEY Target key name already exists.\r\n"));
1733
1734 /* The shared NULL depends on the protocol version. */
1735 shared.null[0] = NULL;
1736 shared.null[1] = NULL;
1737 shared.null[2] = createObject(OBJ_STRING,sdsnew("$-1\r\n"));
1738 shared.null[3] = createObject(OBJ_STRING,sdsnew("_\r\n"));
1739
1740 shared.nullarray[0] = NULL;
1741 shared.nullarray[1] = NULL;
1742 shared.nullarray[2] = createObject(OBJ_STRING,sdsnew("*-1\r\n"));
1743 shared.nullarray[3] = createObject(OBJ_STRING,sdsnew("_\r\n"));
1744
1745 shared.emptymap[0] = NULL;
1746 shared.emptymap[1] = NULL;
1747 shared.emptymap[2] = createObject(OBJ_STRING,sdsnew("*0\r\n"));
1748 shared.emptymap[3] = createObject(OBJ_STRING,sdsnew("%0\r\n"));
1749
1750 shared.emptyset[0] = NULL;
1751 shared.emptyset[1] = NULL;
1752 shared.emptyset[2] = createObject(OBJ_STRING,sdsnew("*0\r\n"));
1753 shared.emptyset[3] = createObject(OBJ_STRING,sdsnew("~0\r\n"));
1754
1755 for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) {
1756 char dictid_str[64];
1757 int dictid_len;
1758
1759 dictid_len = ll2string(dictid_str,sizeof(dictid_str),j);
1760 shared.select[j] = createObject(OBJ_STRING,
1761 sdscatprintf(sdsempty(),
1762 "*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
1763 dictid_len, dictid_str));
1764 }
1765 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1766 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1767 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1768 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1769 shared.ssubscribebulk = createStringObject("$10\r\nssubscribe\r\n", 17);
1770 shared.sunsubscribebulk = createStringObject("$12\r\nsunsubscribe\r\n", 19);
1771 shared.smessagebulk = createStringObject("$8\r\nsmessage\r\n", 14);
1772 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1773 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1774
1775 /* Shared command names */
1776 shared.del = createStringObject("DEL",3);
1777 shared.unlink = createStringObject("UNLINK",6);
1778 shared.rpop = createStringObject("RPOP",4);
1779 shared.lpop = createStringObject("LPOP",4);
1780 shared.lpush = createStringObject("LPUSH",5);
1781 shared.rpoplpush = createStringObject("RPOPLPUSH",9);
1782 shared.lmove = createStringObject("LMOVE",5);
1783 shared.blmove = createStringObject("BLMOVE",6);
1784 shared.zpopmin = createStringObject("ZPOPMIN",7);
1785 shared.zpopmax = createStringObject("ZPOPMAX",7);
1786 shared.multi = createStringObject("MULTI",5);
1787 shared.exec = createStringObject("EXEC",4);
1788 shared.hset = createStringObject("HSET",4);
1789 shared.srem = createStringObject("SREM",4);
1790 shared.xgroup = createStringObject("XGROUP",6);
1791 shared.xclaim = createStringObject("XCLAIM",6);
1792 shared.script = createStringObject("SCRIPT",6);
1793 shared.replconf = createStringObject("REPLCONF",8);
1794 shared.pexpireat = createStringObject("PEXPIREAT",9);
1795 shared.pexpire = createStringObject("PEXPIRE",7);
1796 shared.persist = createStringObject("PERSIST",7);
1797 shared.set = createStringObject("SET",3);
1798 shared.eval = createStringObject("EVAL",4);
1799
1800 /* Shared command argument */
1801 shared.left = createStringObject("left",4);
1802 shared.right = createStringObject("right",5);
1803 shared.pxat = createStringObject("PXAT", 4);
1804 shared.time = createStringObject("TIME",4);
1805 shared.retrycount = createStringObject("RETRYCOUNT",10);
1806 shared.force = createStringObject("FORCE",5);
1807 shared.justid = createStringObject("JUSTID",6);
1808 shared.entriesread = createStringObject("ENTRIESREAD",11);
1809 shared.lastid = createStringObject("LASTID",6);
1810 shared.default_username = createStringObject("default",7);
1811 shared.ping = createStringObject("ping",4);
1812 shared.setid = createStringObject("SETID",5);
1813 shared.keepttl = createStringObject("KEEPTTL",7);
1814 shared.absttl = createStringObject("ABSTTL",6);
1815 shared.load = createStringObject("LOAD",4);
1816 shared.createconsumer = createStringObject("CREATECONSUMER",14);
1817 shared.getack = createStringObject("GETACK",6);
1818 shared.special_asterick = createStringObject("*",1);
1819 shared.special_equals = createStringObject("=",1);
1820 shared.redacted = makeObjectShared(createStringObject("(redacted)",10));
1821
1822 for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
1823 shared.integers[j] =
1824 makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
1825 shared.integers[j]->encoding = OBJ_ENCODING_INT;
1826 }
1827 for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
1828 shared.mbulkhdr[j] = createObject(OBJ_STRING,
1829 sdscatprintf(sdsempty(),"*%d\r\n",j));
1830 shared.bulkhdr[j] = createObject(OBJ_STRING,
1831 sdscatprintf(sdsempty(),"$%d\r\n",j));
1832 shared.maphdr[j] = createObject(OBJ_STRING,
1833 sdscatprintf(sdsempty(),"%%%d\r\n",j));
1834 shared.sethdr[j] = createObject(OBJ_STRING,
1835 sdscatprintf(sdsempty(),"~%d\r\n",j));
1836 }
1837 /* The following two shared objects, minstring and maxstring, are not
1838 * actually used for their value but as a special object meaning
1839 * respectively the minimum possible string and the maximum possible
1840 * string in string comparisons for the ZRANGEBYLEX command. */
1841 shared.minstring = sdsnew("minstring");
1842 shared.maxstring = sdsnew("maxstring");
1843}
1844
1845void initServerConfig(void) {
1846 int j;
1847 char *default_bindaddr[CONFIG_DEFAULT_BINDADDR_COUNT] = CONFIG_DEFAULT_BINDADDR;
1848
1849 initConfigValues();
1850 updateCachedTime(1);
1851 getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
1852 server.runid[CONFIG_RUN_ID_SIZE] = '\0';
1853 changeReplicationId();
1854 clearReplicationId2();
1855 server.hz = CONFIG_DEFAULT_HZ; /* Initialize it ASAP, even if it may get
1856 updated later after loading the config.
1857 This value may be used before the server
1858 is initialized. */
1859 server.timezone = getTimeZone(); /* Initialized by tzset(). */
1860 server.configfile = NULL;
1861 server.executable = NULL;
1862 server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
1863 server.bindaddr_count = CONFIG_DEFAULT_BINDADDR_COUNT;
1864 for (j = 0; j < CONFIG_DEFAULT_BINDADDR_COUNT; j++)
1865 server.bindaddr[j] = zstrdup(default_bindaddr[j]);
1866 server.ipfd.count = 0;
1867 server.tlsfd.count = 0;
1868 server.sofd = -1;
1869 server.active_expire_enabled = 1;
1870 server.skip_checksum_validation = 0;
1871 server.loading = 0;
1872 server.async_loading = 0;
1873 server.loading_rdb_used_mem = 0;
1874 server.aof_state = AOF_OFF;
1875 server.aof_rewrite_base_size = 0;
1876 server.aof_rewrite_scheduled = 0;
1877 server.aof_flush_sleep = 0;
1878 server.aof_last_fsync = time(NULL);
1879 server.aof_cur_timestamp = 0;
1880 atomicSet(server.aof_bio_fsync_status,C_OK);
1881 server.aof_rewrite_time_last = -1;
1882 server.aof_rewrite_time_start = -1;
1883 server.aof_lastbgrewrite_status = C_OK;
1884 server.aof_delayed_fsync = 0;
1885 server.aof_fd = -1;
1886 server.aof_selected_db = -1; /* Make sure the first time will not match */
1887 server.aof_flush_postponed_start = 0;
1888 server.aof_last_incr_size = 0;
1889 server.active_defrag_running = 0;
1890 server.notify_keyspace_events = 0;
1891 server.blocked_clients = 0;
1892 memset(server.blocked_clients_by_type,0,
1893 sizeof(server.blocked_clients_by_type));
1894 server.shutdown_asap = 0;
1895 server.shutdown_flags = 0;
1896 server.shutdown_mstime = 0;
1897 server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE;
1898 server.migrate_cached_sockets = dictCreate(&migrateCacheDictType);
1899 server.next_client_id = 1; /* Client IDs, start from 1 .*/
1900 server.page_size = sysconf(_SC_PAGESIZE);
1901 server.pause_cron = 0;
1902
1903 server.latency_tracking_info_percentiles_len = 3;
1904 server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
1905 server.latency_tracking_info_percentiles[0] = 50.0; /* p50 */
1906 server.latency_tracking_info_percentiles[1] = 99.0; /* p99 */
1907 server.latency_tracking_info_percentiles[2] = 99.9; /* p999 */
1908
1909 unsigned int lruclock = getLRUClock();
1910 atomicSet(server.lruclock,lruclock);
1911 resetServerSaveParams();
1912
1913 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1914 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1915 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1916
1917 /* Replication related */
1918 server.masterhost = NULL;
1919 server.masterport = 6379;
1920 server.master = NULL;
1921 server.cached_master = NULL;
1922 server.master_initial_offset = -1;
1923 server.repl_state = REPL_STATE_NONE;
1924 server.repl_transfer_tmpfile = NULL;
1925 server.repl_transfer_fd = -1;
1926 server.repl_transfer_s = NULL;
1927 server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
1928 server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
1929 server.master_repl_offset = 0;
1930
1931 /* Replication partial resync backlog */
1932 server.repl_backlog = NULL;
1933 server.repl_no_slaves_since = time(NULL);
1934
1935 /* Failover related */
1936 server.failover_end_time = 0;
1937 server.force_failover = 0;
1938 server.target_replica_host = NULL;
1939 server.target_replica_port = 0;
1940 server.failover_state = NO_FAILOVER;
1941
1942 /* Client output buffer limits */
1943 for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
1944 server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
1945
1946 /* Linux OOM Score config */
1947 for (j = 0; j < CONFIG_OOM_COUNT; j++)
1948 server.oom_score_adj_values[j] = configOOMScoreAdjValuesDefaults[j];
1949
1950 /* Double constants initialization */
1951 R_Zero = 0.0;
1952 R_PosInf = 1.0/R_Zero;
1953 R_NegInf = -1.0/R_Zero;
1954 R_Nan = R_Zero/R_Zero;
1955
1956 /* Command table -- we initialize it here as it is part of the
1957 * initial configuration, since command names may be changed via
1958 * redis.conf using the rename-command directive. */
1959 server.commands = dictCreate(&commandTableDictType);
1960 server.orig_commands = dictCreate(&commandTableDictType);
1961 populateCommandTable();
1962
1963 /* Debugging */
1964 server.watchdog_period = 0;
1965}
1966
1967extern char **environ;
1968
1969/* Restart the server, executing the same executable that started this
1970 * instance, with the same arguments and configuration file.
1971 *
1972 * The function is designed to directly call execve() so that the new
1973 * server instance will retain the PID of the previous one.
1974 *
1975 * The list of flags, that may be bitwise ORed together, alter the
1976 * behavior of this function:
1977 *
1978 * RESTART_SERVER_NONE No flags.
1979 * RESTART_SERVER_GRACEFULLY Do a proper shutdown before restarting.
1980 * RESTART_SERVER_CONFIG_REWRITE Rewrite the config file before restarting.
1981 *
1982 * On success the function does not return, because the process turns into
1983 * a different process. On error C_ERR is returned. */
1984int restartServer(int flags, mstime_t delay) {
1985 int j;
1986
1987 /* Check if we still have accesses to the executable that started this
1988 * server instance. */
1989 if (access(server.executable,X_OK) == -1) {
1990 serverLog(LL_WARNING,"Can't restart: this process has no "
1991 "permissions to execute %s", server.executable);
1992 return C_ERR;
1993 }
1994
1995 /* Config rewriting. */
1996 if (flags & RESTART_SERVER_CONFIG_REWRITE &&
1997 server.configfile &&
1998 rewriteConfig(server.configfile, 0) == -1)
1999 {
2000 serverLog(LL_WARNING,"Can't restart: configuration rewrite process "
2001 "failed: %s", strerror(errno));
2002 return C_ERR;
2003 }
2004
2005 /* Perform a proper shutdown. We don't wait for lagging replicas though. */
2006 if (flags & RESTART_SERVER_GRACEFULLY &&
2007 prepareForShutdown(SHUTDOWN_NOW) != C_OK)
2008 {
2009 serverLog(LL_WARNING,"Can't restart: error preparing for shutdown");
2010 return C_ERR;
2011 }
2012
2013 /* Close all file descriptors, with the exception of stdin, stdout, stderr
2014 * which are useful if we restart a Redis server which is not daemonized. */
2015 for (j = 3; j < (int)server.maxclients + 1024; j++) {
2016 /* Test the descriptor validity before closing it, otherwise
2017 * Valgrind issues a warning on close(). */
2018 if (fcntl(j,F_GETFD) != -1) close(j);
2019 }
2020
2021 /* Execute the server with the original command line. */
2022 if (delay) usleep(delay*1000);
2023 zfree(server.exec_argv[0]);
2024 server.exec_argv[0] = zstrdup(server.executable);
2025 execve(server.executable,server.exec_argv,environ);
2026
2027 /* If an error occurred here, there is nothing we can do, but exit. */
2028 _exit(1);
2029
2030 return C_ERR; /* Never reached. */
2031}
2032
2033/* This function will configure the current process's oom_score_adj according
2034 * to user specified configuration. This is currently implemented on Linux
2035 * only.
2036 *
2037 * A process_class value of -1 implies OOM_CONFIG_MASTER or OOM_CONFIG_REPLICA,
2038 * depending on current role.
2039 */
2040int setOOMScoreAdj(int process_class) {
2041 if (process_class == -1)
2042 process_class = (server.masterhost ? CONFIG_OOM_REPLICA : CONFIG_OOM_MASTER);
2043
2044 serverAssert(process_class >= 0 && process_class < CONFIG_OOM_COUNT);
2045
2046#ifdef HAVE_PROC_OOM_SCORE_ADJ
2047 /* The following statics are used to indicate Redis has changed the process's oom score.
2048 * And to save the original score so we can restore it later if needed.
2049 * We need this so when we disabled oom-score-adj (also during configuration rollback
2050 * when another configuration parameter was invalid and causes a rollback after
2051 * applying a new oom-score) we can return to the oom-score value from before our
2052 * adjustments. */
2053 static int oom_score_adjusted_by_redis = 0;
2054 static int oom_score_adj_base = 0;
2055
2056 int fd;
2057 int val;
2058 char buf[64];
2059
2060 if (server.oom_score_adj != OOM_SCORE_ADJ_NO) {
2061 if (!oom_score_adjusted_by_redis) {
2062 oom_score_adjusted_by_redis = 1;
2063 /* Backup base value before enabling Redis control over oom score */
2064 fd = open("/proc/self/oom_score_adj", O_RDONLY);
2065 if (fd < 0 || read(fd, buf, sizeof(buf)) < 0) {
2066 serverLog(LL_WARNING, "Unable to read oom_score_adj: %s", strerror(errno));
2067 if (fd != -1) close(fd);
2068 return C_ERR;
2069 }
2070 oom_score_adj_base = atoi(buf);
2071 close(fd);
2072 }
2073
2074 val = server.oom_score_adj_values[process_class];
2075 if (server.oom_score_adj == OOM_SCORE_RELATIVE)
2076 val += oom_score_adj_base;
2077 if (val > 1000) val = 1000;
2078 if (val < -1000) val = -1000;
2079 } else if (oom_score_adjusted_by_redis) {
2080 oom_score_adjusted_by_redis = 0;
2081 val = oom_score_adj_base;
2082 }
2083 else {
2084 return C_OK;
2085 }
2086
2087 snprintf(buf, sizeof(buf) - 1, "%d\n", val);
2088
2089 fd = open("/proc/self/oom_score_adj", O_WRONLY);
2090 if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
2091 serverLog(LL_WARNING, "Unable to write oom_score_adj: %s", strerror(errno));
2092 if (fd != -1) close(fd);
2093 return C_ERR;
2094 }
2095
2096 close(fd);
2097 return C_OK;
2098#else
2099 /* Unsupported */
2100 return C_ERR;
2101#endif
2102}
2103
2104/* This function will try to raise the max number of open files accordingly to
2105 * the configured max number of clients. It also reserves a number of file
2106 * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
2107 * persistence, listening sockets, log files and so forth.
2108 *
2109 * If it will not be possible to set the limit accordingly to the configured
2110 * max number of clients, the function will do the reverse setting
2111 * server.maxclients to the value that we can actually handle. */
2112void adjustOpenFilesLimit(void) {
2113 rlim_t maxfiles = server.maxclients+CONFIG_MIN_RESERVED_FDS;
2114 struct rlimit limit;
2115
2116 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
2117 serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
2118 strerror(errno));
2119 server.maxclients = 1024-CONFIG_MIN_RESERVED_FDS;
2120 } else {
2121 rlim_t oldlimit = limit.rlim_cur;
2122
2123 /* Set the max number of files if the current limit is not enough
2124 * for our needs. */
2125 if (oldlimit < maxfiles) {
2126 rlim_t bestlimit;
2127 int setrlimit_error = 0;
2128
2129 /* Try to set the file limit to match 'maxfiles' or at least
2130 * to the higher value supported less than maxfiles. */
2131 bestlimit = maxfiles;
2132 while(bestlimit > oldlimit) {
2133 rlim_t decr_step = 16;
2134
2135 limit.rlim_cur = bestlimit;
2136 limit.rlim_max = bestlimit;
2137 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
2138 setrlimit_error = errno;
2139
2140 /* We failed to set file limit to 'bestlimit'. Try with a
2141 * smaller limit decrementing by a few FDs per iteration. */
2142 if (bestlimit < decr_step) {
2143 bestlimit = oldlimit;
2144 break;
2145 }
2146 bestlimit -= decr_step;
2147 }
2148
2149 /* Assume that the limit we get initially is still valid if
2150 * our last try was even lower. */
2151 if (bestlimit < oldlimit) bestlimit = oldlimit;
2152
2153 if (bestlimit < maxfiles) {
2154 unsigned int old_maxclients = server.maxclients;
2155 server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS;
2156 /* maxclients is unsigned so may overflow: in order
2157 * to check if maxclients is now logically less than 1
2158 * we test indirectly via bestlimit. */
2159 if (bestlimit <= CONFIG_MIN_RESERVED_FDS) {
2160 serverLog(LL_WARNING,"Your current 'ulimit -n' "
2161 "of %llu is not enough for the server to start. "
2162 "Please increase your open file limit to at least "
2163 "%llu. Exiting.",
2164 (unsigned long long) oldlimit,
2165 (unsigned long long) maxfiles);
2166 exit(1);
2167 }
2168 serverLog(LL_WARNING,"You requested maxclients of %d "
2169 "requiring at least %llu max file descriptors.",
2170 old_maxclients,
2171 (unsigned long long) maxfiles);
2172 serverLog(LL_WARNING,"Server can't set maximum open files "
2173 "to %llu because of OS error: %s.",
2174 (unsigned long long) maxfiles, strerror(setrlimit_error));
2175 serverLog(LL_WARNING,"Current maximum open files is %llu. "
2176 "maxclients has been reduced to %d to compensate for "
2177 "low ulimit. "
2178 "If you need higher maxclients increase 'ulimit -n'.",
2179 (unsigned long long) bestlimit, server.maxclients);
2180 } else {
2181 serverLog(LL_NOTICE,"Increased maximum number of open files "
2182 "to %llu (it was originally set to %llu).",
2183 (unsigned long long) maxfiles,
2184 (unsigned long long) oldlimit);
2185 }
2186 }
2187 }
2188}
2189
/* Log a warning when the system wide limit on the listen backlog is lower
 * than server.tcp_backlog, since the configured value can't be enforced in
 * that case.
 *
 * Depending on what is available at build time the limit is read from
 * /proc/sys/net/core/somaxconn, from the kern.ipc.somaxconn or
 * kern.somaxconn sysctl, or taken from the SOMAXCONN constant. When the
 * limit can't be obtained nothing is logged. */
void checkTcpBacklogSettings(void) {
#if defined(HAVE_PROC_SOMAXCONN)
    char line[1024];
    FILE *fp = fopen("/proc/sys/net/core/somaxconn","r");

    if (fp == NULL) return;
    if (fgets(line,sizeof(line),fp) == NULL) {
        fclose(fp);
        return;
    }

    int somaxconn = atoi(line);
    if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
        serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
    }
    fclose(fp);
#elif defined(HAVE_SYSCTL_KIPC_SOMAXCONN)
    int mib[3] = {CTL_KERN, KERN_IPC, KIPC_SOMAXCONN};
    int somaxconn;
    size_t len = sizeof(int);

    if (sysctl(mib, 3, &somaxconn, &len, NULL, 0) != 0) return;
    if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
        serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because kern.ipc.somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
    }
#elif defined(HAVE_SYSCTL_KERN_SOMAXCONN)
    int mib[2] = {CTL_KERN, KERN_SOMAXCONN};
    int somaxconn;
    size_t len = sizeof(int);

    if (sysctl(mib, 2, &somaxconn, &len, NULL, 0) != 0) return;
    if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
        serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because kern.somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
    }
#elif defined(SOMAXCONN)
    if (SOMAXCONN < server.tcp_backlog) {
        serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because SOMAXCONN is set to the lower value of %d.", server.tcp_backlog, SOMAXCONN);
    }
#endif
}
2235
2236void closeSocketListeners(socketFds *sfd) {
2237 int j;
2238
2239 for (j = 0; j < sfd->count; j++) {
2240 if (sfd->fd[j] == -1) continue;
2241
2242 aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE);
2243 close(sfd->fd[j]);
2244 }
2245
2246 sfd->count = 0;
2247}
2248
2249/* Create an event handler for accepting new connections in TCP or TLS domain sockets.
2250 * This works atomically for all socket fds */
2251int createSocketAcceptHandler(socketFds *sfd, aeFileProc *accept_handler) {
2252 int j;
2253
2254 for (j = 0; j < sfd->count; j++) {
2255 if (aeCreateFileEvent(server.el, sfd->fd[j], AE_READABLE, accept_handler,NULL) == AE_ERR) {
2256 /* Rollback */
2257 for (j = j-1; j >= 0; j--) aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE);
2258 return C_ERR;
2259 }
2260 }
2261 return C_OK;
2262}
2263
2264/* Initialize a set of file descriptors to listen to the specified 'port'
2265 * binding the addresses specified in the Redis server configuration.
2266 *
2267 * The listening file descriptors are stored in the integer array 'fds'
2268 * and their number is set in '*count'.
2269 *
2270 * The addresses to bind are specified in the global server.bindaddr array
2271 * and their number is server.bindaddr_count. If the server configuration
2272 * contains no specific addresses to bind, this function will try to
2273 * bind * (all addresses) for both the IPv4 and IPv6 protocols.
2274 *
2275 * On success the function returns C_OK.
2276 *
2277 * On error the function returns C_ERR. For the function to be on
2278 * error, at least one of the server.bindaddr addresses was
2279 * impossible to bind, or no bind addresses were specified in the server
2280 * configuration but the function is not able to bind * for at least
2281 * one of the IPv4 or IPv6 protocols. */
2282int listenToPort(int port, socketFds *sfd) {
2283 int j;
2284 char **bindaddr = server.bindaddr;
2285
2286 /* If we have no bind address, we don't listen on a TCP socket */
2287 if (server.bindaddr_count == 0) return C_OK;
2288
2289 for (j = 0; j < server.bindaddr_count; j++) {
2290 char* addr = bindaddr[j];
2291 int optional = *addr == '-';
2292 if (optional) addr++;
2293 if (strchr(addr,':')) {
2294 /* Bind IPv6 address. */
2295 sfd->fd[sfd->count] = anetTcp6Server(server.neterr,port,addr,server.tcp_backlog);
2296 } else {
2297 /* Bind IPv4 address. */
2298 sfd->fd[sfd->count] = anetTcpServer(server.neterr,port,addr,server.tcp_backlog);
2299 }
2300 if (sfd->fd[sfd->count] == ANET_ERR) {
2301 int net_errno = errno;
2302 serverLog(LL_WARNING,
2303 "Warning: Could not create server TCP listening socket %s:%d: %s",
2304 addr, port, server.neterr);
2305 if (net_errno == EADDRNOTAVAIL && optional)
2306 continue;
2307 if (net_errno == ENOPROTOOPT || net_errno == EPROTONOSUPPORT ||
2308 net_errno == ESOCKTNOSUPPORT || net_errno == EPFNOSUPPORT ||
2309 net_errno == EAFNOSUPPORT)
2310 continue;
2311
2312 /* Rollback successful listens before exiting */
2313 closeSocketListeners(sfd);
2314 return C_ERR;
2315 }
2316 if (server.socket_mark_id > 0) anetSetSockMarkId(NULL, sfd->fd[sfd->count], server.socket_mark_id);
2317 anetNonBlock(NULL,sfd->fd[sfd->count]);
2318 anetCloexec(sfd->fd[sfd->count]);
2319 sfd->count++;
2320 }
2321 return C_OK;
2322}
2323
2324/* Resets the stats that we expose via INFO or other means that we want
2325 * to reset via CONFIG RESETSTAT. The function is also used in order to
2326 * initialize these fields in initServer() at server startup. */
2327void resetServerStats(void) {
2328 int j;
2329
2330 server.stat_numcommands = 0;
2331 server.stat_numconnections = 0;
2332 server.stat_expiredkeys = 0;
2333 server.stat_expired_stale_perc = 0;
2334 server.stat_expired_time_cap_reached_count = 0;
2335 server.stat_expire_cycle_time_used = 0;
2336 server.stat_evictedkeys = 0;
2337 server.stat_evictedclients = 0;
2338 server.stat_total_eviction_exceeded_time = 0;
2339 server.stat_last_eviction_exceeded_time = 0;
2340 server.stat_keyspace_misses = 0;
2341 server.stat_keyspace_hits = 0;
2342 server.stat_active_defrag_hits = 0;
2343 server.stat_active_defrag_misses = 0;
2344 server.stat_active_defrag_key_hits = 0;
2345 server.stat_active_defrag_key_misses = 0;
2346 server.stat_active_defrag_scanned = 0;
2347 server.stat_total_active_defrag_time = 0;
2348 server.stat_last_active_defrag_time = 0;
2349 server.stat_fork_time = 0;
2350 server.stat_fork_rate = 0;
2351 server.stat_total_forks = 0;
2352 server.stat_rejected_conn = 0;
2353 server.stat_sync_full = 0;
2354 server.stat_sync_partial_ok = 0;
2355 server.stat_sync_partial_err = 0;
2356 server.stat_io_reads_processed = 0;
2357 atomicSet(server.stat_total_reads_processed, 0);
2358 server.stat_io_writes_processed = 0;
2359 atomicSet(server.stat_total_writes_processed, 0);
2360 for (j = 0; j < STATS_METRIC_COUNT; j++) {
2361 server.inst_metric[j].idx = 0;
2362 server.inst_metric[j].last_sample_time = mstime();
2363 server.inst_metric[j].last_sample_count = 0;
2364 memset(server.inst_metric[j].samples,0,
2365 sizeof(server.inst_metric[j].samples));
2366 }
2367 server.stat_aof_rewrites = 0;
2368 server.stat_rdb_saves = 0;
2369 server.stat_aofrw_consecutive_failures = 0;
2370 atomicSet(server.stat_net_input_bytes, 0);
2371 atomicSet(server.stat_net_output_bytes, 0);
2372 atomicSet(server.stat_net_repl_input_bytes, 0);
2373 atomicSet(server.stat_net_repl_output_bytes, 0);
2374 server.stat_unexpected_error_replies = 0;
2375 server.stat_total_error_replies = 0;
2376 server.stat_dump_payload_sanitizations = 0;
2377 server.aof_delayed_fsync = 0;
2378 server.stat_reply_buffer_shrinks = 0;
2379 server.stat_reply_buffer_expands = 0;
2380 lazyfreeResetStats();
2381}
2382
/* Make the calling thread killable at any time, so that kill threads
 * functions can work reliably (default cancelability type is
 * PTHREAD_CANCEL_DEFERRED).
 * Needed for pthread_cancel used by the fast memory test used by the crash
 * report. */
void makeThreadKillable(void) {
    /* The previous state / type is of no interest here, but a real variable
     * is passed anyway: POSIX does not specify whether NULL is accepted for
     * these output arguments, so portable code should not rely on it. */
    int previous;

    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &previous);
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &previous);
}
2390
2391void initServer(void) {
2392 int j;
2393
2394 signal(SIGHUP, SIG_IGN);
2395 signal(SIGPIPE, SIG_IGN);
2396 setupSignalHandlers();
2397 makeThreadKillable();
2398
2399 if (server.syslog_enabled) {
2400 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
2401 server.syslog_facility);
2402 }
2403
2404 /* Initialization after setting defaults from the config system. */
2405 server.aof_state = server.aof_enabled ? AOF_ON : AOF_OFF;
2406 server.hz = server.config_hz;
2407 server.pid = getpid();
2408 server.in_fork_child = CHILD_TYPE_NONE;
2409 server.main_thread_id = pthread_self();
2410 server.current_client = NULL;
2411 server.errors = raxNew();
2412 server.fixed_time_expire = 0;
2413 server.in_nested_call = 0;
2414 server.clients = listCreate();
2415 server.clients_index = raxNew();
2416 server.clients_to_close = listCreate();
2417 server.slaves = listCreate();
2418 server.monitors = listCreate();
2419 server.clients_pending_write = listCreate();
2420 server.clients_pending_read = listCreate();
2421 server.clients_timeout_table = raxNew();
2422 server.replication_allowed = 1;
2423 server.slaveseldb = -1; /* Force to emit the first SELECT command. */
2424 server.unblocked_clients = listCreate();
2425 server.ready_keys = listCreate();
2426 server.tracking_pending_keys = listCreate();
2427 server.clients_waiting_acks = listCreate();
2428 server.get_ack_from_slaves = 0;
2429 server.client_pause_type = CLIENT_PAUSE_OFF;
2430 server.client_pause_end_time = 0;
2431 memset(server.client_pause_per_purpose, 0,
2432 sizeof(server.client_pause_per_purpose));
2433 server.postponed_clients = listCreate();
2434 server.events_processed_while_blocked = 0;
2435 server.system_memory_size = zmalloc_get_memory_size();
2436 server.blocked_last_cron = 0;
2437 server.blocking_op_nesting = 0;
2438 server.thp_enabled = 0;
2439 server.cluster_drop_packet_filter = -1;
2440 server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
2441 server.reply_buffer_resizing_enabled = 1;
2442 resetReplicationBuffer();
2443
2444 if ((server.tls_port || server.tls_replication || server.tls_cluster)
2445 && tlsConfigure(&server.tls_ctx_config) == C_ERR) {
2446 serverLog(LL_WARNING, "Failed to configure TLS. Check logs for more info.");
2447 exit(1);
2448 }
2449
2450 for (j = 0; j < CLIENT_MEM_USAGE_BUCKETS; j++) {
2451 server.client_mem_usage_buckets[j].mem_usage_sum = 0;
2452 server.client_mem_usage_buckets[j].clients = listCreate();
2453 }
2454
2455 createSharedObjects();
2456 adjustOpenFilesLimit();
2457 const char *clk_msg = monotonicInit();
2458 serverLog(LL_NOTICE, "monotonic clock: %s", clk_msg);
2459 server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
2460 if (server.el == NULL) {
2461 serverLog(LL_WARNING,
2462 "Failed creating the event loop. Error message: '%s'",
2463 strerror(errno));
2464 exit(1);
2465 }
2466 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
2467
2468 /* Open the TCP listening socket for the user commands. */
2469 if (server.port != 0 &&
2470 listenToPort(server.port,&server.ipfd) == C_ERR) {
2471 /* Note: the following log text is matched by the test suite. */
2472 serverLog(LL_WARNING, "Failed listening on port %u (TCP), aborting.", server.port);
2473 exit(1);
2474 }
2475 if (server.tls_port != 0 &&
2476 listenToPort(server.tls_port,&server.tlsfd) == C_ERR) {
2477 /* Note: the following log text is matched by the test suite. */
2478 serverLog(LL_WARNING, "Failed listening on port %u (TLS), aborting.", server.tls_port);
2479 exit(1);
2480 }
2481
2482 /* Open the listening Unix domain socket. */
2483 if (server.unixsocket != NULL) {
2484 unlink(server.unixsocket); /* don't care if this fails */
2485 server.sofd = anetUnixServer(server.neterr,server.unixsocket,
2486 (mode_t)server.unixsocketperm, server.tcp_backlog);
2487 if (server.sofd == ANET_ERR) {
2488 serverLog(LL_WARNING, "Failed opening Unix socket: %s", server.neterr);
2489 exit(1);
2490 }
2491 anetNonBlock(NULL,server.sofd);
2492 anetCloexec(server.sofd);
2493 }
2494
2495 /* Abort if there are no listening sockets at all. */
2496 if (server.ipfd.count == 0 && server.tlsfd.count == 0 && server.sofd < 0) {
2497 serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
2498 exit(1);
2499 }
2500
2501 /* Create the Redis databases, and initialize other internal state. */
2502 for (j = 0; j < server.dbnum; j++) {
2503 server.db[j].dict = dictCreate(&dbDictType);
2504 server.db[j].expires = dictCreate(&dbExpiresDictType);
2505 server.db[j].expires_cursor = 0;
2506 server.db[j].blocking_keys = dictCreate(&keylistDictType);
2507 server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType);
2508 server.db[j].watched_keys = dictCreate(&keylistDictType);
2509 server.db[j].id = j;
2510 server.db[j].avg_ttl = 0;
2511 server.db[j].defrag_later = listCreate();
2512 server.db[j].slots_to_keys = NULL; /* Set by clusterInit later on if necessary. */
2513 listSetFreeMethod(server.db[j].defrag_later,(void (*)(void*))sdsfree);
2514 }
2515 evictionPoolAlloc(); /* Initialize the LRU keys pool. */
2516 server.pubsub_channels = dictCreate(&keylistDictType);
2517 server.pubsub_patterns = dictCreate(&keylistDictType);
2518 server.pubsubshard_channels = dictCreate(&keylistDictType);
2519 server.cronloops = 0;
2520 server.in_exec = 0;
2521 server.busy_module_yield_flags = BUSY_MODULE_YIELD_NONE;
2522 server.busy_module_yield_reply = NULL;
2523 server.core_propagates = 0;
2524 server.propagate_no_multi = 0;
2525 server.module_ctx_nesting = 0;
2526 server.client_pause_in_transaction = 0;
2527 server.child_pid = -1;
2528 server.child_type = CHILD_TYPE_NONE;
2529 server.rdb_child_type = RDB_CHILD_TYPE_NONE;
2530 server.rdb_pipe_conns = NULL;
2531 server.rdb_pipe_numconns = 0;
2532 server.rdb_pipe_numconns_writing = 0;
2533 server.rdb_pipe_buff = NULL;
2534 server.rdb_pipe_bufflen = 0;
2535 server.rdb_bgsave_scheduled = 0;
2536 server.child_info_pipe[0] = -1;
2537 server.child_info_pipe[1] = -1;
2538 server.child_info_nread = 0;
2539 server.aof_buf = sdsempty();
2540 server.lastsave = time(NULL); /* At startup we consider the DB saved. */
2541 server.lastbgsave_try = 0; /* At startup we never tried to BGSAVE. */
2542 server.rdb_save_time_last = -1;
2543 server.rdb_save_time_start = -1;
2544 server.rdb_last_load_keys_expired = 0;
2545 server.rdb_last_load_keys_loaded = 0;
2546 server.dirty = 0;
2547 resetServerStats();
2548 /* A few stats we don't want to reset: server startup time, and peak mem. */
2549 server.stat_starttime = time(NULL);
2550 server.stat_peak_memory = 0;
2551 server.stat_current_cow_peak = 0;
2552 server.stat_current_cow_bytes = 0;
2553 server.stat_current_cow_updated = 0;
2554 server.stat_current_save_keys_processed = 0;
2555 server.stat_current_save_keys_total = 0;
2556 server.stat_rdb_cow_bytes = 0;
2557 server.stat_aof_cow_bytes = 0;
2558 server.stat_module_cow_bytes = 0;
2559 server.stat_module_progress = 0;
2560 for (int j = 0; j < CLIENT_TYPE_COUNT; j++)
2561 server.stat_clients_type_memory[j] = 0;
2562 server.stat_cluster_links_memory = 0;
2563 server.cron_malloc_stats.zmalloc_used = 0;
2564 server.cron_malloc_stats.process_rss = 0;
2565 server.cron_malloc_stats.allocator_allocated = 0;
2566 server.cron_malloc_stats.allocator_active = 0;
2567 server.cron_malloc_stats.allocator_resident = 0;
2568 server.lastbgsave_status = C_OK;
2569 server.aof_last_write_status = C_OK;
2570 server.aof_last_write_errno = 0;
2571 server.repl_good_slaves_count = 0;
2572 server.last_sig_received = 0;
2573
2574 /* Create the timer callback, this is our way to process many background
2575 * operations incrementally, like clients timeout, eviction of unaccessed
2576 * expired keys and so forth. */
2577 if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
2578 serverPanic("Can't create event loop timers.");
2579 exit(1);
2580 }
2581
2582 /* Create an event handler for accepting new connections in TCP and Unix
2583 * domain sockets. */
2584 if (createSocketAcceptHandler(&server.ipfd, acceptTcpHandler) != C_OK) {
2585 serverPanic("Unrecoverable error creating TCP socket accept handler.");
2586 }
2587 if (createSocketAcceptHandler(&server.tlsfd, acceptTLSHandler) != C_OK) {
2588 serverPanic("Unrecoverable error creating TLS socket accept handler.");
2589 }
2590 if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
2591 acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
2592
2593
2594 /* Register a readable event for the pipe used to awake the event loop
2595 * from module threads. */
2596 if (aeCreateFileEvent(server.el, server.module_pipe[0], AE_READABLE,
2597 modulePipeReadable,NULL) == AE_ERR) {
2598 serverPanic(
2599 "Error registering the readable event for the module pipe.");
2600 }
2601
2602 /* Register before and after sleep handlers (note this needs to be done
2603 * before loading persistence since it is used by processEventsWhileBlocked. */
2604 aeSetBeforeSleepProc(server.el,beforeSleep);
2605 aeSetAfterSleepProc(server.el,afterSleep);
2606
2607 /* 32 bit instances are limited to 4GB of address space, so if there is
2608 * no explicit limit in the user provided configuration we set a limit
2609 * at 3 GB using maxmemory with 'noeviction' policy'. This avoids
2610 * useless crashes of the Redis instance for out of memory. */
2611 if (server.arch_bits == 32 && server.maxmemory == 0) {
2612 serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
2613 server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
2614 server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
2615 }
2616
2617 if (server.cluster_enabled) clusterInit();
2618 scriptingInit(1);
2619 functionsInit();
2620 slowlogInit();
2621 latencyMonitorInit();
2622
2623 /* Initialize ACL default password if it exists */
2624 ACLUpdateDefaultUserPassword(server.requirepass);
2625
2626 applyWatchdogPeriod();
2627}
2628
2629/* Some steps in server initialization need to be done last (after modules
2630 * are loaded).
2631 * Specifically, creation of threads due to a race bug in ld.so, in which
2632 * Thread Local Storage initialization collides with dlopen call.
2633 * see: https://sourceware.org/bugzilla/show_bug.cgi?id=19329 */
2634void InitServerLast() {
2635 bioInit();
2636 initThreadedIO();
2637 set_jemalloc_bg_thread(server.jemalloc_bg_thread);
2638 server.initial_memory_usage = zmalloc_used_memory();
2639}
2640
2641/* The purpose of this function is to try to "glue" consecutive range
2642 * key specs in order to build the legacy (first,last,step) spec
2643 * used by the COMMAND command.
2644 * By far the most common case is just one range spec (e.g. SET)
2645 * but some commands' ranges were split into two or more ranges
2646 * in order to have different flags for different keys (e.g. SMOVE,
2647 * first key is "RW ACCESS DELETE", second key is "RW INSERT").
2648 *
2649 * Additionally set the CMD_MOVABLE_KEYS flag for commands that may have key
2650 * names in their arguments, but the legacy range spec doesn't cover all of them.
2651 *
2652 * This function uses very basic heuristics and is "best effort":
2653 * 1. Only commands which have only "range" specs are considered.
2654 * 2. Only range specs with keystep of 1 are considered.
2655 * 3. The order of the range specs must be ascending (i.e.
2656 * lastkey of spec[i] == firstkey-1 of spec[i+1]).
2657 *
2658 * This function will succeed on all native Redis commands and may
2659 * fail on module commands, even if it only has "range" specs that
2660 * could actually be "glued", in the following cases:
2661 * 1. The order of "range" specs is not ascending (e.g. the spec for
2662 * the key at index 2 was added before the spec of the key at
2663 * index 1).
2664 * 2. The "range" specs have keystep >1.
2665 *
2666 * If this functions fails it means that the legacy (first,last,step)
2667 * spec used by COMMAND will show 0,0,0. This is not a dire situation
2668 * because anyway the legacy (first,last,step) spec is to be deprecated
2669 * and one should use the new key specs scheme.
2670 */
2671void populateCommandLegacyRangeSpec(struct redisCommand *c) {
2672 memset(&c->legacy_range_key_spec, 0, sizeof(c->legacy_range_key_spec));
2673
2674 /* Set the movablekeys flag if we have a GETKEYS flag for modules.
2675 * Note that for native redis commands, we always have keyspecs,
2676 * with enough information to rely on for movablekeys. */
2677 if (c->flags & CMD_MODULE_GETKEYS)
2678 c->flags |= CMD_MOVABLE_KEYS;
2679
2680 /* no key-specs, no keys, exit. */
2681 if (c->key_specs_num == 0) {
2682 return;
2683 }
2684
2685 if (c->key_specs_num == 1 &&
2686 c->key_specs[0].begin_search_type == KSPEC_BS_INDEX &&
2687 c->key_specs[0].find_keys_type == KSPEC_FK_RANGE)
2688 {
2689 /* Quick win, exactly one range spec. */
2690 c->legacy_range_key_spec = c->key_specs[0];
2691 /* If it has the incomplete flag, set the movablekeys flag on the command. */
2692 if (c->key_specs[0].flags & CMD_KEY_INCOMPLETE)
2693 c->flags |= CMD_MOVABLE_KEYS;
2694 return;
2695 }
2696
2697 int firstkey = INT_MAX, lastkey = 0;
2698 int prev_lastkey = 0;
2699 for (int i = 0; i < c->key_specs_num; i++) {
2700 if (c->key_specs[i].begin_search_type != KSPEC_BS_INDEX ||
2701 c->key_specs[i].find_keys_type != KSPEC_FK_RANGE)
2702 {
2703 /* Found an incompatible (non range) spec, skip it, and set the movablekeys flag. */
2704 c->flags |= CMD_MOVABLE_KEYS;
2705 continue;
2706 }
2707 if (c->key_specs[i].fk.range.keystep != 1 ||
2708 (prev_lastkey && prev_lastkey != c->key_specs[i].bs.index.pos-1))
2709 {
2710 /* Found a range spec that's not plain (step of 1) or not consecutive to the previous one.
2711 * Skip it, and we set the movablekeys flag. */
2712 c->flags |= CMD_MOVABLE_KEYS;
2713 continue;
2714 }
2715 if (c->key_specs[i].flags & CMD_KEY_INCOMPLETE) {
2716 /* The spec we're using is incomplete, we can use it, but we also have to set the movablekeys flag. */
2717 c->flags |= CMD_MOVABLE_KEYS;
2718 }
2719 firstkey = min(firstkey, c->key_specs[i].bs.index.pos);
2720 /* Get the absolute index for lastkey (in the "range" spec, lastkey is relative to firstkey) */
2721 int lastkey_abs_index = c->key_specs[i].fk.range.lastkey;
2722 if (lastkey_abs_index >= 0)
2723 lastkey_abs_index += c->key_specs[i].bs.index.pos;
2724 /* For lastkey we use unsigned comparison to handle negative values correctly */
2725 lastkey = max((unsigned)lastkey, (unsigned)lastkey_abs_index);
2726 prev_lastkey = lastkey;
2727 }
2728
2729 if (firstkey == INT_MAX) {
2730 /* Couldn't find range specs, the legacy range spec will remain empty, and we set the movablekeys flag. */
2731 c->flags |= CMD_MOVABLE_KEYS;
2732 return;
2733 }
2734
2735 serverAssert(firstkey != 0);
2736 serverAssert(lastkey != 0);
2737
2738 c->legacy_range_key_spec.begin_search_type = KSPEC_BS_INDEX;
2739 c->legacy_range_key_spec.bs.index.pos = firstkey;
2740 c->legacy_range_key_spec.find_keys_type = KSPEC_FK_RANGE;
2741 c->legacy_range_key_spec.fk.range.lastkey = lastkey < 0 ? lastkey : (lastkey-firstkey); /* in the "range" spec, lastkey is relative to firstkey */
2742 c->legacy_range_key_spec.fk.range.keystep = 1;
2743 c->legacy_range_key_spec.fk.range.limit = 0;
2744}
2745
2746sds catSubCommandFullname(const char *parent_name, const char *sub_name) {
2747 return sdscatfmt(sdsempty(), "%s|%s", parent_name, sub_name);
2748}
2749
2750void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name) {
2751 if (!parent->subcommands_dict)
2752 parent->subcommands_dict = dictCreate(&commandTableDictType);
2753
2754 subcommand->parent = parent; /* Assign the parent command */
2755 subcommand->id = ACLGetCommandID(subcommand->fullname); /* Assign the ID used for ACL. */
2756
2757 serverAssert(dictAdd(parent->subcommands_dict, sdsnew(declared_name), subcommand) == DICT_OK);
2758}
2759
2760/* Set implicit ACl categories (see comment above the definition of
2761 * struct redisCommand). */
2762void setImplicitACLCategories(struct redisCommand *c) {
2763 if (c->flags & CMD_WRITE)
2764 c->acl_categories |= ACL_CATEGORY_WRITE;
2765 /* Exclude scripting commands from the RO category. */
2766 if (c->flags & CMD_READONLY && !(c->acl_categories & ACL_CATEGORY_SCRIPTING))
2767 c->acl_categories |= ACL_CATEGORY_READ;
2768 if (c->flags & CMD_ADMIN)
2769 c->acl_categories |= ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS;
2770 if (c->flags & CMD_PUBSUB)
2771 c->acl_categories |= ACL_CATEGORY_PUBSUB;
2772 if (c->flags & CMD_FAST)
2773 c->acl_categories |= ACL_CATEGORY_FAST;
2774 if (c->flags & CMD_BLOCKING)
2775 c->acl_categories |= ACL_CATEGORY_BLOCKING;
2776
2777 /* If it's not @fast is @slow in this binary world. */
2778 if (!(c->acl_categories & ACL_CATEGORY_FAST))
2779 c->acl_categories |= ACL_CATEGORY_SLOW;
2780}
2781
2782/* Recursively populate the args structure (setting num_args to the number of
2783 * subargs) and return the number of args. */
2784int populateArgsStructure(struct redisCommandArg *args) {
2785 if (!args)
2786 return 0;
2787 int count = 0;
2788 while (args->name) {
2789 serverAssert(count < INT_MAX);
2790 args->num_args = populateArgsStructure(args->subargs);
2791 count++;
2792 args++;
2793 }
2794 return count;
2795}
2796
2797/* Recursively populate the command structure.
2798 *
2799 * On success, the function return C_OK. Otherwise C_ERR is returned and we won't
2800 * add this command in the commands dict. */
2801int populateCommandStructure(struct redisCommand *c) {
2802 /* If the command marks with CMD_SENTINEL, it exists in sentinel. */
2803 if (!(c->flags & CMD_SENTINEL) && server.sentinel_mode)
2804 return C_ERR;
2805
2806 /* If the command marks with CMD_ONLY_SENTINEL, it only exists in sentinel. */
2807 if (c->flags & CMD_ONLY_SENTINEL && !server.sentinel_mode)
2808 return C_ERR;
2809
2810 /* Translate the command string flags description into an actual
2811 * set of flags. */
2812 setImplicitACLCategories(c);
2813
2814 /* Redis commands don't need more args than STATIC_KEY_SPECS_NUM (Number of keys
2815 * specs can be greater than STATIC_KEY_SPECS_NUM only for module commands) */
2816 c->key_specs = c->key_specs_static;
2817 c->key_specs_max = STATIC_KEY_SPECS_NUM;
2818
2819 /* We start with an unallocated histogram and only allocate memory when a command
2820 * has been issued for the first time */
2821 c->latency_histogram = NULL;
2822
2823 for (int i = 0; i < STATIC_KEY_SPECS_NUM; i++) {
2824 if (c->key_specs[i].begin_search_type == KSPEC_BS_INVALID)
2825 break;
2826 c->key_specs_num++;
2827 }
2828
2829 /* Count things so we don't have to use deferred reply in COMMAND reply. */
2830 while (c->history && c->history[c->num_history].since)
2831 c->num_history++;
2832 while (c->tips && c->tips[c->num_tips])
2833 c->num_tips++;
2834 c->num_args = populateArgsStructure(c->args);
2835
2836 /* Handle the legacy range spec and the "movablekeys" flag (must be done after populating all key specs). */
2837 populateCommandLegacyRangeSpec(c);
2838
2839 /* Assign the ID used for ACL. */
2840 c->id = ACLGetCommandID(c->fullname);
2841
2842 /* Handle subcommands */
2843 if (c->subcommands) {
2844 for (int j = 0; c->subcommands[j].declared_name; j++) {
2845 struct redisCommand *sub = c->subcommands+j;
2846
2847 sub->fullname = catSubCommandFullname(c->declared_name, sub->declared_name);
2848 if (populateCommandStructure(sub) == C_ERR)
2849 continue;
2850
2851 commandAddSubcommand(c, sub, sub->declared_name);
2852 }
2853 }
2854
2855 return C_OK;
2856}
2857
2858extern struct redisCommand redisCommandTable[];
2859
2860/* Populates the Redis Command Table dict from the static table in commands.c
2861 * which is auto generated from the json files in the commands folder. */
2862void populateCommandTable(void) {
2863 int j;
2864 struct redisCommand *c;
2865
2866 for (j = 0;; j++) {
2867 c = redisCommandTable + j;
2868 if (c->declared_name == NULL)
2869 break;
2870
2871 int retval1, retval2;
2872
2873 c->fullname = sdsnew(c->declared_name);
2874 if (populateCommandStructure(c) == C_ERR)
2875 continue;
2876
2877 retval1 = dictAdd(server.commands, sdsdup(c->fullname), c);
2878 /* Populate an additional dictionary that will be unaffected
2879 * by rename-command statements in redis.conf. */
2880 retval2 = dictAdd(server.orig_commands, sdsdup(c->fullname), c);
2881 serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
2882 }
2883}
2884
2885void resetCommandTableStats(dict* commands) {
2886 struct redisCommand *c;
2887 dictEntry *de;
2888 dictIterator *di;
2889
2890 di = dictGetSafeIterator(commands);
2891 while((de = dictNext(di)) != NULL) {
2892 c = (struct redisCommand *) dictGetVal(de);
2893 c->microseconds = 0;
2894 c->calls = 0;
2895 c->rejected_calls = 0;
2896 c->failed_calls = 0;
2897 if(c->latency_histogram) {
2898 hdr_close(c->latency_histogram);
2899 c->latency_histogram = NULL;
2900 }
2901 if (c->subcommands_dict)
2902 resetCommandTableStats(c->subcommands_dict);
2903 }
2904 dictReleaseIterator(di);
2905}
2906
2907void resetErrorTableStats(void) {
2908 raxFreeWithCallback(server.errors, zfree);
2909 server.errors = raxNew();
2910}
2911
2912/* ========================== Redis OP Array API ============================ */
2913
2914void redisOpArrayInit(redisOpArray *oa) {
2915 oa->ops = NULL;
2916 oa->numops = 0;
2917 oa->capacity = 0;
2918}
2919
2920int redisOpArrayAppend(redisOpArray *oa, int dbid, robj **argv, int argc, int target) {
2921 redisOp *op;
2922 int prev_capacity = oa->capacity;
2923
2924 if (oa->numops == 0) {
2925 oa->capacity = 16;
2926 } else if (oa->numops >= oa->capacity) {
2927 oa->capacity *= 2;
2928 }
2929
2930 if (prev_capacity != oa->capacity)
2931 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*oa->capacity);
2932 op = oa->ops+oa->numops;
2933 op->dbid = dbid;
2934 op->argv = argv;
2935 op->argc = argc;
2936 op->target = target;
2937 oa->numops++;
2938 return oa->numops;
2939}
2940
2941void redisOpArrayFree(redisOpArray *oa) {
2942 while(oa->numops) {
2943 int j;
2944 redisOp *op;
2945
2946 oa->numops--;
2947 op = oa->ops+oa->numops;
2948 for (j = 0; j < op->argc; j++)
2949 decrRefCount(op->argv[j]);
2950 zfree(op->argv);
2951 }
2952 zfree(oa->ops);
2953 redisOpArrayInit(oa);
2954}
2955
2956/* ====================== Commands lookup and execution ===================== */
2957
2958int isContainerCommandBySds(sds s) {
2959 struct redisCommand *base_cmd = dictFetchValue(server.commands, s);
2960 int has_subcommands = base_cmd && base_cmd->subcommands_dict;
2961 return has_subcommands;
2962}
2963
2964struct redisCommand *lookupSubcommand(struct redisCommand *container, sds sub_name) {
2965 return dictFetchValue(container->subcommands_dict, sub_name);
2966}
2967
2968/* Look up a command by argv and argc
2969 *
2970 * If `strict` is not 0 we expect argc to be exact (i.e. argc==2
2971 * for a subcommand and argc==1 for a top-level command)
2972 * `strict` should be used every time we want to look up a command
2973 * name (e.g. in COMMAND INFO) rather than to find the command
2974 * a user requested to execute (in processCommand).
2975 */
2976struct redisCommand *lookupCommandLogic(dict *commands, robj **argv, int argc, int strict) {
2977 struct redisCommand *base_cmd = dictFetchValue(commands, argv[0]->ptr);
2978 int has_subcommands = base_cmd && base_cmd->subcommands_dict;
2979 if (argc == 1 || !has_subcommands) {
2980 if (strict && argc != 1)
2981 return NULL;
2982 /* Note: It is possible that base_cmd->proc==NULL (e.g. CONFIG) */
2983 return base_cmd;
2984 } else { /* argc > 1 && has_subcommands */
2985 if (strict && argc != 2)
2986 return NULL;
2987 /* Note: Currently we support just one level of subcommands */
2988 return lookupSubcommand(base_cmd, argv[1]->ptr);
2989 }
2990}
2991
2992struct redisCommand *lookupCommand(robj **argv, int argc) {
2993 return lookupCommandLogic(server.commands,argv,argc,0);
2994}
2995
2996struct redisCommand *lookupCommandBySdsLogic(dict *commands, sds s) {
2997 int argc, j;
2998 sds *strings = sdssplitlen(s,sdslen(s),"|",1,&argc);
2999 if (strings == NULL)
3000 return NULL;
3001 if (argc > 2) {
3002 /* Currently we support just one level of subcommands */
3003 sdsfreesplitres(strings,argc);
3004 return NULL;
3005 }
3006
3007 robj objects[argc];
3008 robj *argv[argc];
3009 for (j = 0; j < argc; j++) {
3010 initStaticStringObject(objects[j],strings[j]);
3011 argv[j] = &objects[j];
3012 }
3013
3014 struct redisCommand *cmd = lookupCommandLogic(commands,argv,argc,1);
3015 sdsfreesplitres(strings,argc);
3016 return cmd;
3017}
3018
3019struct redisCommand *lookupCommandBySds(sds s) {
3020 return lookupCommandBySdsLogic(server.commands,s);
3021}
3022
3023struct redisCommand *lookupCommandByCStringLogic(dict *commands, const char *s) {
3024 struct redisCommand *cmd;
3025 sds name = sdsnew(s);
3026
3027 cmd = lookupCommandBySdsLogic(commands,name);
3028 sdsfree(name);
3029 return cmd;
3030}
3031
3032struct redisCommand *lookupCommandByCString(const char *s) {
3033 return lookupCommandByCStringLogic(server.commands,s);
3034}
3035
3036/* Lookup the command in the current table, if not found also check in
3037 * the original table containing the original command names unaffected by
3038 * redis.conf rename-command statement.
3039 *
3040 * This is used by functions rewriting the argument vector such as
3041 * rewriteClientCommandVector() in order to set client->cmd pointer
3042 * correctly even if the command was renamed. */
3043struct redisCommand *lookupCommandOrOriginal(robj **argv ,int argc) {
3044 struct redisCommand *cmd = lookupCommandLogic(server.commands, argv, argc, 0);
3045
3046 if (!cmd) cmd = lookupCommandLogic(server.orig_commands, argv, argc, 0);
3047 return cmd;
3048}
3049
3050/* Commands arriving from the master client or AOF client, should never be rejected. */
3051int mustObeyClient(client *c) {
3052 return c->id == CLIENT_ID_AOF || c->flags & CLIENT_MASTER;
3053}
3054
3055static int shouldPropagate(int target) {
3056 if (!server.replication_allowed || target == PROPAGATE_NONE || server.loading)
3057 return 0;
3058
3059 if (target & PROPAGATE_AOF) {
3060 if (server.aof_state != AOF_OFF)
3061 return 1;
3062 }
3063 if (target & PROPAGATE_REPL) {
3064 if (server.masterhost == NULL && (server.repl_backlog || listLength(server.slaves) != 0))
3065 return 1;
3066 }
3067
3068 return 0;
3069}
3070
3071/* Propagate the specified command (in the context of the specified database id)
3072 * to AOF and Slaves.
3073 *
3074 * flags are an xor between:
3075 * + PROPAGATE_NONE (no propagation of command at all)
3076 * + PROPAGATE_AOF (propagate into the AOF file if is enabled)
3077 * + PROPAGATE_REPL (propagate into the replication link)
3078 *
3079 * This is an internal low-level function and should not be called!
3080 *
3081 * The API for propagating commands is alsoPropagate().
3082 */
3083static void propagateNow(int dbid, robj **argv, int argc, int target) {
3084 if (!shouldPropagate(target))
3085 return;
3086
3087 /* This needs to be unreachable since the dataset should be fixed during
3088 * client pause, otherwise data may be lost during a failover. */
3089 serverAssert(!(areClientsPaused() && !server.client_pause_in_transaction));
3090
3091 if (server.aof_state != AOF_OFF && target & PROPAGATE_AOF)
3092 feedAppendOnlyFile(dbid,argv,argc);
3093 if (target & PROPAGATE_REPL)
3094 replicationFeedSlaves(server.slaves,dbid,argv,argc);
3095}
3096
3097/* Used inside commands to schedule the propagation of additional commands
3098 * after the current command is propagated to AOF / Replication.
3099 *
3100 * dbid is the database ID the command should be propagated into.
3101 * Arguments of the command to propagate are passed as an array of redis
3102 * objects pointers of len 'argc', using the 'argv' vector.
3103 *
3104 * The function does not take a reference to the passed 'argv' vector,
3105 * so it is up to the caller to release the passed argv (but it is usually
3106 * stack allocated). The function automatically increments ref count of
3107 * passed objects, so the caller does not need to. */
3108void alsoPropagate(int dbid, robj **argv, int argc, int target) {
3109 robj **argvcopy;
3110 int j;
3111
3112 if (!shouldPropagate(target))
3113 return;
3114
3115 argvcopy = zmalloc(sizeof(robj*)*argc);
3116 for (j = 0; j < argc; j++) {
3117 argvcopy[j] = argv[j];
3118 incrRefCount(argv[j]);
3119 }
3120 redisOpArrayAppend(&server.also_propagate,dbid,argvcopy,argc,target);
3121}
3122
3123/* It is possible to call the function forceCommandPropagation() inside a
3124 * Redis command implementation in order to to force the propagation of a
3125 * specific command execution into AOF / Replication. */
3126void forceCommandPropagation(client *c, int flags) {
3127 serverAssert(c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE));
3128 if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
3129 if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
3130}
3131
3132/* Avoid that the executed command is propagated at all. This way we
3133 * are free to just propagate what we want using the alsoPropagate()
3134 * API. */
3135void preventCommandPropagation(client *c) {
3136 c->flags |= CLIENT_PREVENT_PROP;
3137}
3138
3139/* AOF specific version of preventCommandPropagation(). */
3140void preventCommandAOF(client *c) {
3141 c->flags |= CLIENT_PREVENT_AOF_PROP;
3142}
3143
3144/* Replication specific version of preventCommandPropagation(). */
3145void preventCommandReplication(client *c) {
3146 c->flags |= CLIENT_PREVENT_REPL_PROP;
3147}
3148
3149/* Log the last command a client executed into the slowlog. */
3150void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration) {
3151 /* Some commands may contain sensitive data that should not be available in the slowlog. */
3152 if (cmd->flags & CMD_SKIP_SLOWLOG)
3153 return;
3154
3155 /* If command argument vector was rewritten, use the original
3156 * arguments. */
3157 robj **argv = c->original_argv ? c->original_argv : c->argv;
3158 int argc = c->original_argv ? c->original_argc : c->argc;
3159 slowlogPushEntryIfNeeded(c,argv,argc,duration);
3160}
3161
3162/* This function is called in order to update the total command histogram duration.
3163 * The latency unit is nano-seconds.
3164 * If needed it will allocate the histogram memory and trim the duration to the upper/lower tracking limits*/
3165void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist){
3166 if (duration_hist < LATENCY_HISTOGRAM_MIN_VALUE)
3167 duration_hist=LATENCY_HISTOGRAM_MIN_VALUE;
3168 if (duration_hist>LATENCY_HISTOGRAM_MAX_VALUE)
3169 duration_hist=LATENCY_HISTOGRAM_MAX_VALUE;
3170 if (*latency_histogram==NULL)
3171 hdr_init(LATENCY_HISTOGRAM_MIN_VALUE,LATENCY_HISTOGRAM_MAX_VALUE,LATENCY_HISTOGRAM_PRECISION,latency_histogram);
3172 hdr_record_value(*latency_histogram,duration_hist);
3173}
3174
3175/* Handle the alsoPropagate() API to handle commands that want to propagate
3176 * multiple separated commands. Note that alsoPropagate() is not affected
3177 * by CLIENT_PREVENT_PROP flag. */
3178void propagatePendingCommands() {
3179 if (server.also_propagate.numops == 0)
3180 return;
3181
3182 int j;
3183 redisOp *rop;
3184 int multi_emitted = 0;
3185
3186 /* Wrap the commands in server.also_propagate array,
3187 * but don't wrap it if we are already in MULTI context,
3188 * in case the nested MULTI/EXEC.
3189 *
3190 * And if the array contains only one command, no need to
3191 * wrap it, since the single command is atomic. */
3192 if (server.also_propagate.numops > 1 && !server.propagate_no_multi) {
3193 /* We use the first command-to-propagate to set the dbid for MULTI,
3194 * so that the SELECT will be propagated beforehand */
3195 int multi_dbid = server.also_propagate.ops[0].dbid;
3196 propagateNow(multi_dbid,&shared.multi,1,PROPAGATE_AOF|PROPAGATE_REPL);
3197 multi_emitted = 1;
3198 }
3199
3200 for (j = 0; j < server.also_propagate.numops; j++) {
3201 rop = &server.also_propagate.ops[j];
3202 serverAssert(rop->target);
3203 propagateNow(rop->dbid,rop->argv,rop->argc,rop->target);
3204 }
3205
3206 if (multi_emitted) {
3207 /* We take the dbid from last command so that propagateNow() won't inject another SELECT */
3208 int exec_dbid = server.also_propagate.ops[server.also_propagate.numops-1].dbid;
3209 propagateNow(exec_dbid,&shared.exec,1,PROPAGATE_AOF|PROPAGATE_REPL);
3210 }
3211
3212 redisOpArrayFree(&server.also_propagate);
3213}
3214
3215/* Increment the command failure counters (either rejected_calls or failed_calls).
3216 * The decision which counter to increment is done using the flags argument, options are:
3217 * * ERROR_COMMAND_REJECTED - update rejected_calls
3218 * * ERROR_COMMAND_FAILED - update failed_calls
3219 *
3220 * The function also reset the prev_err_count to make sure we will not count the same error
3221 * twice, its possible to pass a NULL cmd value to indicate that the error was counted elsewhere.
3222 *
3223 * The function returns true if stats was updated and false if not. */
3224int incrCommandStatsOnError(struct redisCommand *cmd, int flags) {
3225 /* hold the prev error count captured on the last command execution */
3226 static long long prev_err_count = 0;
3227 int res = 0;
3228 if (cmd) {
3229 if ((server.stat_total_error_replies - prev_err_count) > 0) {
3230 if (flags & ERROR_COMMAND_REJECTED) {
3231 cmd->rejected_calls++;
3232 res = 1;
3233 } else if (flags & ERROR_COMMAND_FAILED) {
3234 cmd->failed_calls++;
3235 res = 1;
3236 }
3237 }
3238 }
3239 prev_err_count = server.stat_total_error_replies;
3240 return res;
3241}
3242
3243/* Call() is the core of Redis execution of a command.
3244 *
3245 * The following flags can be passed:
3246 * CMD_CALL_NONE No flags.
3247 * CMD_CALL_SLOWLOG Check command speed and log in the slow log if needed.
3248 * CMD_CALL_STATS Populate command stats.
3249 * CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset
3250 * or if the client flags are forcing propagation.
3251 * CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset
3252 * or if the client flags are forcing propagation.
3253 * CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL.
3254 * CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE.
3255 *
3256 * The exact propagation behavior depends on the client flags.
3257 * Specifically:
3258 *
3259 * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
3260 * and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
3261 * in the call flags, then the command is propagated even if the
3262 * dataset was not affected by the command.
3263 * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
3264 * are set, the propagation into AOF or to slaves is not performed even
3265 * if the command modified the dataset.
3266 *
3267 * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
3268 * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
3269 * slaves propagation will never occur.
3270 *
3271 * Client flags are modified by the implementation of a given command
3272 * using the following API:
3273 *
3274 * forceCommandPropagation(client *c, int flags);
3275 * preventCommandPropagation(client *c);
3276 * preventCommandAOF(client *c);
3277 * preventCommandReplication(client *c);
3278 *
3279 */
3280void call(client *c, int flags) {
3281 long long dirty;
3282 uint64_t client_old_flags = c->flags;
3283 struct redisCommand *real_cmd = c->realcmd;
3284
3285 /* Initialization: clear the flags that must be set by the command on
3286 * demand, and initialize the array for additional commands propagation. */
3287 c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
3288
3289 /* Redis core is in charge of propagation when the first entry point
3290 * of call() is processCommand().
3291 * The only other option to get to call() without having processCommand
3292 * as an entry point is if a module triggers RM_Call outside of call()
3293 * context (for example, in a timer).
3294 * In that case, the module is in charge of propagation.
3295 *
3296 * Because call() is re-entrant we have to cache and restore
3297 * server.core_propagates. */
3298 int prev_core_propagates = server.core_propagates;
3299 if (!server.core_propagates && !(flags & CMD_CALL_FROM_MODULE))
3300 server.core_propagates = 1;
3301
3302 /* Call the command. */
3303 dirty = server.dirty;
3304 incrCommandStatsOnError(NULL, 0);
3305
3306 const long long call_timer = ustime();
3307
3308 /* Update cache time, in case we have nested calls we want to
3309 * update only on the first call*/
3310 if (server.fixed_time_expire++ == 0) {
3311 updateCachedTimeWithUs(0,call_timer);
3312 }
3313
3314 monotime monotonic_start = 0;
3315 if (monotonicGetType() == MONOTONIC_CLOCK_HW)
3316 monotonic_start = getMonotonicUs();
3317
3318 server.in_nested_call++;
3319 c->cmd->proc(c);
3320 server.in_nested_call--;
3321
3322 /* In order to avoid performance implication due to querying the clock using a system call 3 times,
3323 * we use a monotonic clock, when we are sure its cost is very low, and fall back to non-monotonic call otherwise. */
3324 ustime_t duration;
3325 if (monotonicGetType() == MONOTONIC_CLOCK_HW)
3326 duration = getMonotonicUs() - monotonic_start;
3327 else
3328 duration = ustime() - call_timer;
3329
3330 c->duration = duration;
3331 dirty = server.dirty-dirty;
3332 if (dirty < 0) dirty = 0;
3333
3334 /* Update failed command calls if required. */
3335
3336 if (!incrCommandStatsOnError(real_cmd, ERROR_COMMAND_FAILED) && c->deferred_reply_errors) {
3337 /* When call is used from a module client, error stats, and total_error_replies
3338 * isn't updated since these errors, if handled by the module, are internal,
3339 * and not reflected to users. however, the commandstats does show these calls
3340 * (made by RM_Call), so it should log if they failed or succeeded. */
3341 real_cmd->failed_calls++;
3342 }
3343
3344 /* After executing command, we will close the client after writing entire
3345 * reply if it is set 'CLIENT_CLOSE_AFTER_COMMAND' flag. */
3346 if (c->flags & CLIENT_CLOSE_AFTER_COMMAND) {
3347 c->flags &= ~CLIENT_CLOSE_AFTER_COMMAND;
3348 c->flags |= CLIENT_CLOSE_AFTER_REPLY;
3349 }
3350
3351 /* When EVAL is called loading the AOF we don't want commands called
3352 * from Lua to go into the slowlog or to populate statistics. */
3353 if (server.loading && c->flags & CLIENT_SCRIPT)
3354 flags &= ~(CMD_CALL_SLOWLOG | CMD_CALL_STATS);
3355
3356 /* If the caller is Lua, we want to force the EVAL caller to propagate
3357 * the script if the command flag or client flag are forcing the
3358 * propagation. */
3359 if (c->flags & CLIENT_SCRIPT && server.script_caller) {
3360 if (c->flags & CLIENT_FORCE_REPL)
3361 server.script_caller->flags |= CLIENT_FORCE_REPL;
3362 if (c->flags & CLIENT_FORCE_AOF)
3363 server.script_caller->flags |= CLIENT_FORCE_AOF;
3364 }
3365
3366 /* Note: the code below uses the real command that was executed
3367 * c->cmd and c->lastcmd may be different, in case of MULTI-EXEC or
3368 * re-written commands such as EXPIRE, GEOADD, etc. */
3369
3370 /* Record the latency this command induced on the main thread.
3371 * unless instructed by the caller not to log. (happens when processing
3372 * a MULTI-EXEC from inside an AOF). */
3373 if (flags & CMD_CALL_SLOWLOG) {
3374 char *latency_event = (real_cmd->flags & CMD_FAST) ?
3375 "fast-command" : "command";
3376 latencyAddSampleIfNeeded(latency_event,duration/1000);
3377 }
3378
3379 /* Log the command into the Slow log if needed.
3380 * If the client is blocked we will handle slowlog when it is unblocked. */
3381 if ((flags & CMD_CALL_SLOWLOG) && !(c->flags & CLIENT_BLOCKED))
3382 slowlogPushCurrentCommand(c, real_cmd, duration);
3383
3384 /* Send the command to clients in MONITOR mode if applicable.
3385 * Administrative commands are considered too dangerous to be shown. */
3386 if (!(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN))) {
3387 robj **argv = c->original_argv ? c->original_argv : c->argv;
3388 int argc = c->original_argv ? c->original_argc : c->argc;
3389 replicationFeedMonitors(c,server.monitors,c->db->id,argv,argc);
3390 }
3391
3392 /* Clear the original argv.
3393 * If the client is blocked we will handle slowlog when it is unblocked. */
3394 if (!(c->flags & CLIENT_BLOCKED))
3395 freeClientOriginalArgv(c);
3396
3397 /* populate the per-command statistics that we show in INFO commandstats. */
3398 if (flags & CMD_CALL_STATS) {
3399 real_cmd->microseconds += duration;
3400 real_cmd->calls++;
3401 /* If the client is blocked we will handle latency stats when it is unblocked. */
3402 if (server.latency_tracking_enabled && !(c->flags & CLIENT_BLOCKED))
3403 updateCommandLatencyHistogram(&(real_cmd->latency_histogram), duration*1000);
3404 }
3405
3406 /* Propagate the command into the AOF and replication link.
3407 * We never propagate EXEC explicitly, it will be implicitly
3408 * propagated if needed (see propagatePendingCommands).
3409 * Also, module commands take care of themselves */
3410 if (flags & CMD_CALL_PROPAGATE &&
3411 (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP &&
3412 c->cmd->proc != execCommand &&
3413 !(c->cmd->flags & CMD_MODULE))
3414 {
3415 int propagate_flags = PROPAGATE_NONE;
3416
3417 /* Check if the command operated changes in the data set. If so
3418 * set for replication / AOF propagation. */
3419 if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
3420
3421 /* If the client forced AOF / replication of the command, set
3422 * the flags regardless of the command effects on the data set. */
3423 if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
3424 if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
3425
3426 /* However prevent AOF / replication propagation if the command
3427 * implementation called preventCommandPropagation() or similar,
3428 * or if we don't have the call() flags to do so. */
3429 if (c->flags & CLIENT_PREVENT_REPL_PROP ||
3430 !(flags & CMD_CALL_PROPAGATE_REPL))
3431 propagate_flags &= ~PROPAGATE_REPL;
3432 if (c->flags & CLIENT_PREVENT_AOF_PROP ||
3433 !(flags & CMD_CALL_PROPAGATE_AOF))
3434 propagate_flags &= ~PROPAGATE_AOF;
3435
3436 /* Call alsoPropagate() only if at least one of AOF / replication
3437 * propagation is needed. */
3438 if (propagate_flags != PROPAGATE_NONE)
3439 alsoPropagate(c->db->id,c->argv,c->argc,propagate_flags);
3440 }
3441
3442 /* Restore the old replication flags, since call() can be executed
3443 * recursively. */
3444 c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
3445 c->flags |= client_old_flags &
3446 (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
3447
3448 /* If the client has keys tracking enabled for client side caching,
3449 * make sure to remember the keys it fetched via this command. Scripting
3450 * works a bit differently, where if the scripts executes any read command, it
3451 * remembers all of the declared keys from the script. */
3452 if ((c->cmd->flags & CMD_READONLY) && (c->cmd->proc != evalRoCommand)
3453 && (c->cmd->proc != evalShaRoCommand) && (c->cmd->proc != fcallroCommand))
3454 {
3455 client *caller = (c->flags & CLIENT_SCRIPT && server.script_caller) ?
3456 server.script_caller : c;
3457 if (caller->flags & CLIENT_TRACKING &&
3458 !(caller->flags & CLIENT_TRACKING_BCAST))
3459 {
3460 trackingRememberKeys(caller);
3461 }
3462 }
3463
3464 server.fixed_time_expire--;
3465 server.stat_numcommands++;
3466
3467 /* Record peak memory after each command and before the eviction that runs
3468 * before the next command. */
3469 size_t zmalloc_used = zmalloc_used_memory();
3470 if (zmalloc_used > server.stat_peak_memory)
3471 server.stat_peak_memory = zmalloc_used;
3472
3473 /* Do some maintenance job and cleanup */
3474 afterCommand(c);
3475
3476 /* Client pause takes effect after a transaction has finished. This needs
3477 * to be located after everything is propagated. */
3478 if (!server.in_exec && server.client_pause_in_transaction) {
3479 server.client_pause_in_transaction = 0;
3480 }
3481
3482 server.core_propagates = prev_core_propagates;
3483}
3484
3485/* Used when a command that is ready for execution needs to be rejected, due to
3486 * various pre-execution checks. it returns the appropriate error to the client.
3487 * If there's a transaction is flags it as dirty, and if the command is EXEC,
3488 * it aborts the transaction.
3489 * Note: 'reply' is expected to end with \r\n */
3490void rejectCommand(client *c, robj *reply) {
3491 flagTransaction(c);
3492 if (c->cmd) c->cmd->rejected_calls++;
3493 if (c->cmd && c->cmd->proc == execCommand) {
3494 execCommandAbort(c, reply->ptr);
3495 } else {
3496 /* using addReplyError* rather than addReply so that the error can be logged. */
3497 addReplyErrorObject(c, reply);
3498 }
3499}
3500
3501void rejectCommandSds(client *c, sds s) {
3502 flagTransaction(c);
3503 if (c->cmd) c->cmd->rejected_calls++;
3504 if (c->cmd && c->cmd->proc == execCommand) {
3505 execCommandAbort(c, s);
3506 sdsfree(s);
3507 } else {
3508 /* The following frees 's'. */
3509 addReplyErrorSds(c, s);
3510 }
3511}
3512
3513void rejectCommandFormat(client *c, const char *fmt, ...) {
3514 va_list ap;
3515 va_start(ap,fmt);
3516 sds s = sdscatvprintf(sdsempty(),fmt,ap);
3517 va_end(ap);
3518 /* Make sure there are no newlines in the string, otherwise invalid protocol
3519 * is emitted (The args come from the user, they may contain any character). */
3520 sdsmapchars(s, "\r\n", " ", 2);
3521 rejectCommandSds(c, s);
3522}
3523
3524/* This is called after a command in call, we can do some maintenance job in it. */
3525void afterCommand(client *c) {
3526 UNUSED(c);
3527 if (!server.in_nested_call) {
3528 /* If we are at the top-most call() we can propagate what we accumulated.
3529 * Should be done before trackingHandlePendingKeyInvalidations so that we
3530 * reply to client before invalidating cache (makes more sense) */
3531 if (server.core_propagates)
3532 propagatePendingCommands();
3533 /* Flush pending invalidation messages only when we are not in nested call.
3534 * So the messages are not interleaved with transaction response. */
3535 trackingHandlePendingKeyInvalidations();
3536 }
3537}
3538
3539/* Check if c->cmd exists, fills `err` with details in case it doesn't.
3540 * Return 1 if exists. */
3541int commandCheckExistence(client *c, sds *err) {
3542 if (c->cmd)
3543 return 1;
3544 if (!err)
3545 return 0;
3546 if (isContainerCommandBySds(c->argv[0]->ptr)) {
3547 /* If we can't find the command but argv[0] by itself is a command
3548 * it means we're dealing with an invalid subcommand. Print Help. */
3549 sds cmd = sdsnew((char *)c->argv[0]->ptr);
3550 sdstoupper(cmd);
3551 *err = sdsnew(NULL);
3552 *err = sdscatprintf(*err, "unknown subcommand '%.128s'. Try %s HELP.",
3553 (char *)c->argv[1]->ptr, cmd);
3554 sdsfree(cmd);
3555 } else {
3556 sds args = sdsempty();
3557 int i;
3558 for (i=1; i < c->argc && sdslen(args) < 128; i++)
3559 args = sdscatprintf(args, "'%.*s' ", 128-(int)sdslen(args), (char*)c->argv[i]->ptr);
3560 *err = sdsnew(NULL);
3561 *err = sdscatprintf(*err, "unknown command '%.128s', with args beginning with: %s",
3562 (char*)c->argv[0]->ptr, args);
3563 sdsfree(args);
3564 }
3565 /* Make sure there are no newlines in the string, otherwise invalid protocol
3566 * is emitted (The args come from the user, they may contain any character). */
3567 sdsmapchars(*err, "\r\n", " ", 2);
3568 return 0;
3569}
3570
3571/* Check if c->argc is valid for c->cmd, fills `err` with details in case it isn't.
3572 * Return 1 if valid. */
3573int commandCheckArity(client *c, sds *err) {
3574 if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
3575 (c->argc < -c->cmd->arity))
3576 {
3577 if (err) {
3578 *err = sdsnew(NULL);
3579 *err = sdscatprintf(*err, "wrong number of arguments for '%s' command", c->cmd->fullname);
3580 }
3581 return 0;
3582 }
3583
3584 return 1;
3585}
3586
3587/* If this function gets called we already read a whole
3588 * command, arguments are in the client argv/argc fields.
3589 * processCommand() execute the command or prepare the
3590 * server for a bulk read from the client.
3591 *
3592 * If C_OK is returned the client is still alive and valid and
3593 * other operations can be performed by the caller. Otherwise
3594 * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
3595int processCommand(client *c) {
3596 if (!scriptIsTimedout()) {
3597 /* Both EXEC and scripts call call() directly so there should be
3598 * no way in_exec or scriptIsRunning() is 1.
3599 * That is unless lua_timedout, in which case client may run
3600 * some commands. */
3601 serverAssert(!server.in_exec);
3602 serverAssert(!scriptIsRunning());
3603 }
3604
3605 moduleCallCommandFilters(c);
3606
3607 /* Handle possible security attacks. */
3608 if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
3609 securityWarningCommand(c);
3610 return C_ERR;
3611 }
3612
3613 /* If we're inside a module blocked context yielding that wants to avoid
3614 * processing clients, postpone the command. */
3615 if (server.busy_module_yield_flags != BUSY_MODULE_YIELD_NONE &&
3616 !(server.busy_module_yield_flags & BUSY_MODULE_YIELD_CLIENTS))
3617 {
3618 c->bpop.timeout = 0;
3619 blockClient(c,BLOCKED_POSTPONE);
3620 return C_OK;
3621 }
3622
3623 /* Now lookup the command and check ASAP about trivial error conditions
3624 * such as wrong arity, bad command name and so forth. */
3625 c->cmd = c->lastcmd = c->realcmd = lookupCommand(c->argv,c->argc);
3626 sds err;
3627 if (!commandCheckExistence(c, &err)) {
3628 rejectCommandSds(c, err);
3629 return C_OK;
3630 }
3631 if (!commandCheckArity(c, &err)) {
3632 rejectCommandSds(c, err);
3633 return C_OK;
3634 }
3635
3636 /* Check if the command is marked as protected and the relevant configuration allows it */
3637 if (c->cmd->flags & CMD_PROTECTED) {
3638 if ((c->cmd->proc == debugCommand && !allowProtectedAction(server.enable_debug_cmd, c)) ||
3639 (c->cmd->proc == moduleCommand && !allowProtectedAction(server.enable_module_cmd, c)))
3640 {
3641 rejectCommandFormat(c,"%s command not allowed. If the %s option is set to \"local\", "
3642 "you can run it from a local connection, otherwise you need to set this option "
3643 "in the configuration file, and then restart the server.",
3644 c->cmd->proc == debugCommand ? "DEBUG" : "MODULE",
3645 c->cmd->proc == debugCommand ? "enable-debug-command" : "enable-module-command");
3646 return C_OK;
3647
3648 }
3649 }
3650
3651 /* If we're executing a script, try to extract a set of command flags from
3652 * it, in case it declared them. Note this is just an attempt, we don't yet
3653 * know the script command is well formed.*/
3654 uint64_t cmd_flags = c->cmd->flags;
3655 if (c->cmd->proc == evalCommand || c->cmd->proc == evalShaCommand ||
3656 c->cmd->proc == evalRoCommand || c->cmd->proc == evalShaRoCommand ||
3657 c->cmd->proc == fcallCommand || c->cmd->proc == fcallroCommand)
3658 {
3659 if (c->cmd->proc == fcallCommand || c->cmd->proc == fcallroCommand)
3660 cmd_flags = fcallGetCommandFlags(c, cmd_flags);
3661 else
3662 cmd_flags = evalGetCommandFlags(c, cmd_flags);
3663 }
3664
3665 int is_read_command = (cmd_flags & CMD_READONLY) ||
3666 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_READONLY));
3667 int is_write_command = (cmd_flags & CMD_WRITE) ||
3668 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
3669 int is_denyoom_command = (cmd_flags & CMD_DENYOOM) ||
3670 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_DENYOOM));
3671 int is_denystale_command = !(cmd_flags & CMD_STALE) ||
3672 (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_STALE));
3673 int is_denyloading_command = !(cmd_flags & CMD_LOADING) ||
3674 (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_LOADING));
3675 int is_may_replicate_command = (cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)) ||
3676 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)));
3677 int is_deny_async_loading_command = (cmd_flags & CMD_NO_ASYNC_LOADING) ||
3678 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_NO_ASYNC_LOADING));
3679 int obey_client = mustObeyClient(c);
3680
3681 if (authRequired(c)) {
3682 /* AUTH and HELLO and no auth commands are valid even in
3683 * non-authenticated state. */
3684 if (!(c->cmd->flags & CMD_NO_AUTH)) {
3685 rejectCommand(c,shared.noautherr);
3686 return C_OK;
3687 }
3688 }
3689
3690 if (c->flags & CLIENT_MULTI && c->cmd->flags & CMD_NO_MULTI) {
3691 rejectCommandFormat(c,"Command not allowed inside a transaction");
3692 return C_OK;
3693 }
3694
3695 /* Check if the user can run this command according to the current
3696 * ACLs. */
3697 int acl_errpos;
3698 int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
3699 if (acl_retval != ACL_OK) {
3700 addACLLogEntry(c,acl_retval,(c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL,acl_errpos,NULL,NULL);
3701 switch (acl_retval) {
3702 case ACL_DENIED_CMD:
3703 {
3704 rejectCommandFormat(c,
3705 "-NOPERM this user has no permissions to run "
3706 "the '%s' command", c->cmd->fullname);
3707 break;
3708 }
3709 case ACL_DENIED_KEY:
3710 rejectCommandFormat(c,
3711 "-NOPERM this user has no permissions to access "
3712 "one of the keys used as arguments");
3713 break;
3714 case ACL_DENIED_CHANNEL:
3715 rejectCommandFormat(c,
3716 "-NOPERM this user has no permissions to access "
3717 "one of the channels used as arguments");
3718 break;
3719 default:
3720 rejectCommandFormat(c, "no permission");
3721 break;
3722 }
3723 return C_OK;
3724 }
3725
3726 /* If cluster is enabled perform the cluster redirection here.
3727 * However we don't perform the redirection if:
3728 * 1) The sender of this command is our master.
3729 * 2) The command has no key arguments. */
3730 if (server.cluster_enabled &&
3731 !mustObeyClient(c) &&
3732 !(!(c->cmd->flags&CMD_MOVABLE_KEYS) && c->cmd->key_specs_num == 0 &&
3733 c->cmd->proc != execCommand))
3734 {
3735 int error_code;
3736 clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
3737 &c->slot,&error_code);
3738 if (n == NULL || n != server.cluster->myself) {
3739 if (c->cmd->proc == execCommand) {
3740 discardTransaction(c);
3741 } else {
3742 flagTransaction(c);
3743 }
3744 clusterRedirectClient(c,n,c->slot,error_code);
3745 c->cmd->rejected_calls++;
3746 return C_OK;
3747 }
3748 }
3749
3750 /* Disconnect some clients if total clients memory is too high. We do this
3751 * before key eviction, after the last command was executed and consumed
3752 * some client output buffer memory. */
3753 evictClients();
3754 if (server.current_client == NULL) {
3755 /* If we evicted ourself then abort processing the command */
3756 return C_ERR;
3757 }
3758
3759 /* Handle the maxmemory directive.
3760 *
3761 * Note that we do not want to reclaim memory if we are here re-entering
3762 * the event loop since there is a busy Lua script running in timeout
3763 * condition, to avoid mixing the propagation of scripts with the
3764 * propagation of DELs due to eviction. */
3765 if (server.maxmemory && !isInsideYieldingLongCommand()) {
3766 int out_of_memory = (performEvictions() == EVICT_FAIL);
3767
3768 /* performEvictions may evict keys, so we need flush pending tracking
3769 * invalidation keys. If we don't do this, we may get an invalidation
3770 * message after we perform operation on the key, where in fact this
3771 * message belongs to the old value of the key before it gets evicted.*/
3772 trackingHandlePendingKeyInvalidations();
3773
3774 /* performEvictions may flush slave output buffers. This may result
3775 * in a slave, that may be the active client, to be freed. */
3776 if (server.current_client == NULL) return C_ERR;
3777
3778 int reject_cmd_on_oom = is_denyoom_command;
3779 /* If client is in MULTI/EXEC context, queuing may consume an unlimited
3780 * amount of memory, so we want to stop that.
3781 * However, we never want to reject DISCARD, or even EXEC (unless it
3782 * contains denied commands, in which case is_denyoom_command is already
3783 * set. */
3784 if (c->flags & CLIENT_MULTI &&
3785 c->cmd->proc != execCommand &&
3786 c->cmd->proc != discardCommand &&
3787 c->cmd->proc != quitCommand &&
3788 c->cmd->proc != resetCommand) {
3789 reject_cmd_on_oom = 1;
3790 }
3791
3792 if (out_of_memory && reject_cmd_on_oom) {
3793 rejectCommand(c, shared.oomerr);
3794 return C_OK;
3795 }
3796
3797 /* Save out_of_memory result at command start, otherwise if we check OOM
3798 * in the first write within script, memory used by lua stack and
3799 * arguments might interfere. We need to save it for EXEC and module
3800 * calls too, since these can call EVAL, but avoid saving it during an
3801 * interrupted / yielding busy script / module. */
3802 server.pre_command_oom_state = out_of_memory;
3803 }
3804
3805 /* Make sure to use a reasonable amount of memory for client side
3806 * caching metadata. */
3807 if (server.tracking_clients) trackingLimitUsedSlots();
3808
3809 /* Don't accept write commands if there are problems persisting on disk
3810 * unless coming from our master, in which case check the replica ignore
3811 * disk write error config to either log or crash. */
3812 int deny_write_type = writeCommandsDeniedByDiskError();
3813 if (deny_write_type != DISK_ERROR_TYPE_NONE &&
3814 (is_write_command || c->cmd->proc == pingCommand))
3815 {
3816 if (obey_client) {
3817 if (!server.repl_ignore_disk_write_error && c->cmd->proc != pingCommand) {
3818 serverPanic("Replica was unable to write command to disk.");
3819 } else {
3820 static mstime_t last_log_time_ms = 0;
3821 const mstime_t log_interval_ms = 10000;
3822 if (server.mstime > last_log_time_ms + log_interval_ms) {
3823 last_log_time_ms = server.mstime;
3824 serverLog(LL_WARNING, "Replica is applying a command even though "
3825 "it is unable to write to disk.");
3826 }
3827 }
3828 } else {
3829 sds err = writeCommandsGetDiskErrorMessage(deny_write_type);
3830 /* remove the newline since rejectCommandSds adds it. */
3831 sdssubstr(err, 0, sdslen(err)-2);
3832 rejectCommandSds(c, err);
3833 return C_OK;
3834 }
3835 }
3836
3837 /* Don't accept write commands if there are not enough good slaves and
3838 * user configured the min-slaves-to-write option. */
3839 if (is_write_command && !checkGoodReplicasStatus()) {
3840 rejectCommand(c, shared.noreplicaserr);
3841 return C_OK;
3842 }
3843
3844 /* Don't accept write commands if this is a read only slave. But
3845 * accept write commands if this is our master. */
3846 if (server.masterhost && server.repl_slave_ro &&
3847 !obey_client &&
3848 is_write_command)
3849 {
3850 rejectCommand(c, shared.roslaveerr);
3851 return C_OK;
3852 }
3853
3854 /* Only allow a subset of commands in the context of Pub/Sub if the
3855 * connection is in RESP2 mode. With RESP3 there are no limits. */
3856 if ((c->flags & CLIENT_PUBSUB && c->resp == 2) &&
3857 c->cmd->proc != pingCommand &&
3858 c->cmd->proc != subscribeCommand &&
3859 c->cmd->proc != ssubscribeCommand &&
3860 c->cmd->proc != unsubscribeCommand &&
3861 c->cmd->proc != sunsubscribeCommand &&
3862 c->cmd->proc != psubscribeCommand &&
3863 c->cmd->proc != punsubscribeCommand &&
3864 c->cmd->proc != quitCommand &&
3865 c->cmd->proc != resetCommand) {
3866 rejectCommandFormat(c,
3867 "Can't execute '%s': only (P|S)SUBSCRIBE / "
3868 "(P|S)UNSUBSCRIBE / PING / QUIT / RESET are allowed in this context",
3869 c->cmd->fullname);
3870 return C_OK;
3871 }
3872
3873 /* Only allow commands with flag "t", such as INFO, REPLICAOF and so on,
3874 * when replica-serve-stale-data is no and we are a replica with a broken
3875 * link with master. */
3876 if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
3877 server.repl_serve_stale_data == 0 &&
3878 is_denystale_command)
3879 {
3880 rejectCommand(c, shared.masterdownerr);
3881 return C_OK;
3882 }
3883
3884 /* Loading DB? Return an error if the command has not the
3885 * CMD_LOADING flag. */
3886 if (server.loading && !server.async_loading && is_denyloading_command) {
3887 rejectCommand(c, shared.loadingerr);
3888 return C_OK;
3889 }
3890
3891 /* During async-loading, block certain commands. */
3892 if (server.async_loading && is_deny_async_loading_command) {
3893 rejectCommand(c,shared.loadingerr);
3894 return C_OK;
3895 }
3896
3897 /* when a busy job is being done (script / module)
3898 * Only allow a limited number of commands.
3899 * Note that we need to allow the transactions commands, otherwise clients
3900 * sending a transaction with pipelining without error checking, may have
3901 * the MULTI plus a few initial commands refused, then the timeout
3902 * condition resolves, and the bottom-half of the transaction gets
3903 * executed, see Github PR #7022. */
3904 if (isInsideYieldingLongCommand() && !(c->cmd->flags & CMD_ALLOW_BUSY)) {
3905 if (server.busy_module_yield_flags && server.busy_module_yield_reply) {
3906 rejectCommandFormat(c, "-BUSY %s", server.busy_module_yield_reply);
3907 } else if (server.busy_module_yield_flags) {
3908 rejectCommand(c, shared.slowmoduleerr);
3909 } else if (scriptIsEval()) {
3910 rejectCommand(c, shared.slowevalerr);
3911 } else {
3912 rejectCommand(c, shared.slowscripterr);
3913 }
3914 return C_OK;
3915 }
3916
3917 /* Prevent a replica from sending commands that access the keyspace.
3918 * The main objective here is to prevent abuse of client pause check
3919 * from which replicas are exempt. */
3920 if ((c->flags & CLIENT_SLAVE) && (is_may_replicate_command || is_write_command || is_read_command)) {
3921 rejectCommandFormat(c, "Replica can't interact with the keyspace");
3922 return C_OK;
3923 }
3924
3925 /* If the server is paused, block the client until
3926 * the pause has ended. Replicas are never paused. */
3927 if (!(c->flags & CLIENT_SLAVE) &&
3928 ((server.client_pause_type == CLIENT_PAUSE_ALL) ||
3929 (server.client_pause_type == CLIENT_PAUSE_WRITE && is_may_replicate_command)))
3930 {
3931 c->bpop.timeout = 0;
3932 blockClient(c,BLOCKED_POSTPONE);
3933 return C_OK;
3934 }
3935
3936 /* Exec the command */
3937 if (c->flags & CLIENT_MULTI &&
3938 c->cmd->proc != execCommand &&
3939 c->cmd->proc != discardCommand &&
3940 c->cmd->proc != multiCommand &&
3941 c->cmd->proc != watchCommand &&
3942 c->cmd->proc != quitCommand &&
3943 c->cmd->proc != resetCommand)
3944 {
3945 queueMultiCommand(c, cmd_flags);
3946 addReply(c,shared.queued);
3947 } else {
3948 call(c,CMD_CALL_FULL);
3949 c->woff = server.master_repl_offset;
3950 if (listLength(server.ready_keys))
3951 handleClientsBlockedOnKeys();
3952 }
3953
3954 return C_OK;
3955}
3956
3957/* ====================== Error lookup and execution ===================== */
3958
3959void incrementErrorCount(const char *fullerr, size_t namelen) {
3960 struct redisError *error = raxFind(server.errors,(unsigned char*)fullerr,namelen);
3961 if (error == raxNotFound) {
3962 error = zmalloc(sizeof(*error));
3963 error->count = 0;
3964 raxInsert(server.errors,(unsigned char*)fullerr,namelen,error,NULL);
3965 }
3966 error->count++;
3967}
3968
3969/*================================== Shutdown =============================== */
3970
3971/* Close listening sockets. Also unlink the unix domain socket if
3972 * unlink_unix_socket is non-zero. */
3973void closeListeningSockets(int unlink_unix_socket) {
3974 int j;
3975
3976 for (j = 0; j < server.ipfd.count; j++) close(server.ipfd.fd[j]);
3977 for (j = 0; j < server.tlsfd.count; j++) close(server.tlsfd.fd[j]);
3978 if (server.sofd != -1) close(server.sofd);
3979 if (server.cluster_enabled)
3980 for (j = 0; j < server.cfd.count; j++) close(server.cfd.fd[j]);
3981 if (unlink_unix_socket && server.unixsocket) {
3982 serverLog(LL_NOTICE,"Removing the unix socket file.");
3983 if (unlink(server.unixsocket) != 0)
3984 serverLog(LL_WARNING,"Error removing the unix socket file: %s",strerror(errno));
3985 }
3986}
3987
3988/* Prepare for shutting down the server. Flags:
3989 *
3990 * - SHUTDOWN_SAVE: Save a database dump even if the server is configured not to
3991 * save any dump.
3992 *
3993 * - SHUTDOWN_NOSAVE: Don't save any database dump even if the server is
3994 * configured to save one.
3995 *
3996 * - SHUTDOWN_NOW: Don't wait for replicas to catch up before shutting down.
3997 *
3998 * - SHUTDOWN_FORCE: Ignore errors writing AOF and RDB files on disk, which
3999 * would normally prevent a shutdown.
4000 *
4001 * Unless SHUTDOWN_NOW is set and if any replicas are lagging behind, C_ERR is
4002 * returned and server.shutdown_mstime is set to a timestamp to allow a grace
4003 * period for the replicas to catch up. This is checked and handled by
4004 * serverCron() which completes the shutdown as soon as possible.
4005 *
4006 * If shutting down fails due to errors writing RDB or AOF files, C_ERR is
4007 * returned and an error is logged. If the flag SHUTDOWN_FORCE is set, these
4008 * errors are logged but ignored and C_OK is returned.
4009 *
4010 * On success, this function returns C_OK and then it's OK to call exit(0). */
4011int prepareForShutdown(int flags) {
4012 if (isShutdownInitiated()) return C_ERR;
4013
4014 /* When SHUTDOWN is called while the server is loading a dataset in
4015 * memory we need to make sure no attempt is performed to save
4016 * the dataset on shutdown (otherwise it could overwrite the current DB
4017 * with half-read data).
4018 *
4019 * Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
4020 if (server.loading || server.sentinel_mode)
4021 flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE;
4022
4023 server.shutdown_flags = flags;
4024
4025 serverLog(LL_WARNING,"User requested shutdown...");
4026 if (server.supervised_mode == SUPERVISED_SYSTEMD)
4027 redisCommunicateSystemd("STOPPING=1\n");
4028
4029 /* If we have any replicas, let them catch up the replication offset before
4030 * we shut down, to avoid data loss. */
4031 if (!(flags & SHUTDOWN_NOW) &&
4032 server.shutdown_timeout != 0 &&
4033 !isReadyToShutdown())
4034 {
4035 server.shutdown_mstime = server.mstime + server.shutdown_timeout * 1000;
4036 if (!areClientsPaused()) sendGetackToReplicas();
4037 pauseClients(PAUSE_DURING_SHUTDOWN, LLONG_MAX, CLIENT_PAUSE_WRITE);
4038 serverLog(LL_NOTICE, "Waiting for replicas before shutting down.");
4039 return C_ERR;
4040 }
4041
4042 return finishShutdown();
4043}
4044
4045static inline int isShutdownInitiated(void) {
4046 return server.shutdown_mstime != 0;
4047}
4048
4049/* Returns 0 if there are any replicas which are lagging in replication which we
4050 * need to wait for before shutting down. Returns 1 if we're ready to shut
4051 * down now. */
4052int isReadyToShutdown(void) {
4053 if (listLength(server.slaves) == 0) return 1; /* No replicas. */
4054
4055 listIter li;
4056 listNode *ln;
4057 listRewind(server.slaves, &li);
4058 while ((ln = listNext(&li)) != NULL) {
4059 client *replica = listNodeValue(ln);
4060 if (replica->repl_ack_off != server.master_repl_offset) return 0;
4061 }
4062 return 1;
4063}
4064
4065static void cancelShutdown(void) {
4066 server.shutdown_asap = 0;
4067 server.shutdown_flags = 0;
4068 server.shutdown_mstime = 0;
4069 server.last_sig_received = 0;
4070 replyToClientsBlockedOnShutdown();
4071 unpauseClients(PAUSE_DURING_SHUTDOWN);
4072}
4073
4074/* Returns C_OK if shutdown was aborted and C_ERR if shutdown wasn't ongoing. */
4075int abortShutdown(void) {
4076 if (isShutdownInitiated()) {
4077 cancelShutdown();
4078 } else if (server.shutdown_asap) {
4079 /* Signal handler has requested shutdown, but it hasn't been initiated
4080 * yet. Just clear the flag. */
4081 server.shutdown_asap = 0;
4082 } else {
4083 /* Shutdown neither initiated nor requested. */
4084 return C_ERR;
4085 }
4086 serverLog(LL_NOTICE, "Shutdown manually aborted.");
4087 return C_OK;
4088}
4089
/* The final step of the shutdown sequence. Returns C_OK if the shutdown
 * sequence was successful and it's OK to call exit(). If C_ERR is returned,
 * it's not safe to call exit().
 *
 * The behavior is driven by server.shutdown_flags (SHUTDOWN_SAVE,
 * SHUTDOWN_NOSAVE, SHUTDOWN_FORCE). On failure the function jumps to the
 * 'error' label, which logs a warning and calls cancelShutdown() before
 * returning C_ERR. */
int finishShutdown(void) {

    int save = server.shutdown_flags & SHUTDOWN_SAVE;
    int nosave = server.shutdown_flags & SHUTDOWN_NOSAVE;
    int force = server.shutdown_flags & SHUTDOWN_FORCE;

    /* Log a warning for each replica that is lagging. */
    listIter replicas_iter;
    listNode *replicas_list_node;
    int num_replicas = 0, num_lagging_replicas = 0;
    listRewind(server.slaves, &replicas_iter);
    while ((replicas_list_node = listNext(&replicas_iter)) != NULL) {
        client *replica = listNodeValue(replicas_list_node);
        num_replicas++;
        if (replica->repl_ack_off != server.master_repl_offset) {
            num_lagging_replicas++;
            /* The lag is only computed for replicas in the online state,
             * otherwise 0 is reported. */
            long lag = replica->replstate == SLAVE_STATE_ONLINE ?
                time(NULL) - replica->repl_ack_time : 0;
            serverLog(LL_WARNING,
                      "Lagging replica %s reported offset %lld behind master, lag=%ld, state=%s.",
                      replicationGetSlaveName(replica),
                      server.master_repl_offset - replica->repl_ack_off,
                      lag,
                      replstateToString(replica->replstate));
        }
    }
    if (num_replicas > 0) {
        serverLog(LL_NOTICE,
                  "%d of %d replicas are in sync when shutting down.",
                  num_replicas - num_lagging_replicas,
                  num_replicas);
    }

    /* Kill all the Lua debugger forked sessions. */
    ldbKillForkedSessions();

    /* Kill the saving child if there is a background saving in progress.
       We want to avoid race conditions, for instance our saving child may
       overwrite the synchronous saving done by SHUTDOWN. */
    if (server.child_type == CHILD_TYPE_RDB) {
        serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
        killRDBChild();
        /* Note that normally, after killRDBChild, it is
         * backgroundSaveDoneHandler that does the cleanup, but in this case
         * that code will not be reached, so we need to call rdbRemoveTempFile
         * which will close the fd (in order to actually unlink the file) in a
         * background thread.
         * The temp rdb file fd may not be closed if redis exits quickly,
         * but the OS will close this fd when the process exits. */
        rdbRemoveTempFile(server.child_pid, 0);
    }

    /* Kill module child if there is one. */
    if (server.child_type == CHILD_TYPE_MODULE) {
        serverLog(LL_WARNING,"There is a module fork child. Killing it!");
        TerminateModuleForkChild(server.child_pid,0);
    }

    /* Kill the AOF saving child as the AOF we already have may be longer
     * but contains the full dataset anyway. */
    if (server.child_type == CHILD_TYPE_AOF) {
        /* If we have AOF enabled but haven't written the AOF yet, don't
         * shutdown or else the dataset will be lost. */
        if (server.aof_state == AOF_WAIT_REWRITE) {
            if (force) {
                serverLog(LL_WARNING, "Writing initial AOF. Exit anyway.");
            } else {
                serverLog(LL_WARNING, "Writing initial AOF, can't exit.");
                goto error;
            }
        }
        serverLog(LL_WARNING,
                  "There is a child rewriting the AOF. Killing it!");
        killAppendOnlyChild();
    }
    if (server.aof_state != AOF_OFF) {
        /* Append only file: flush buffers and fsync() the AOF at exit.
         * A failing fsync is only logged: it does not abort the shutdown. */
        serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
        flushAppendOnlyFile(1);
        if (redis_fsync(server.aof_fd) == -1) {
            serverLog(LL_WARNING,"Fail to fsync the AOF file: %s.",
                      strerror(errno));
        }
    }

    /* Create a new RDB file before exiting: this happens when save points are
     * configured and SHUTDOWN_NOSAVE is not set, or when SHUTDOWN_SAVE is
     * set. */
    if ((server.saveparamslen > 0 && !nosave) || save) {
        serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting.");
        if (server.supervised_mode == SUPERVISED_SYSTEMD)
            redisCommunicateSystemd("STATUS=Saving the final RDB snapshot\n");
        /* Snapshotting. Perform a SYNC SAVE and exit */
        rdbSaveInfo rsi, *rsiptr;
        rsiptr = rdbPopulateSaveInfo(&rsi);
        if (rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr) != C_OK) {
            /* Ooops.. error saving! The best we can do is to continue
             * operating. Note that if there was a background saving process,
             * in the next cron() Redis will be notified that the background
             * saving aborted, handling special stuff like slaves pending for
             * synchronization... */
            if (force) {
                serverLog(LL_WARNING,"Error trying to save the DB. Exit anyway.");
            } else {
                serverLog(LL_WARNING,"Error trying to save the DB, can't exit.");
                if (server.supervised_mode == SUPERVISED_SYSTEMD)
                    redisCommunicateSystemd("STATUS=Error trying to save the DB, can't exit.\n");
                goto error;
            }
        }
    }

    /* Free the AOF manifest. */
    if (server.aof_manifest) aofManifestFree(server.aof_manifest);

    /* Fire the shutdown modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_SHUTDOWN,0,NULL);

    /* Remove the pid file if possible and needed.
     * NOTE(review): unlink() receives server.pidfile also when only
     * server.daemonize is set; presumably the pid file name is always
     * populated for daemonized servers -- confirm. The unlink() return value
     * is not checked. */
    if (server.daemonize || server.pidfile) {
        serverLog(LL_NOTICE,"Removing the pid file.");
        unlink(server.pidfile);
    }

    /* Best effort flush of slave output buffers, so that we hopefully
     * send them pending writes. */
    flushSlavesOutputBuffers();

    /* Close the listening sockets. Apparently this allows faster restarts. */
    closeListeningSockets(1);

    /* Unlock the cluster config file before shutdown. The flock() result is
     * not checked. */
    if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) {
        flock(server.cluster_config_file_lock_fd, LOCK_UN|LOCK_NB);
    }

    serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
              server.sentinel_mode ? "Sentinel" : "Redis");
    return C_OK;

error:
    /* Reached when the initial AOF is still being written or when the final
     * RDB save failed, and SHUTDOWN_FORCE was not given. */
    serverLog(LL_WARNING, "Errors trying to shut down the server. Check the logs for more information.");
    cancelShutdown();
    return C_ERR;
}
4235
4236/*================================== Commands =============================== */
4237
4238/* Sometimes Redis cannot accept write commands because there is a persistence
4239 * error with the RDB or AOF file, and Redis is configured in order to stop
4240 * accepting writes in such situation. This function returns if such a
4241 * condition is active, and the type of the condition.
4242 *
4243 * Function return values:
4244 *
4245 * DISK_ERROR_TYPE_NONE: No problems, we can accept writes.
4246 * DISK_ERROR_TYPE_AOF: Don't accept writes: AOF errors.
4247 * DISK_ERROR_TYPE_RDB: Don't accept writes: RDB errors.
4248 */
4249int writeCommandsDeniedByDiskError(void) {
4250 if (server.stop_writes_on_bgsave_err &&
4251 server.saveparamslen > 0 &&
4252 server.lastbgsave_status == C_ERR)
4253 {
4254 return DISK_ERROR_TYPE_RDB;
4255 } else if (server.aof_state != AOF_OFF) {
4256 if (server.aof_last_write_status == C_ERR) {
4257 return DISK_ERROR_TYPE_AOF;
4258 }
4259 /* AOF fsync error. */
4260 int aof_bio_fsync_status;
4261 atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
4262 if (aof_bio_fsync_status == C_ERR) {
4263 atomicGet(server.aof_bio_fsync_errno,server.aof_last_write_errno);
4264 return DISK_ERROR_TYPE_AOF;
4265 }
4266 }
4267
4268 return DISK_ERROR_TYPE_NONE;
4269}
4270
4271sds writeCommandsGetDiskErrorMessage(int error_code) {
4272 sds ret = NULL;
4273 if (error_code == DISK_ERROR_TYPE_RDB) {
4274 ret = sdsdup(shared.bgsaveerr->ptr);
4275 } else {
4276 ret = sdscatfmt(sdsempty(),
4277 "-MISCONF Errors writing to the AOF file: %s\r\n",
4278 strerror(server.aof_last_write_errno));
4279 }
4280 return ret;
4281}
4282
4283/* The PING command. It works in a different way if the client is in
4284 * in Pub/Sub mode. */
4285void pingCommand(client *c) {
4286 /* The command takes zero or one arguments. */
4287 if (c->argc > 2) {
4288 addReplyErrorArity(c);
4289 return;
4290 }
4291
4292 if (c->flags & CLIENT_PUBSUB && c->resp == 2) {
4293 addReply(c,shared.mbulkhdr[2]);
4294 addReplyBulkCBuffer(c,"pong",4);
4295 if (c->argc == 1)
4296 addReplyBulkCBuffer(c,"",0);
4297 else
4298 addReplyBulk(c,c->argv[1]);
4299 } else {
4300 if (c->argc == 1)
4301 addReply(c,shared.pong);
4302 else
4303 addReplyBulk(c,c->argv[1]);
4304 }
4305}
4306
4307void echoCommand(client *c) {
4308 addReplyBulk(c,c->argv[1]);
4309}
4310
4311void timeCommand(client *c) {
4312 struct timeval tv;
4313
4314 /* gettimeofday() can only fail if &tv is a bad address so we
4315 * don't check for errors. */
4316 gettimeofday(&tv,NULL);
4317 addReplyArrayLen(c,2);
4318 addReplyBulkLongLong(c,tv.tv_sec);
4319 addReplyBulkLongLong(c,tv.tv_usec);
4320}
4321
/* Associates a flag bit mask with the name used for it in replies. Arrays of
 * these entries are terminated by an entry whose name is NULL. */
typedef struct replyFlagNames {
    uint64_t flag;      /* Bit(s) tested against the flags being reported. */
    const char *name;   /* Name emitted when the flag is set, NULL in the terminator. */
} replyFlagNames;
4326
4327/* Helper function to output flags. */
4328void addReplyCommandFlags(client *c, uint64_t flags, replyFlagNames *replyFlags) {
4329 int count = 0, j=0;
4330 /* Count them so we don't have to use deferred reply. */
4331 while (replyFlags[j].name) {
4332 if (flags & replyFlags[j].flag)
4333 count++;
4334 j++;
4335 }
4336
4337 addReplySetLen(c, count);
4338 j = 0;
4339 while (replyFlags[j].name) {
4340 if (flags & replyFlags[j].flag)
4341 addReplyStatus(c, replyFlags[j].name);
4342 j++;
4343 }
4344}
4345
4346void addReplyFlagsForCommand(client *c, struct redisCommand *cmd) {
4347 replyFlagNames flagNames[] = {
4348 {CMD_WRITE, "write"},
4349 {CMD_READONLY, "readonly"},
4350 {CMD_DENYOOM, "denyoom"},
4351 {CMD_MODULE, "module"},
4352 {CMD_ADMIN, "admin"},
4353 {CMD_PUBSUB, "pubsub"},
4354 {CMD_NOSCRIPT, "noscript"},
4355 {CMD_BLOCKING, "blocking"},
4356 {CMD_LOADING, "loading"},
4357 {CMD_STALE, "stale"},
4358 {CMD_SKIP_MONITOR, "skip_monitor"},
4359 {CMD_SKIP_SLOWLOG, "skip_slowlog"},
4360 {CMD_ASKING, "asking"},
4361 {CMD_FAST, "fast"},
4362 {CMD_NO_AUTH, "no_auth"},
4363 /* {CMD_MAY_REPLICATE, "may_replicate"},, Hidden on purpose */
4364 /* {CMD_SENTINEL, "sentinel"}, Hidden on purpose */
4365 /* {CMD_ONLY_SENTINEL, "only_sentinel"}, Hidden on purpose */
4366 {CMD_NO_MANDATORY_KEYS, "no_mandatory_keys"},
4367 /* {CMD_PROTECTED, "protected"}, Hidden on purpose */
4368 {CMD_NO_ASYNC_LOADING, "no_async_loading"},
4369 {CMD_NO_MULTI, "no_multi"},
4370 {CMD_MOVABLE_KEYS, "movablekeys"},
4371 {CMD_ALLOW_BUSY, "allow_busy"},
4372 {0,NULL}
4373 };
4374 addReplyCommandFlags(c, cmd->flags, flagNames);
4375}
4376
4377void addReplyDocFlagsForCommand(client *c, struct redisCommand *cmd) {
4378 replyFlagNames docFlagNames[] = {
4379 {CMD_DOC_DEPRECATED, "deprecated"},
4380 {CMD_DOC_SYSCMD, "syscmd"},
4381 {0,NULL}
4382 };
4383 addReplyCommandFlags(c, cmd->doc_flags, docFlagNames);
4384}
4385
4386void addReplyFlagsForKeyArgs(client *c, uint64_t flags) {
4387 replyFlagNames docFlagNames[] = {
4388 {CMD_KEY_RO, "RO"},
4389 {CMD_KEY_RW, "RW"},
4390 {CMD_KEY_OW, "OW"},
4391 {CMD_KEY_RM, "RM"},
4392 {CMD_KEY_ACCESS, "access"},
4393 {CMD_KEY_UPDATE, "update"},
4394 {CMD_KEY_INSERT, "insert"},
4395 {CMD_KEY_DELETE, "delete"},
4396 {CMD_KEY_NOT_KEY, "not_key"},
4397 {CMD_KEY_INCOMPLETE, "incomplete"},
4398 {CMD_KEY_VARIABLE_FLAGS, "variable_flags"},
4399 {0,NULL}
4400 };
4401 addReplyCommandFlags(c, flags, docFlagNames);
4402}
4403
/* Must match redisCommandArgType.
 *
 * Names of the argument types: the array is indexed with the 'type' field of
 * a command argument (see addReplyCommandArgList), so the order of the
 * entries matters. */
const char *ARG_TYPE_STR[] = {
    "string",
    "integer",
    "double",
    "key",
    "pattern",
    "unix-time",
    "pure-token",
    "oneof",
    "block",
};
4416
4417void addReplyFlagsForArg(client *c, uint64_t flags) {
4418 replyFlagNames argFlagNames[] = {
4419 {CMD_ARG_OPTIONAL, "optional"},
4420 {CMD_ARG_MULTIPLE, "multiple"},
4421 {CMD_ARG_MULTIPLE_TOKEN, "multiple_token"},
4422 {0,NULL}
4423 };
4424 addReplyCommandFlags(c, flags, argFlagNames);
4425}
4426
4427void addReplyCommandArgList(client *c, struct redisCommandArg *args, int num_args) {
4428 addReplyArrayLen(c, num_args);
4429 for (int j = 0; j<num_args; j++) {
4430 /* Count our reply len so we don't have to use deferred reply. */
4431 long maplen = 2;
4432 if (args[j].key_spec_index != -1) maplen++;
4433 if (args[j].token) maplen++;
4434 if (args[j].summary) maplen++;
4435 if (args[j].since) maplen++;
4436 if (args[j].deprecated_since) maplen++;
4437 if (args[j].flags) maplen++;
4438 if (args[j].type == ARG_TYPE_ONEOF || args[j].type == ARG_TYPE_BLOCK)
4439 maplen++;
4440 addReplyMapLen(c, maplen);
4441
4442 addReplyBulkCString(c, "name");
4443 addReplyBulkCString(c, args[j].name);
4444
4445 addReplyBulkCString(c, "type");
4446 addReplyBulkCString(c, ARG_TYPE_STR[args[j].type]);
4447
4448 if (args[j].key_spec_index != -1) {
4449 addReplyBulkCString(c, "key_spec_index");
4450 addReplyLongLong(c, args[j].key_spec_index);
4451 }
4452 if (args[j].token) {
4453 addReplyBulkCString(c, "token");
4454 addReplyBulkCString(c, args[j].token);
4455 }
4456 if (args[j].summary) {
4457 addReplyBulkCString(c, "summary");
4458 addReplyBulkCString(c, args[j].summary);
4459 }
4460 if (args[j].since) {
4461 addReplyBulkCString(c, "since");
4462 addReplyBulkCString(c, args[j].since);
4463 }
4464 if (args[j].deprecated_since) {
4465 addReplyBulkCString(c, "deprecated_since");
4466 addReplyBulkCString(c, args[j].deprecated_since);
4467 }
4468 if (args[j].flags) {
4469 addReplyBulkCString(c, "flags");
4470 addReplyFlagsForArg(c, args[j].flags);
4471 }
4472 if (args[j].type == ARG_TYPE_ONEOF || args[j].type == ARG_TYPE_BLOCK) {
4473 addReplyBulkCString(c, "arguments");
4474 addReplyCommandArgList(c, args[j].subargs, args[j].num_args);
4475 }
4476 }
4477}
4478
/* Must match redisCommandRESP2Type.
 *
 * Names of the RESP2 reply types. Presumably indexed by a
 * redisCommandRESP2Type value, hence the order matters (no use is visible in
 * this part of the file -- confirm against the users of this table). */
const char *RESP2_TYPE_STR[] = {
    "simple-string",
    "error",
    "integer",
    "bulk-string",
    "null-bulk-string",
    "array",
    "null-array",
};
4489
/* Must match redisCommandRESP3Type.
 *
 * Names of the RESP3 reply types. Presumably indexed by a
 * redisCommandRESP3Type value, hence the order matters (no use is visible in
 * this part of the file -- confirm against the users of this table). */
const char *RESP3_TYPE_STR[] = {
    "simple-string",
    "error",
    "integer",
    "double",
    "bulk-string",
    "array",
    "map",
    "set",
    "bool",
    "null",
};
4503
4504void addReplyCommandHistory(client *c, struct redisCommand *cmd) {
4505 addReplySetLen(c, cmd->num_history);
4506 for (int j = 0; j<cmd->num_history; j++) {
4507 addReplyArrayLen(c, 2);
4508 addReplyBulkCString(c, cmd->history[j].since);
4509 addReplyBulkCString(c, cmd->history[j].changes);
4510 }
4511}
4512
4513void addReplyCommandTips(client *c, struct redisCommand *cmd) {
4514 addReplySetLen(c, cmd->num_tips);
4515 for (int j = 0; j<cmd->num_tips; j++) {
4516 addReplyBulkCString(c, cmd->tips[j]);
4517 }
4518}
4519
4520void addReplyCommandKeySpecs(client *c, struct redisCommand *cmd) {
4521 addReplySetLen(c, cmd->key_specs_num);
4522 for (int i = 0; i < cmd->key_specs_num; i++) {
4523 int maplen = 3;
4524 if (cmd->key_specs[i].notes) maplen++;
4525
4526 addReplyMapLen(c, maplen);
4527
4528 if (cmd->key_specs[i].notes) {
4529 addReplyBulkCString(c, "notes");
4530 addReplyBulkCString(c,cmd->key_specs[i].notes);
4531 }
4532
4533 addReplyBulkCString(c, "flags");
4534 addReplyFlagsForKeyArgs(c,cmd->key_specs[i].flags);
4535
4536 addReplyBulkCString(c, "begin_search");
4537 switch (cmd->key_specs[i].begin_search_type) {
4538 case KSPEC_BS_UNKNOWN:
4539 addReplyMapLen(c, 2);
4540 addReplyBulkCString(c, "type");
4541 addReplyBulkCString(c, "unknown");
4542
4543 addReplyBulkCString(c, "spec");
4544 addReplyMapLen(c, 0);
4545 break;
4546 case KSPEC_BS_INDEX:
4547 addReplyMapLen(c, 2);
4548 addReplyBulkCString(c, "type");
4549 addReplyBulkCString(c, "index");
4550
4551 addReplyBulkCString(c, "spec");
4552 addReplyMapLen(c, 1);
4553 addReplyBulkCString(c, "index");
4554 addReplyLongLong(c, cmd->key_specs[i].bs.index.pos);
4555 break;
4556 case KSPEC_BS_KEYWORD:
4557 addReplyMapLen(c, 2);
4558 addReplyBulkCString(c, "type");
4559 addReplyBulkCString(c, "keyword");
4560
4561 addReplyBulkCString(c, "spec");
4562 addReplyMapLen(c, 2);
4563 addReplyBulkCString(c, "keyword");
4564 addReplyBulkCString(c, cmd->key_specs[i].bs.keyword.keyword);
4565 addReplyBulkCString(c, "startfrom");
4566 addReplyLongLong(c, cmd->key_specs[i].bs.keyword.startfrom);
4567 break;
4568 default:
4569 serverPanic("Invalid begin_search key spec type %d", cmd->key_specs[i].begin_search_type);
4570 }
4571
4572 addReplyBulkCString(c, "find_keys");
4573 switch (cmd->key_specs[i].find_keys_type) {
4574 case KSPEC_FK_UNKNOWN:
4575 addReplyMapLen(c, 2);
4576 addReplyBulkCString(c, "type");
4577 addReplyBulkCString(c, "unknown");
4578
4579 addReplyBulkCString(c, "spec");
4580 addReplyMapLen(c, 0);
4581 break;
4582 case KSPEC_FK_RANGE:
4583 addReplyMapLen(c, 2);
4584 addReplyBulkCString(c, "type");
4585 addReplyBulkCString(c, "range");
4586
4587 addReplyBulkCString(c, "spec");
4588 addReplyMapLen(c, 3);
4589 addReplyBulkCString(c, "lastkey");
4590 addReplyLongLong(c, cmd->key_specs[i].fk.range.lastkey);
4591 addReplyBulkCString(c, "keystep");
4592 addReplyLongLong(c, cmd->key_specs[i].fk.range.keystep);
4593 addReplyBulkCString(c, "limit");
4594 addReplyLongLong(c, cmd->key_specs[i].fk.range.limit);
4595 break;
4596 case KSPEC_FK_KEYNUM:
4597 addReplyMapLen(c, 2);
4598 addReplyBulkCString(c, "type");
4599 addReplyBulkCString(c, "keynum");
4600
4601 addReplyBulkCString(c, "spec");
4602 addReplyMapLen(c, 3);
4603 addReplyBulkCString(c, "keynumidx");
4604 addReplyLongLong(c, cmd->key_specs[i].fk.keynum.keynumidx);
4605 addReplyBulkCString(c, "firstkey");
4606 addReplyLongLong(c, cmd->key_specs[i].fk.keynum.firstkey);
4607 addReplyBulkCString(c, "keystep");
4608 addReplyLongLong(c, cmd->key_specs[i].fk.keynum.keystep);
4609 break;
4610 default:
4611 serverPanic("Invalid find_keys key spec type %d", cmd->key_specs[i].begin_search_type);
4612 }
4613 }
4614}
4615
4616/* Reply with an array of sub-command using the provided reply callback. */
4617void addReplyCommandSubCommands(client *c, struct redisCommand *cmd, void (*reply_function)(client*, struct redisCommand*), int use_map) {
4618 if (!cmd->subcommands_dict) {
4619 addReplySetLen(c, 0);
4620 return;
4621 }
4622
4623 if (use_map)
4624 addReplyMapLen(c, dictSize(cmd->subcommands_dict));
4625 else
4626 addReplyArrayLen(c, dictSize(cmd->subcommands_dict));
4627 dictEntry *de;
4628 dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict);
4629 while((de = dictNext(di)) != NULL) {
4630 struct redisCommand *sub = (struct redisCommand *)dictGetVal(de);
4631 if (use_map)
4632 addReplyBulkCBuffer(c, sub->fullname, sdslen(sub->fullname));
4633 reply_function(c, sub);
4634 }
4635 dictReleaseIterator(di);
4636}
4637
/* Must match redisCommandGroup.
 *
 * Names of the command groups: the array is indexed with the 'group' field
 * of a command (see addReplyCommandDocs), so the order of the entries
 * matters. */
const char *COMMAND_GROUP_STR[] = {
    "generic",
    "string",
    "list",
    "set",
    "sorted-set",
    "hash",
    "pubsub",
    "transactions",
    "connection",
    "server",
    "scripting",
    "hyperloglog",
    "cluster",
    "sentinel",
    "geo",
    "stream",
    "bitmap",
    "module"
};
4659
4660/* Output the representation of a Redis command. Used by the COMMAND command and COMMAND INFO. */
4661void addReplyCommandInfo(client *c, struct redisCommand *cmd) {
4662 if (!cmd) {
4663 addReplyNull(c);
4664 } else {
4665 int firstkey = 0, lastkey = 0, keystep = 0;
4666 if (cmd->legacy_range_key_spec.begin_search_type != KSPEC_BS_INVALID) {
4667 firstkey = cmd->legacy_range_key_spec.bs.index.pos;
4668 lastkey = cmd->legacy_range_key_spec.fk.range.lastkey;
4669 if (lastkey >= 0)
4670 lastkey += firstkey;
4671 keystep = cmd->legacy_range_key_spec.fk.range.keystep;
4672 }
4673
4674 addReplyArrayLen(c, 10);
4675 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
4676 addReplyLongLong(c, cmd->arity);
4677 addReplyFlagsForCommand(c, cmd);
4678 addReplyLongLong(c, firstkey);
4679 addReplyLongLong(c, lastkey);
4680 addReplyLongLong(c, keystep);
4681 addReplyCommandCategories(c, cmd);
4682 addReplyCommandTips(c, cmd);
4683 addReplyCommandKeySpecs(c, cmd);
4684 addReplyCommandSubCommands(c, cmd, addReplyCommandInfo, 0);
4685 }
4686}
4687
4688/* Output the representation of a Redis command. Used by the COMMAND DOCS. */
4689void addReplyCommandDocs(client *c, struct redisCommand *cmd) {
4690 /* Count our reply len so we don't have to use deferred reply. */
4691 long maplen = 1;
4692 if (cmd->summary) maplen++;
4693 if (cmd->since) maplen++;
4694 if (cmd->flags & CMD_MODULE) maplen++;
4695 if (cmd->complexity) maplen++;
4696 if (cmd->doc_flags) maplen++;
4697 if (cmd->deprecated_since) maplen++;
4698 if (cmd->replaced_by) maplen++;
4699 if (cmd->history) maplen++;
4700 if (cmd->args) maplen++;
4701 if (cmd->subcommands_dict) maplen++;
4702 addReplyMapLen(c, maplen);
4703
4704 if (cmd->summary) {
4705 addReplyBulkCString(c, "summary");
4706 addReplyBulkCString(c, cmd->summary);
4707 }
4708 if (cmd->since) {
4709 addReplyBulkCString(c, "since");
4710 addReplyBulkCString(c, cmd->since);
4711 }
4712
4713 /* Always have the group, for module commands the group is always "module". */
4714 addReplyBulkCString(c, "group");
4715 addReplyBulkCString(c, COMMAND_GROUP_STR[cmd->group]);
4716
4717 if (cmd->complexity) {
4718 addReplyBulkCString(c, "complexity");
4719 addReplyBulkCString(c, cmd->complexity);
4720 }
4721 if (cmd->flags & CMD_MODULE) {
4722 addReplyBulkCString(c, "module");
4723 addReplyBulkCString(c, moduleNameFromCommand(cmd));
4724 }
4725 if (cmd->doc_flags) {
4726 addReplyBulkCString(c, "doc_flags");
4727 addReplyDocFlagsForCommand(c, cmd);
4728 }
4729 if (cmd->deprecated_since) {
4730 addReplyBulkCString(c, "deprecated_since");
4731 addReplyBulkCString(c, cmd->deprecated_since);
4732 }
4733 if (cmd->replaced_by) {
4734 addReplyBulkCString(c, "replaced_by");
4735 addReplyBulkCString(c, cmd->replaced_by);
4736 }
4737 if (cmd->history) {
4738 addReplyBulkCString(c, "history");
4739 addReplyCommandHistory(c, cmd);
4740 }
4741 if (cmd->args) {
4742 addReplyBulkCString(c, "arguments");
4743 addReplyCommandArgList(c, cmd->args, cmd->num_args);
4744 }
4745 if (cmd->subcommands_dict) {
4746 addReplyBulkCString(c, "subcommands");
4747 addReplyCommandSubCommands(c, cmd, addReplyCommandDocs, 1);
4748 }
4749}
4750
4751/* Helper for COMMAND GETKEYS and GETKEYSANDFLAGS */
4752void getKeysSubcommandImpl(client *c, int with_flags) {
4753 struct redisCommand *cmd = lookupCommand(c->argv+2,c->argc-2);
4754 getKeysResult result = GETKEYS_RESULT_INIT;
4755 int j;
4756
4757 if (!cmd) {
4758 addReplyError(c,"Invalid command specified");
4759 return;
4760 } else if (!doesCommandHaveKeys(cmd)) {
4761 addReplyError(c,"The command has no key arguments");
4762 return;
4763 } else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
4764 ((c->argc-2) < -cmd->arity))
4765 {
4766 addReplyError(c,"Invalid number of arguments specified for command");
4767 return;
4768 }
4769
4770 if (!getKeysFromCommandWithSpecs(cmd,c->argv+2,c->argc-2,GET_KEYSPEC_DEFAULT,&result)) {
4771 if (cmd->flags & CMD_NO_MANDATORY_KEYS) {
4772 addReplyArrayLen(c,0);
4773 } else {
4774 addReplyError(c,"Invalid arguments specified for command");
4775 }
4776 } else {
4777 addReplyArrayLen(c,result.numkeys);
4778 for (j = 0; j < result.numkeys; j++) {
4779 if (!with_flags) {
4780 addReplyBulk(c,c->argv[result.keys[j].pos+2]);
4781 } else {
4782 addReplyArrayLen(c,2);
4783 addReplyBulk(c,c->argv[result.keys[j].pos+2]);
4784 addReplyFlagsForKeyArgs(c,result.keys[j].flags);
4785 }
4786 }
4787 }
4788 getKeysFreeResult(&result);
4789}
4790
4791/* COMMAND GETKEYSANDFLAGS cmd arg1 arg2 ... */
4792void commandGetKeysAndFlagsCommand(client *c) {
4793 getKeysSubcommandImpl(c, 1);
4794}
4795
4796/* COMMAND GETKEYS cmd arg1 arg2 ... */
4797void getKeysSubcommand(client *c) {
4798 getKeysSubcommandImpl(c, 0);
4799}
4800
4801/* COMMAND (no args) */
4802void commandCommand(client *c) {
4803 dictIterator *di;
4804 dictEntry *de;
4805
4806 addReplyArrayLen(c, dictSize(server.commands));
4807 di = dictGetIterator(server.commands);
4808 while ((de = dictNext(di)) != NULL) {
4809 addReplyCommandInfo(c, dictGetVal(de));
4810 }
4811 dictReleaseIterator(di);
4812}
4813
4814/* COMMAND COUNT */
4815void commandCountCommand(client *c) {
4816 addReplyLongLong(c, dictSize(server.commands));
4817}
4818
/* Kind of filter given to COMMAND LIST FILTERBY. */
typedef enum {
    COMMAND_LIST_FILTER_MODULE,     /* FILTERBY MODULE <module-name> */
    COMMAND_LIST_FILTER_ACLCAT,     /* FILTERBY ACLCAT <cat> */
    COMMAND_LIST_FILTER_PATTERN,    /* FILTERBY PATTERN <pattern> */
} commandListFilterType;
4824
/* A COMMAND LIST filter: its type, the argument provided by the user, and a
 * cache of the value resolved from the argument, filled the first time the
 * filter is evaluated (see shouldFilterFromCommandList). */
typedef struct {
    commandListFilterType type; /* What the filter matches against. */
    sds arg;                    /* Module name, ACL category or pattern. */
    struct {
        int valid;              /* Set to 1 once 'u' has been filled. */
        union {
            uint64_t aclcat;        /* Category flag, for ACLCAT filters. */
            void *module_handle;    /* Module handle, for MODULE filters. */
        } u;
    } cache;
} commandListFilter;
4836
4837int shouldFilterFromCommandList(struct redisCommand *cmd, commandListFilter *filter) {
4838 switch (filter->type) {
4839 case (COMMAND_LIST_FILTER_MODULE):
4840 if (!filter->cache.valid) {
4841 filter->cache.u.module_handle = moduleGetHandleByName(filter->arg);
4842 filter->cache.valid = 1;
4843 }
4844 return !moduleIsModuleCommand(filter->cache.u.module_handle, cmd);
4845 case (COMMAND_LIST_FILTER_ACLCAT): {
4846 if (!filter->cache.valid) {
4847 filter->cache.u.aclcat = ACLGetCommandCategoryFlagByName(filter->arg);
4848 filter->cache.valid = 1;
4849 }
4850 uint64_t cat = filter->cache.u.aclcat;
4851 if (cat == 0)
4852 return 1; /* Invalid ACL category */
4853 return (!(cmd->acl_categories & cat));
4854 break;
4855 }
4856 case (COMMAND_LIST_FILTER_PATTERN):
4857 return !stringmatchlen(filter->arg, sdslen(filter->arg), cmd->fullname, sdslen(cmd->fullname), 1);
4858 default:
4859 serverPanic("Invalid filter type %d", filter->type);
4860 }
4861}
4862
4863/* COMMAND LIST FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>) */
4864void commandListWithFilter(client *c, dict *commands, commandListFilter filter, int *numcmds) {
4865 dictEntry *de;
4866 dictIterator *di = dictGetIterator(commands);
4867
4868 while ((de = dictNext(di)) != NULL) {
4869 struct redisCommand *cmd = dictGetVal(de);
4870 if (!shouldFilterFromCommandList(cmd,&filter)) {
4871 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
4872 (*numcmds)++;
4873 }
4874
4875 if (cmd->subcommands_dict) {
4876 commandListWithFilter(c, cmd->subcommands_dict, filter, numcmds);
4877 }
4878 }
4879 dictReleaseIterator(di);
4880}
4881
4882/* COMMAND LIST */
4883void commandListWithoutFilter(client *c, dict *commands, int *numcmds) {
4884 dictEntry *de;
4885 dictIterator *di = dictGetIterator(commands);
4886
4887 while ((de = dictNext(di)) != NULL) {
4888 struct redisCommand *cmd = dictGetVal(de);
4889 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
4890 (*numcmds)++;
4891
4892 if (cmd->subcommands_dict) {
4893 commandListWithoutFilter(c, cmd->subcommands_dict, numcmds);
4894 }
4895 }
4896 dictReleaseIterator(di);
4897}
4898
4899/* COMMAND LIST [FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>)] */
4900void commandListCommand(client *c) {
4901
4902 /* Parse options. */
4903 int i = 2, got_filter = 0;
4904 commandListFilter filter = {0};
4905 for (; i < c->argc; i++) {
4906 int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
4907 char *opt = c->argv[i]->ptr;
4908 if (!strcasecmp(opt,"filterby") && moreargs == 2) {
4909 char *filtertype = c->argv[i+1]->ptr;
4910 if (!strcasecmp(filtertype,"module")) {
4911 filter.type = COMMAND_LIST_FILTER_MODULE;
4912 } else if (!strcasecmp(filtertype,"aclcat")) {
4913 filter.type = COMMAND_LIST_FILTER_ACLCAT;
4914 } else if (!strcasecmp(filtertype,"pattern")) {
4915 filter.type = COMMAND_LIST_FILTER_PATTERN;
4916 } else {
4917 addReplyErrorObject(c,shared.syntaxerr);
4918 return;
4919 }
4920 got_filter = 1;
4921 filter.arg = c->argv[i+2]->ptr;
4922 i += 2;
4923 } else {
4924 addReplyErrorObject(c,shared.syntaxerr);
4925 return;
4926 }
4927 }
4928
4929 int numcmds = 0;
4930 void *replylen = addReplyDeferredLen(c);
4931
4932 if (got_filter) {
4933 commandListWithFilter(c, server.commands, filter, &numcmds);
4934 } else {
4935 commandListWithoutFilter(c, server.commands, &numcmds);
4936 }
4937
4938 setDeferredArrayLen(c,replylen,numcmds);
4939}
4940
4941/* COMMAND INFO [<command-name> ...] */
4942void commandInfoCommand(client *c) {
4943 int i;
4944
4945 if (c->argc == 2) {
4946 dictIterator *di;
4947 dictEntry *de;
4948 addReplyArrayLen(c, dictSize(server.commands));
4949 di = dictGetIterator(server.commands);
4950 while ((de = dictNext(di)) != NULL) {
4951 addReplyCommandInfo(c, dictGetVal(de));
4952 }
4953 dictReleaseIterator(di);
4954 } else {
4955 addReplyArrayLen(c, c->argc-2);
4956 for (i = 2; i < c->argc; i++) {
4957 addReplyCommandInfo(c, lookupCommandBySds(c->argv[i]->ptr));
4958 }
4959 }
4960}
4961
4962/* COMMAND DOCS [command-name [command-name ...]] */
4963void commandDocsCommand(client *c) {
4964 int i;
4965 if (c->argc == 2) {
4966 /* Reply with an array of all commands */
4967 dictIterator *di;
4968 dictEntry *de;
4969 addReplyMapLen(c, dictSize(server.commands));
4970 di = dictGetIterator(server.commands);
4971 while ((de = dictNext(di)) != NULL) {
4972 struct redisCommand *cmd = dictGetVal(de);
4973 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
4974 addReplyCommandDocs(c, cmd);
4975 }
4976 dictReleaseIterator(di);
4977 } else {
4978 /* Reply with an array of the requested commands (if we find them) */
4979 int numcmds = 0;
4980 void *replylen = addReplyDeferredLen(c);
4981 for (i = 2; i < c->argc; i++) {
4982 struct redisCommand *cmd = lookupCommandBySds(c->argv[i]->ptr);
4983 if (!cmd)
4984 continue;
4985 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
4986 addReplyCommandDocs(c, cmd);
4987 numcmds++;
4988 }
4989 setDeferredMapLen(c,replylen,numcmds);
4990 }
4991}
4992
/* COMMAND GETKEYS arg0 arg1 arg2 ...
 *
 * Thin entry point: the client is passed unchanged to getKeysSubcommand(),
 * which produces the whole reply. */
void commandGetKeysCommand(client *c) {
    getKeysSubcommand(c);
}
4997
4998/* COMMAND HELP */
4999void commandHelpCommand(client *c) {
5000 const char *help[] = {
5001"(no subcommand)",
5002" Return details about all Redis commands.",
5003"COUNT",
5004" Return the total number of commands in this Redis server.",
5005"LIST",
5006" Return a list of all commands in this Redis server.",
5007"INFO [<command-name> ...]",
5008" Return details about multiple Redis commands.",
5009" If no command names are given, documentation details for all",
5010" commands are returned.",
5011"DOCS [<command-name> ...]",
5012" Return documentation details about multiple Redis commands.",
5013" If no command names are given, documentation details for all",
5014" commands are returned.",
5015"GETKEYS <full-command>",
5016" Return the keys from a full Redis command.",
5017"GETKEYSANDFLAGS <full-command>",
5018" Return the keys and the access flags from a full Redis command.",
5019NULL
5020 };
5021
5022 addReplyHelp(c, help);
5023}
5024
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger values
 * are scaled by the biggest power of 1024 that fits (K, M, G, T, P) and
 * printed with two decimals. Anything at or above 1024^6 falls back to the
 * plain byte count.
 *
 * 's' is written with sprintf(), so the caller must provide room for the
 * longest output: a 20 digit byte count, the 'B' suffix and the terminator
 * (22 bytes). */
void bytesToHuman(char *s, unsigned long long n) {
    static const char suffixes[] = "KMGTP";
    unsigned long long unit = 1024ULL;

    if (n < unit) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* 'unit' is the divisor for the current suffix, 'ceiling' the first
     * value too big for it. The last ceiling is 1024^6, which still fits. */
    for (int k = 0; suffixes[k] != '\0'; k++) {
        unsigned long long ceiling = unit * 1024ULL;
        if (n < ceiling) {
            sprintf(s,"%.2f%c",(double)n/(double)unit,suffixes[k]);
            return;
        }
        unit = ceiling;
    }

    /* Let's hope we never need this */
    sprintf(s,"%lluB",n);
}
5053
5054/* Fill percentile distribution of latencies. */
5055sds fillPercentileDistributionLatencies(sds info, const char* histogram_name, struct hdr_histogram* histogram) {
5056 info = sdscatfmt(info,"latency_percentiles_usec_%s:",histogram_name);
5057 for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
5058 char fbuf[128];
5059 size_t len = sprintf(fbuf, "%f", server.latency_tracking_info_percentiles[j]);
5060 len = trimDoubleString(fbuf, len);
5061 info = sdscatprintf(info,"p%s=%.3f", fbuf,
5062 ((double)hdr_value_at_percentile(histogram,server.latency_tracking_info_percentiles[j]))/1000.0f);
5063 if (j != server.latency_tracking_info_percentiles_len-1)
5064 info = sdscatlen(info,",",1);
5065 }
5066 info = sdscatprintf(info,"\r\n");
5067 return info;
5068}
5069
5070const char *replstateToString(int replstate) {
5071 switch (replstate) {
5072 case SLAVE_STATE_WAIT_BGSAVE_START:
5073 case SLAVE_STATE_WAIT_BGSAVE_END:
5074 return "wait_bgsave";
5075 case SLAVE_STATE_SEND_BULK:
5076 return "send_bulk";
5077 case SLAVE_STATE_ONLINE:
5078 return "online";
5079 default:
5080 return "";
5081 }
5082}
5083
/* Characters we sanitize on INFO output to maintain expected format.
 *
 * getSafeInfoString() hands both arrays to memmapchars() together with
 * sizeof(unsafe_info_chars)-1 as the set length, so the substitution string
 * must provide at least that many characters.
 * NOTE(review): presumably the i-th unsafe character is replaced by the i-th
 * substitute; confirm against memmapchars(). */
static char unsafe_info_chars[] = "#:\n\r";
static char unsafe_info_chars_substs[] = "____"; /* Must be same length as above */
5087
5088/* Returns a sanitized version of s that contains no unsafe info string chars.
5089 * If no unsafe characters are found, simply returns s. Caller needs to
5090 * free tmp if it is non-null on return.
5091 */
5092const char *getSafeInfoString(const char *s, size_t len, char **tmp) {
5093 *tmp = NULL;
5094 if (mempbrk(s, len, unsafe_info_chars,sizeof(unsafe_info_chars)-1)
5095 == NULL) return s;
5096 char *new = *tmp = zmalloc(len + 1);
5097 memcpy(new, s, len);
5098 new[len] = '\0';
5099 return memmapchars(new, len, unsafe_info_chars, unsafe_info_chars_substs,
5100 sizeof(unsafe_info_chars)-1);
5101}
5102
5103sds genRedisInfoStringCommandStats(sds info, dict *commands) {
5104 struct redisCommand *c;
5105 dictEntry *de;
5106 dictIterator *di;
5107 di = dictGetSafeIterator(commands);
5108 while((de = dictNext(di)) != NULL) {
5109 char *tmpsafe;
5110 c = (struct redisCommand *) dictGetVal(de);
5111 if (c->calls || c->failed_calls || c->rejected_calls) {
5112 info = sdscatprintf(info,
5113 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f"
5114 ",rejected_calls=%lld,failed_calls=%lld\r\n",
5115 getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe), c->calls, c->microseconds,
5116 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls),
5117 c->rejected_calls, c->failed_calls);
5118 if (tmpsafe != NULL) zfree(tmpsafe);
5119 }
5120 if (c->subcommands_dict) {
5121 info = genRedisInfoStringCommandStats(info, c->subcommands_dict);
5122 }
5123 }
5124 dictReleaseIterator(di);
5125
5126 return info;
5127}
5128
5129sds genRedisInfoStringLatencyStats(sds info, dict *commands) {
5130 struct redisCommand *c;
5131 dictEntry *de;
5132 dictIterator *di;
5133 di = dictGetSafeIterator(commands);
5134 while((de = dictNext(di)) != NULL) {
5135 char *tmpsafe;
5136 c = (struct redisCommand *) dictGetVal(de);
5137 if (c->latency_histogram) {
5138 info = fillPercentileDistributionLatencies(info,
5139 getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe),
5140 c->latency_histogram);
5141 if (tmpsafe != NULL) zfree(tmpsafe);
5142 }
5143 if (c->subcommands_dict) {
5144 info = genRedisInfoStringLatencyStats(info, c->subcommands_dict);
5145 }
5146 }
5147 dictReleaseIterator(di);
5148
5149 return info;
5150}
5151
5152/* Takes a null terminated sections list, and adds them to the dict. */
5153void addInfoSectionsToDict(dict *section_dict, char **sections) {
5154 while (*sections) {
5155 sds section = sdsnew(*sections);
5156 if (dictAdd(section_dict, section, NULL)==DICT_ERR)
5157 sdsfree(section);
5158 sections++;
5159 }
5160}
5161
5162/* Cached copy of the default sections, as an optimization. */
5163static dict *cached_default_info_sections = NULL;
5164
5165void releaseInfoSectionDict(dict *sec) {
5166 if (sec != cached_default_info_sections)
5167 dictRelease(sec);
5168}
5169
5170/* Create a dictionary with unique section names to be used by genRedisInfoString.
5171 * 'argv' and 'argc' are list of arguments for INFO.
5172 * 'defaults' is an optional null terminated list of default sections.
5173 * 'out_all' and 'out_everything' are optional.
5174 * The resulting dictionary should be released with releaseInfoSectionDict. */
5175dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) {
5176 char *default_sections[] = {
5177 "server", "clients", "memory", "persistence", "stats", "replication",
5178 "cpu", "module_list", "errorstats", "cluster", "keyspace", NULL};
5179 if (!defaults)
5180 defaults = default_sections;
5181
5182 if (argc == 0) {
5183 /* In this case we know the dict is not gonna be modified, so we cache
5184 * it as an optimization for a common case. */
5185 if (cached_default_info_sections)
5186 return cached_default_info_sections;
5187 cached_default_info_sections = dictCreate(&stringSetDictType);
5188 dictExpand(cached_default_info_sections, 16);
5189 addInfoSectionsToDict(cached_default_info_sections, defaults);
5190 return cached_default_info_sections;
5191 }
5192
5193 dict *section_dict = dictCreate(&stringSetDictType);
5194 dictExpand(section_dict, min(argc,16));
5195 for (int i = 0; i < argc; i++) {
5196 if (!strcasecmp(argv[i]->ptr,"default")) {
5197 addInfoSectionsToDict(section_dict, defaults);
5198 } else if (!strcasecmp(argv[i]->ptr,"all")) {
5199 if (out_all) *out_all = 1;
5200 } else if (!strcasecmp(argv[i]->ptr,"everything")) {
5201 if (out_everything) *out_everything = 1;
5202 if (out_all) *out_all = 1;
5203 } else {
5204 sds section = sdsnew(argv[i]->ptr);
5205 if (dictAdd(section_dict, section, NULL) != DICT_OK)
5206 sdsfree(section);
5207 }
5208 }
5209 return section_dict;
5210}
5211
/* Create the string returned by the INFO command. This is decoupled
 * from the INFO command itself as we need to report the same information
 * on memory corruption problems. */
5215sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
5216 sds info = sdsempty();
5217 time_t uptime = server.unixtime-server.stat_starttime;
5218 int j;
5219 int sections = 0;
5220 if (everything) all_sections = 1;
5221
5222 /* Server */
5223 if (all_sections || (dictFind(section_dict,"server") != NULL)) {
5224 static int call_uname = 1;
5225 static struct utsname name;
5226 char *mode;
5227 char *supervised;
5228
5229 if (server.cluster_enabled) mode = "cluster";
5230 else if (server.sentinel_mode) mode = "sentinel";
5231 else mode = "standalone";
5232
5233 if (server.supervised) {
5234 if (server.supervised_mode == SUPERVISED_UPSTART) supervised = "upstart";
5235 else if (server.supervised_mode == SUPERVISED_SYSTEMD) supervised = "systemd";
5236 else supervised = "unknown";
5237 } else {
5238 supervised = "no";
5239 }
5240
5241 if (sections++) info = sdscat(info,"\r\n");
5242
5243 if (call_uname) {
5244 /* Uname can be slow and is always the same output. Cache it. */
5245 uname(&name);
5246 call_uname = 0;
5247 }
5248
5249 unsigned int lruclock;
5250 atomicGet(server.lruclock,lruclock);
5251 info = sdscatfmt(info,
5252 "# Server\r\n"
5253 "redis_version:%s\r\n"
5254 "redis_git_sha1:%s\r\n"
5255 "redis_git_dirty:%i\r\n"
5256 "redis_build_id:%s\r\n"
5257 "redis_mode:%s\r\n"
5258 "os:%s %s %s\r\n"
5259 "arch_bits:%i\r\n"
5260 "monotonic_clock:%s\r\n"
5261 "multiplexing_api:%s\r\n"
5262 "atomicvar_api:%s\r\n"
5263 "gcc_version:%i.%i.%i\r\n"
5264 "process_id:%I\r\n"
5265 "process_supervised:%s\r\n"
5266 "run_id:%s\r\n"
5267 "tcp_port:%i\r\n"
5268 "server_time_usec:%I\r\n"
5269 "uptime_in_seconds:%I\r\n"
5270 "uptime_in_days:%I\r\n"
5271 "hz:%i\r\n"
5272 "configured_hz:%i\r\n"
5273 "lru_clock:%u\r\n"
5274 "executable:%s\r\n"
5275 "config_file:%s\r\n"
5276 "io_threads_active:%i\r\n",
5277 REDIS_VERSION,
5278 redisGitSHA1(),
5279 strtol(redisGitDirty(),NULL,10) > 0,
5280 redisBuildIdString(),
5281 mode,
5282 name.sysname, name.release, name.machine,
5283 server.arch_bits,
5284 monotonicInfoString(),
5285 aeGetApiName(),
5286 REDIS_ATOMIC_API,
5287#ifdef __GNUC__
5288 __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
5289#else
5290 0,0,0,
5291#endif
5292 (int64_t) getpid(),
5293 supervised,
5294 server.runid,
5295 server.port ? server.port : server.tls_port,
5296 (int64_t)server.ustime,
5297 (int64_t)uptime,
5298 (int64_t)(uptime/(3600*24)),
5299 server.hz,
5300 server.config_hz,
5301 lruclock,
5302 server.executable ? server.executable : "",
5303 server.configfile ? server.configfile : "",
5304 server.io_threads_active);
5305
5306 /* Conditional properties */
5307 if (isShutdownInitiated()) {
5308 info = sdscatfmt(info,
5309 "shutdown_in_milliseconds:%I\r\n",
5310 (int64_t)(server.shutdown_mstime - server.mstime));
5311 }
5312 }
5313
5314 /* Clients */
5315 if (all_sections || (dictFind(section_dict,"clients") != NULL)) {
5316 size_t maxin, maxout;
5317 getExpansiveClientsInfo(&maxin,&maxout);
5318 if (sections++) info = sdscat(info,"\r\n");
5319 info = sdscatprintf(info,
5320 "# Clients\r\n"
5321 "connected_clients:%lu\r\n"
5322 "cluster_connections:%lu\r\n"
5323 "maxclients:%u\r\n"
5324 "client_recent_max_input_buffer:%zu\r\n"
5325 "client_recent_max_output_buffer:%zu\r\n"
5326 "blocked_clients:%d\r\n"
5327 "tracking_clients:%d\r\n"
5328 "clients_in_timeout_table:%llu\r\n",
5329 listLength(server.clients)-listLength(server.slaves),
5330 getClusterConnectionsCount(),
5331 server.maxclients,
5332 maxin, maxout,
5333 server.blocked_clients,
5334 server.tracking_clients,
5335 (unsigned long long) raxSize(server.clients_timeout_table));
5336 }
5337
5338 /* Memory */
5339 if (all_sections || (dictFind(section_dict,"memory") != NULL)) {
5340 char hmem[64];
5341 char peak_hmem[64];
5342 char total_system_hmem[64];
5343 char used_memory_lua_hmem[64];
5344 char used_memory_vm_total_hmem[64];
5345 char used_memory_scripts_hmem[64];
5346 char used_memory_rss_hmem[64];
5347 char maxmemory_hmem[64];
5348 size_t zmalloc_used = zmalloc_used_memory();
5349 size_t total_system_mem = server.system_memory_size;
5350 const char *evict_policy = evictPolicyToString();
5351 long long memory_lua = evalMemory();
5352 long long memory_functions = functionsMemory();
5353 struct redisMemOverhead *mh = getMemoryOverheadData();
5354
5355 /* Peak memory is updated from time to time by serverCron() so it
5356 * may happen that the instantaneous value is slightly bigger than
5357 * the peak value. This may confuse users, so we update the peak
5358 * if found smaller than the current memory usage. */
5359 if (zmalloc_used > server.stat_peak_memory)
5360 server.stat_peak_memory = zmalloc_used;
5361
5362 bytesToHuman(hmem,zmalloc_used);
5363 bytesToHuman(peak_hmem,server.stat_peak_memory);
5364 bytesToHuman(total_system_hmem,total_system_mem);
5365 bytesToHuman(used_memory_lua_hmem,memory_lua);
5366 bytesToHuman(used_memory_vm_total_hmem,memory_functions + memory_lua);
5367 bytesToHuman(used_memory_scripts_hmem,mh->lua_caches + mh->functions_caches);
5368 bytesToHuman(used_memory_rss_hmem,server.cron_malloc_stats.process_rss);
5369 bytesToHuman(maxmemory_hmem,server.maxmemory);
5370
5371 if (sections++) info = sdscat(info,"\r\n");
5372 info = sdscatprintf(info,
5373 "# Memory\r\n"
5374 "used_memory:%zu\r\n"
5375 "used_memory_human:%s\r\n"
5376 "used_memory_rss:%zu\r\n"
5377 "used_memory_rss_human:%s\r\n"
5378 "used_memory_peak:%zu\r\n"
5379 "used_memory_peak_human:%s\r\n"
5380 "used_memory_peak_perc:%.2f%%\r\n"
5381 "used_memory_overhead:%zu\r\n"
5382 "used_memory_startup:%zu\r\n"
5383 "used_memory_dataset:%zu\r\n"
5384 "used_memory_dataset_perc:%.2f%%\r\n"
5385 "allocator_allocated:%zu\r\n"
5386 "allocator_active:%zu\r\n"
5387 "allocator_resident:%zu\r\n"
5388 "total_system_memory:%lu\r\n"
5389 "total_system_memory_human:%s\r\n"
5390 "used_memory_lua:%lld\r\n" /* deprecated, renamed to used_memory_vm_eval */
5391 "used_memory_vm_eval:%lld\r\n"
5392 "used_memory_lua_human:%s\r\n" /* deprecated */
5393 "used_memory_scripts_eval:%lld\r\n"
5394 "number_of_cached_scripts:%lu\r\n"
5395 "number_of_functions:%lu\r\n"
5396 "number_of_libraries:%lu\r\n"
5397 "used_memory_vm_functions:%lld\r\n"
5398 "used_memory_vm_total:%lld\r\n"
5399 "used_memory_vm_total_human:%s\r\n"
5400 "used_memory_functions:%lld\r\n"
5401 "used_memory_scripts:%lld\r\n"
5402 "used_memory_scripts_human:%s\r\n"
5403 "maxmemory:%lld\r\n"
5404 "maxmemory_human:%s\r\n"
5405 "maxmemory_policy:%s\r\n"
5406 "allocator_frag_ratio:%.2f\r\n"
5407 "allocator_frag_bytes:%zu\r\n"
5408 "allocator_rss_ratio:%.2f\r\n"
5409 "allocator_rss_bytes:%zd\r\n"
5410 "rss_overhead_ratio:%.2f\r\n"
5411 "rss_overhead_bytes:%zd\r\n"
5412 "mem_fragmentation_ratio:%.2f\r\n"
5413 "mem_fragmentation_bytes:%zd\r\n"
5414 "mem_not_counted_for_evict:%zu\r\n"
5415 "mem_replication_backlog:%zu\r\n"
5416 "mem_total_replication_buffers:%zu\r\n"
5417 "mem_clients_slaves:%zu\r\n"
5418 "mem_clients_normal:%zu\r\n"
5419 "mem_cluster_links:%zu\r\n"
5420 "mem_aof_buffer:%zu\r\n"
5421 "mem_allocator:%s\r\n"
5422 "active_defrag_running:%d\r\n"
5423 "lazyfree_pending_objects:%zu\r\n"
5424 "lazyfreed_objects:%zu\r\n",
5425 zmalloc_used,
5426 hmem,
5427 server.cron_malloc_stats.process_rss,
5428 used_memory_rss_hmem,
5429 server.stat_peak_memory,
5430 peak_hmem,
5431 mh->peak_perc,
5432 mh->overhead_total,
5433 mh->startup_allocated,
5434 mh->dataset,
5435 mh->dataset_perc,
5436 server.cron_malloc_stats.allocator_allocated,
5437 server.cron_malloc_stats.allocator_active,
5438 server.cron_malloc_stats.allocator_resident,
5439 (unsigned long)total_system_mem,
5440 total_system_hmem,
5441 memory_lua,
5442 memory_lua,
5443 used_memory_lua_hmem,
5444 (long long) mh->lua_caches,
5445 dictSize(evalScriptsDict()),
5446 functionsNum(),
5447 functionsLibNum(),
5448 memory_functions,
5449 memory_functions + memory_lua,
5450 used_memory_vm_total_hmem,
5451 (long long) mh->functions_caches,
5452 (long long) mh->lua_caches + (long long) mh->functions_caches,
5453 used_memory_scripts_hmem,
5454 server.maxmemory,
5455 maxmemory_hmem,
5456 evict_policy,
5457 mh->allocator_frag,
5458 mh->allocator_frag_bytes,
5459 mh->allocator_rss,
5460 mh->allocator_rss_bytes,
5461 mh->rss_extra,
5462 mh->rss_extra_bytes,
5463 mh->total_frag, /* This is the total RSS overhead, including
5464 fragmentation, but not just it. This field
5465 (and the next one) is named like that just
5466 for backward compatibility. */
5467 mh->total_frag_bytes,
5468 freeMemoryGetNotCountedMemory(),
5469 mh->repl_backlog,
5470 server.repl_buffer_mem,
5471 mh->clients_slaves,
5472 mh->clients_normal,
5473 mh->cluster_links,
5474 mh->aof_buffer,
5475 ZMALLOC_LIB,
5476 server.active_defrag_running,
5477 lazyfreeGetPendingObjectsCount(),
5478 lazyfreeGetFreedObjectsCount()
5479 );
5480 freeMemoryOverheadData(mh);
5481 }
5482
5483 /* Persistence */
5484 if (all_sections || (dictFind(section_dict,"persistence") != NULL)) {
5485 if (sections++) info = sdscat(info,"\r\n");
5486 double fork_perc = 0;
5487 if (server.stat_module_progress) {
5488 fork_perc = server.stat_module_progress * 100;
5489 } else if (server.stat_current_save_keys_total) {
5490 fork_perc = ((double)server.stat_current_save_keys_processed / server.stat_current_save_keys_total) * 100;
5491 }
5492 int aof_bio_fsync_status;
5493 atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
5494
5495 info = sdscatprintf(info,
5496 "# Persistence\r\n"
5497 "loading:%d\r\n"
5498 "async_loading:%d\r\n"
5499 "current_cow_peak:%zu\r\n"
5500 "current_cow_size:%zu\r\n"
5501 "current_cow_size_age:%lu\r\n"
5502 "current_fork_perc:%.2f\r\n"
5503 "current_save_keys_processed:%zu\r\n"
5504 "current_save_keys_total:%zu\r\n"
5505 "rdb_changes_since_last_save:%lld\r\n"
5506 "rdb_bgsave_in_progress:%d\r\n"
5507 "rdb_last_save_time:%jd\r\n"
5508 "rdb_last_bgsave_status:%s\r\n"
5509 "rdb_last_bgsave_time_sec:%jd\r\n"
5510 "rdb_current_bgsave_time_sec:%jd\r\n"
5511 "rdb_saves:%lld\r\n"
5512 "rdb_last_cow_size:%zu\r\n"
5513 "rdb_last_load_keys_expired:%lld\r\n"
5514 "rdb_last_load_keys_loaded:%lld\r\n"
5515 "aof_enabled:%d\r\n"
5516 "aof_rewrite_in_progress:%d\r\n"
5517 "aof_rewrite_scheduled:%d\r\n"
5518 "aof_last_rewrite_time_sec:%jd\r\n"
5519 "aof_current_rewrite_time_sec:%jd\r\n"
5520 "aof_last_bgrewrite_status:%s\r\n"
5521 "aof_rewrites:%lld\r\n"
5522 "aof_rewrites_consecutive_failures:%lld\r\n"
5523 "aof_last_write_status:%s\r\n"
5524 "aof_last_cow_size:%zu\r\n"
5525 "module_fork_in_progress:%d\r\n"
5526 "module_fork_last_cow_size:%zu\r\n",
5527 (int)(server.loading && !server.async_loading),
5528 (int)server.async_loading,
5529 server.stat_current_cow_peak,
5530 server.stat_current_cow_bytes,
5531 server.stat_current_cow_updated ? (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0,
5532 fork_perc,
5533 server.stat_current_save_keys_processed,
5534 server.stat_current_save_keys_total,
5535 server.dirty,
5536 server.child_type == CHILD_TYPE_RDB,
5537 (intmax_t)server.lastsave,
5538 (server.lastbgsave_status == C_OK) ? "ok" : "err",
5539 (intmax_t)server.rdb_save_time_last,
5540 (intmax_t)((server.child_type != CHILD_TYPE_RDB) ?
5541 -1 : time(NULL)-server.rdb_save_time_start),
5542 server.stat_rdb_saves,
5543 server.stat_rdb_cow_bytes,
5544 server.rdb_last_load_keys_expired,
5545 server.rdb_last_load_keys_loaded,
5546 server.aof_state != AOF_OFF,
5547 server.child_type == CHILD_TYPE_AOF,
5548 server.aof_rewrite_scheduled,
5549 (intmax_t)server.aof_rewrite_time_last,
5550 (intmax_t)((server.child_type != CHILD_TYPE_AOF) ?
5551 -1 : time(NULL)-server.aof_rewrite_time_start),
5552 (server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
5553 server.stat_aof_rewrites,
5554 server.stat_aofrw_consecutive_failures,
5555 (server.aof_last_write_status == C_OK &&
5556 aof_bio_fsync_status == C_OK) ? "ok" : "err",
5557 server.stat_aof_cow_bytes,
5558 server.child_type == CHILD_TYPE_MODULE,
5559 server.stat_module_cow_bytes);
5560
5561 if (server.aof_enabled) {
5562 info = sdscatprintf(info,
5563 "aof_current_size:%lld\r\n"
5564 "aof_base_size:%lld\r\n"
5565 "aof_pending_rewrite:%d\r\n"
5566 "aof_buffer_length:%zu\r\n"
5567 "aof_pending_bio_fsync:%llu\r\n"
5568 "aof_delayed_fsync:%lu\r\n",
5569 (long long) server.aof_current_size,
5570 (long long) server.aof_rewrite_base_size,
5571 server.aof_rewrite_scheduled,
5572 sdslen(server.aof_buf),
5573 bioPendingJobsOfType(BIO_AOF_FSYNC),
5574 server.aof_delayed_fsync);
5575 }
5576
5577 if (server.loading) {
5578 double perc = 0;
5579 time_t eta, elapsed;
5580 off_t remaining_bytes = 1;
5581
5582 if (server.loading_total_bytes) {
5583 perc = ((double)server.loading_loaded_bytes / server.loading_total_bytes) * 100;
5584 remaining_bytes = server.loading_total_bytes - server.loading_loaded_bytes;
5585 } else if(server.loading_rdb_used_mem) {
5586 perc = ((double)server.loading_loaded_bytes / server.loading_rdb_used_mem) * 100;
5587 remaining_bytes = server.loading_rdb_used_mem - server.loading_loaded_bytes;
5588 /* used mem is only a (bad) estimation of the rdb file size, avoid going over 100% */
5589 if (perc > 99.99) perc = 99.99;
5590 if (remaining_bytes < 1) remaining_bytes = 1;
5591 }
5592
5593 elapsed = time(NULL)-server.loading_start_time;
5594 if (elapsed == 0) {
5595 eta = 1; /* A fake 1 second figure if we don't have
5596 enough info */
5597 } else {
5598 eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
5599 }
5600
5601 info = sdscatprintf(info,
5602 "loading_start_time:%jd\r\n"
5603 "loading_total_bytes:%llu\r\n"
5604 "loading_rdb_used_mem:%llu\r\n"
5605 "loading_loaded_bytes:%llu\r\n"
5606 "loading_loaded_perc:%.2f\r\n"
5607 "loading_eta_seconds:%jd\r\n",
5608 (intmax_t) server.loading_start_time,
5609 (unsigned long long) server.loading_total_bytes,
5610 (unsigned long long) server.loading_rdb_used_mem,
5611 (unsigned long long) server.loading_loaded_bytes,
5612 perc,
5613 (intmax_t)eta
5614 );
5615 }
5616 }
5617
5618 /* Stats */
5619 if (all_sections || (dictFind(section_dict,"stats") != NULL)) {
5620 long long stat_total_reads_processed, stat_total_writes_processed;
5621 long long stat_net_input_bytes, stat_net_output_bytes;
5622 long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
5623 long long current_eviction_exceeded_time = server.stat_last_eviction_exceeded_time ?
5624 (long long) elapsedUs(server.stat_last_eviction_exceeded_time): 0;
5625 long long current_active_defrag_time = server.stat_last_active_defrag_time ?
5626 (long long) elapsedUs(server.stat_last_active_defrag_time): 0;
5627 atomicGet(server.stat_total_reads_processed, stat_total_reads_processed);
5628 atomicGet(server.stat_total_writes_processed, stat_total_writes_processed);
5629 atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
5630 atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
5631 atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
5632 atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
5633
5634 if (sections++) info = sdscat(info,"\r\n");
5635 info = sdscatprintf(info,
5636 "# Stats\r\n"
5637 "total_connections_received:%lld\r\n"
5638 "total_commands_processed:%lld\r\n"
5639 "instantaneous_ops_per_sec:%lld\r\n"
5640 "total_net_input_bytes:%lld\r\n"
5641 "total_net_output_bytes:%lld\r\n"
5642 "total_net_repl_input_bytes:%lld\r\n"
5643 "total_net_repl_output_bytes:%lld\r\n"
5644 "instantaneous_input_kbps:%.2f\r\n"
5645 "instantaneous_output_kbps:%.2f\r\n"
5646 "instantaneous_input_repl_kbps:%.2f\r\n"
5647 "instantaneous_output_repl_kbps:%.2f\r\n"
5648 "rejected_connections:%lld\r\n"
5649 "sync_full:%lld\r\n"
5650 "sync_partial_ok:%lld\r\n"
5651 "sync_partial_err:%lld\r\n"
5652 "expired_keys:%lld\r\n"
5653 "expired_stale_perc:%.2f\r\n"
5654 "expired_time_cap_reached_count:%lld\r\n"
5655 "expire_cycle_cpu_milliseconds:%lld\r\n"
5656 "evicted_keys:%lld\r\n"
5657 "evicted_clients:%lld\r\n"
5658 "total_eviction_exceeded_time:%lld\r\n"
5659 "current_eviction_exceeded_time:%lld\r\n"
5660 "keyspace_hits:%lld\r\n"
5661 "keyspace_misses:%lld\r\n"
5662 "pubsub_channels:%ld\r\n"
5663 "pubsub_patterns:%lu\r\n"
5664 "pubsubshard_channels:%lu\r\n"
5665 "latest_fork_usec:%lld\r\n"
5666 "total_forks:%lld\r\n"
5667 "migrate_cached_sockets:%ld\r\n"
5668 "slave_expires_tracked_keys:%zu\r\n"
5669 "active_defrag_hits:%lld\r\n"
5670 "active_defrag_misses:%lld\r\n"
5671 "active_defrag_key_hits:%lld\r\n"
5672 "active_defrag_key_misses:%lld\r\n"
5673 "total_active_defrag_time:%lld\r\n"
5674 "current_active_defrag_time:%lld\r\n"
5675 "tracking_total_keys:%lld\r\n"
5676 "tracking_total_items:%lld\r\n"
5677 "tracking_total_prefixes:%lld\r\n"
5678 "unexpected_error_replies:%lld\r\n"
5679 "total_error_replies:%lld\r\n"
5680 "dump_payload_sanitizations:%lld\r\n"
5681 "total_reads_processed:%lld\r\n"
5682 "total_writes_processed:%lld\r\n"
5683 "io_threaded_reads_processed:%lld\r\n"
5684 "io_threaded_writes_processed:%lld\r\n"
5685 "reply_buffer_shrinks:%lld\r\n"
5686 "reply_buffer_expands:%lld\r\n",
5687 server.stat_numconnections,
5688 server.stat_numcommands,
5689 getInstantaneousMetric(STATS_METRIC_COMMAND),
5690 stat_net_input_bytes + stat_net_repl_input_bytes,
5691 stat_net_output_bytes + stat_net_repl_output_bytes,
5692 stat_net_repl_input_bytes,
5693 stat_net_repl_output_bytes,
5694 (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
5695 (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
5696 (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION)/1024,
5697 (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION)/1024,
5698 server.stat_rejected_conn,
5699 server.stat_sync_full,
5700 server.stat_sync_partial_ok,
5701 server.stat_sync_partial_err,
5702 server.stat_expiredkeys,
5703 server.stat_expired_stale_perc*100,
5704 server.stat_expired_time_cap_reached_count,
5705 server.stat_expire_cycle_time_used/1000,
5706 server.stat_evictedkeys,
5707 server.stat_evictedclients,
5708 (server.stat_total_eviction_exceeded_time + current_eviction_exceeded_time) / 1000,
5709 current_eviction_exceeded_time / 1000,
5710 server.stat_keyspace_hits,
5711 server.stat_keyspace_misses,
5712 dictSize(server.pubsub_channels),
5713 dictSize(server.pubsub_patterns),
5714 dictSize(server.pubsubshard_channels),
5715 server.stat_fork_time,
5716 server.stat_total_forks,
5717 dictSize(server.migrate_cached_sockets),
5718 getSlaveKeyWithExpireCount(),
5719 server.stat_active_defrag_hits,
5720 server.stat_active_defrag_misses,
5721 server.stat_active_defrag_key_hits,
5722 server.stat_active_defrag_key_misses,
5723 (server.stat_total_active_defrag_time + current_active_defrag_time) / 1000,
5724 current_active_defrag_time / 1000,
5725 (unsigned long long) trackingGetTotalKeys(),
5726 (unsigned long long) trackingGetTotalItems(),
5727 (unsigned long long) trackingGetTotalPrefixes(),
5728 server.stat_unexpected_error_replies,
5729 server.stat_total_error_replies,
5730 server.stat_dump_payload_sanitizations,
5731 stat_total_reads_processed,
5732 stat_total_writes_processed,
5733 server.stat_io_reads_processed,
5734 server.stat_io_writes_processed,
5735 server.stat_reply_buffer_shrinks,
5736 server.stat_reply_buffer_expands);
5737 }
5738
5739 /* Replication */
5740 if (all_sections || (dictFind(section_dict,"replication") != NULL)) {
5741 if (sections++) info = sdscat(info,"\r\n");
5742 info = sdscatprintf(info,
5743 "# Replication\r\n"
5744 "role:%s\r\n",
5745 server.masterhost == NULL ? "master" : "slave");
5746 if (server.masterhost) {
5747 long long slave_repl_offset = 1;
5748 long long slave_read_repl_offset = 1;
5749
5750 if (server.master) {
5751 slave_repl_offset = server.master->reploff;
5752 slave_read_repl_offset = server.master->read_reploff;
5753 } else if (server.cached_master) {
5754 slave_repl_offset = server.cached_master->reploff;
5755 slave_read_repl_offset = server.cached_master->read_reploff;
5756 }
5757
5758 info = sdscatprintf(info,
5759 "master_host:%s\r\n"
5760 "master_port:%d\r\n"
5761 "master_link_status:%s\r\n"
5762 "master_last_io_seconds_ago:%d\r\n"
5763 "master_sync_in_progress:%d\r\n"
5764 "slave_read_repl_offset:%lld\r\n"
5765 "slave_repl_offset:%lld\r\n"
5766 ,server.masterhost,
5767 server.masterport,
5768 (server.repl_state == REPL_STATE_CONNECTED) ?
5769 "up" : "down",
5770 server.master ?
5771 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
5772 server.repl_state == REPL_STATE_TRANSFER,
5773 slave_read_repl_offset,
5774 slave_repl_offset
5775 );
5776
5777 if (server.repl_state == REPL_STATE_TRANSFER) {
5778 double perc = 0;
5779 if (server.repl_transfer_size) {
5780 perc = ((double)server.repl_transfer_read / server.repl_transfer_size) * 100;
5781 }
5782 info = sdscatprintf(info,
5783 "master_sync_total_bytes:%lld\r\n"
5784 "master_sync_read_bytes:%lld\r\n"
5785 "master_sync_left_bytes:%lld\r\n"
5786 "master_sync_perc:%.2f\r\n"
5787 "master_sync_last_io_seconds_ago:%d\r\n",
5788 (long long) server.repl_transfer_size,
5789 (long long) server.repl_transfer_read,
5790 (long long) (server.repl_transfer_size - server.repl_transfer_read),
5791 perc,
5792 (int)(server.unixtime-server.repl_transfer_lastio)
5793 );
5794 }
5795
5796 if (server.repl_state != REPL_STATE_CONNECTED) {
5797 info = sdscatprintf(info,
5798 "master_link_down_since_seconds:%jd\r\n",
5799 server.repl_down_since ?
5800 (intmax_t)(server.unixtime-server.repl_down_since) : -1);
5801 }
5802 info = sdscatprintf(info,
5803 "slave_priority:%d\r\n"
5804 "slave_read_only:%d\r\n"
5805 "replica_announced:%d\r\n",
5806 server.slave_priority,
5807 server.repl_slave_ro,
5808 server.replica_announced);
5809 }
5810
5811 info = sdscatprintf(info,
5812 "connected_slaves:%lu\r\n",
5813 listLength(server.slaves));
5814
5815 /* If min-slaves-to-write is active, write the number of slaves
5816 * currently considered 'good'. */
5817 if (server.repl_min_slaves_to_write &&
5818 server.repl_min_slaves_max_lag) {
5819 info = sdscatprintf(info,
5820 "min_slaves_good_slaves:%d\r\n",
5821 server.repl_good_slaves_count);
5822 }
5823
5824 if (listLength(server.slaves)) {
5825 int slaveid = 0;
5826 listNode *ln;
5827 listIter li;
5828
5829 listRewind(server.slaves,&li);
5830 while((ln = listNext(&li))) {
5831 client *slave = listNodeValue(ln);
5832 char ip[NET_IP_STR_LEN], *slaveip = slave->slave_addr;
5833 int port;
5834 long lag = 0;
5835
5836 if (!slaveip) {
5837 if (connPeerToString(slave->conn,ip,sizeof(ip),&port) == -1)
5838 continue;
5839 slaveip = ip;
5840 }
5841 const char *state = replstateToString(slave->replstate);
5842 if (state[0] == '\0') continue;
5843 if (slave->replstate == SLAVE_STATE_ONLINE)
5844 lag = time(NULL) - slave->repl_ack_time;
5845
5846 info = sdscatprintf(info,
5847 "slave%d:ip=%s,port=%d,state=%s,"
5848 "offset=%lld,lag=%ld\r\n",
5849 slaveid,slaveip,slave->slave_listening_port,state,
5850 slave->repl_ack_off, lag);
5851 slaveid++;
5852 }
5853 }
5854 info = sdscatprintf(info,
5855 "master_failover_state:%s\r\n"
5856 "master_replid:%s\r\n"
5857 "master_replid2:%s\r\n"
5858 "master_repl_offset:%lld\r\n"
5859 "second_repl_offset:%lld\r\n"
5860 "repl_backlog_active:%d\r\n"
5861 "repl_backlog_size:%lld\r\n"
5862 "repl_backlog_first_byte_offset:%lld\r\n"
5863 "repl_backlog_histlen:%lld\r\n",
5864 getFailoverStateString(),
5865 server.replid,
5866 server.replid2,
5867 server.master_repl_offset,
5868 server.second_replid_offset,
5869 server.repl_backlog != NULL,
5870 server.repl_backlog_size,
5871 server.repl_backlog ? server.repl_backlog->offset : 0,
5872 server.repl_backlog ? server.repl_backlog->histlen : 0);
5873 }
5874
5875 /* CPU */
5876 if (all_sections || (dictFind(section_dict,"cpu") != NULL)) {
5877 if (sections++) info = sdscat(info,"\r\n");
5878
5879 struct rusage self_ru, c_ru;
5880 getrusage(RUSAGE_SELF, &self_ru);
5881 getrusage(RUSAGE_CHILDREN, &c_ru);
5882 info = sdscatprintf(info,
5883 "# CPU\r\n"
5884 "used_cpu_sys:%ld.%06ld\r\n"
5885 "used_cpu_user:%ld.%06ld\r\n"
5886 "used_cpu_sys_children:%ld.%06ld\r\n"
5887 "used_cpu_user_children:%ld.%06ld\r\n",
5888 (long)self_ru.ru_stime.tv_sec, (long)self_ru.ru_stime.tv_usec,
5889 (long)self_ru.ru_utime.tv_sec, (long)self_ru.ru_utime.tv_usec,
5890 (long)c_ru.ru_stime.tv_sec, (long)c_ru.ru_stime.tv_usec,
5891 (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec);
5892#ifdef RUSAGE_THREAD
5893 struct rusage m_ru;
5894 getrusage(RUSAGE_THREAD, &m_ru);
5895 info = sdscatprintf(info,
5896 "used_cpu_sys_main_thread:%ld.%06ld\r\n"
5897 "used_cpu_user_main_thread:%ld.%06ld\r\n",
5898 (long)m_ru.ru_stime.tv_sec, (long)m_ru.ru_stime.tv_usec,
5899 (long)m_ru.ru_utime.tv_sec, (long)m_ru.ru_utime.tv_usec);
5900#endif /* RUSAGE_THREAD */
5901 }
5902
5903 /* Modules */
5904 if (all_sections || (dictFind(section_dict,"module_list") != NULL) || (dictFind(section_dict,"modules") != NULL)) {
5905 if (sections++) info = sdscat(info,"\r\n");
5906 info = sdscatprintf(info,"# Modules\r\n");
5907 info = genModulesInfoString(info);
5908 }
5909
5910 /* Command statistics */
5911 if (all_sections || (dictFind(section_dict,"commandstats") != NULL)) {
5912 if (sections++) info = sdscat(info,"\r\n");
5913 info = sdscatprintf(info, "# Commandstats\r\n");
5914 info = genRedisInfoStringCommandStats(info, server.commands);
5915 }
5916
5917 /* Error statistics */
5918 if (all_sections || (dictFind(section_dict,"errorstats") != NULL)) {
5919 if (sections++) info = sdscat(info,"\r\n");
5920 info = sdscat(info, "# Errorstats\r\n");
5921 raxIterator ri;
5922 raxStart(&ri,server.errors);
5923 raxSeek(&ri,"^",NULL,0);
5924 struct redisError *e;
5925 while(raxNext(&ri)) {
5926 char *tmpsafe;
5927 e = (struct redisError *) ri.data;
5928 info = sdscatprintf(info,
5929 "errorstat_%.*s:count=%lld\r\n",
5930 (int)ri.key_len, getSafeInfoString((char *) ri.key, ri.key_len, &tmpsafe), e->count);
5931 if (tmpsafe != NULL) zfree(tmpsafe);
5932 }
5933 raxStop(&ri);
5934 }
5935
5936 /* Latency by percentile distribution per command */
5937 if (all_sections || (dictFind(section_dict,"latencystats") != NULL)) {
5938 if (sections++) info = sdscat(info,"\r\n");
5939 info = sdscatprintf(info, "# Latencystats\r\n");
5940 if (server.latency_tracking_enabled) {
5941 info = genRedisInfoStringLatencyStats(info, server.commands);
5942 }
5943 }
5944
5945 /* Cluster */
5946 if (all_sections || (dictFind(section_dict,"cluster") != NULL)) {
5947 if (sections++) info = sdscat(info,"\r\n");
5948 info = sdscatprintf(info,
5949 "# Cluster\r\n"
5950 "cluster_enabled:%d\r\n",
5951 server.cluster_enabled);
5952 }
5953
5954 /* Key space */
5955 if (all_sections || (dictFind(section_dict,"keyspace") != NULL)) {
5956 if (sections++) info = sdscat(info,"\r\n");
5957 info = sdscatprintf(info, "# Keyspace\r\n");
5958 for (j = 0; j < server.dbnum; j++) {
5959 long long keys, vkeys;
5960
5961 keys = dictSize(server.db[j].dict);
5962 vkeys = dictSize(server.db[j].expires);
5963 if (keys || vkeys) {
5964 info = sdscatprintf(info,
5965 "db%d:keys=%lld,expires=%lld,avg_ttl=%lld\r\n",
5966 j, keys, vkeys, server.db[j].avg_ttl);
5967 }
5968 }
5969 }
5970
5971 /* Get info from modules.
5972 * if user asked for "everything" or "modules", or a specific section
5973 * that's not found yet. */
5974 if (everything || dictFind(section_dict, "modules") != NULL || sections < (int)dictSize(section_dict)) {
5975
5976 info = modulesCollectInfo(info,
5977 everything || dictFind(section_dict, "modules") != NULL ? NULL: section_dict,
5978 0, /* not a crash report */
5979 sections);
5980 }
5981 return info;
5982}
5983
5984/* INFO [<section> [<section> ...]] */
5985void infoCommand(client *c) {
5986 if (server.sentinel_mode) {
5987 sentinelInfoCommand(c);
5988 return;
5989 }
5990 int all_sections = 0;
5991 int everything = 0;
5992 dict *sections_dict = genInfoSectionDict(c->argv+1, c->argc-1, NULL, &all_sections, &everything);
5993 sds info = genRedisInfoString(sections_dict, all_sections, everything);
5994 addReplyVerbatim(c,info,sdslen(info),"txt");
5995 sdsfree(info);
5996 releaseInfoSectionDict(sections_dict);
5997 return;
5998}
5999
6000void monitorCommand(client *c) {
6001 if (c->flags & CLIENT_DENY_BLOCKING) {
6002 /**
6003 * A client that has CLIENT_DENY_BLOCKING flag on
6004 * expects a reply per command and so can't execute MONITOR. */
6005 addReplyError(c, "MONITOR isn't allowed for DENY BLOCKING client");
6006 return;
6007 }
6008
6009 /* ignore MONITOR if already slave or in monitor mode */
6010 if (c->flags & CLIENT_SLAVE) return;
6011
6012 c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
6013 listAddNodeTail(server.monitors,c);
6014 addReply(c,shared.ok);
6015}
6016
6017/* =================================== Main! ================================ */
6018
6019int checkIgnoreWarning(const char *warning) {
6020 int argc, j;
6021 sds *argv = sdssplitargs(server.ignore_warnings, &argc);
6022 if (argv == NULL)
6023 return 0;
6024
6025 for (j = 0; j < argc; j++) {
6026 char *flag = argv[j];
6027 if (!strcasecmp(flag, warning))
6028 break;
6029 }
6030 sdsfreesplitres(argv,argc);
6031 return j < argc;
6032}
6033
6034#ifdef __linux__
6035#include <sys/prctl.h>
6036/* since linux-3.5, kernel supports to set the state of the "THP disable" flag
6037 * for the calling thread. PR_SET_THP_DISABLE is defined in linux/prctl.h */
6038static int THPDisable(void) {
6039 int ret = -EINVAL;
6040
6041 if (!server.disable_thp)
6042 return ret;
6043
6044#ifdef PR_SET_THP_DISABLE
6045 ret = prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
6046#endif
6047
6048 return ret;
6049}
6050
6051void linuxMemoryWarnings(void) {
6052 sds err_msg = NULL;
6053 if (checkOvercommit(&err_msg) < 0) {
6054 serverLog(LL_WARNING,"WARNING %s", err_msg);
6055 sdsfree(err_msg);
6056 }
6057 if (checkTHPEnabled(&err_msg) < 0) {
6058 server.thp_enabled = 1;
6059 if (THPDisable() == 0) {
6060 server.thp_enabled = 0;
6061 } else {
6062 serverLog(LL_WARNING, "WARNING %s", err_msg);
6063 }
6064 sdsfree(err_msg);
6065 }
6066}
6067#endif /* __linux__ */
6068
6069void createPidFile(void) {
6070 /* If pidfile requested, but no pidfile defined, use
6071 * default pidfile path */
6072 if (!server.pidfile) server.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE);
6073
6074 /* Try to write the pid file in a best-effort way. */
6075 FILE *fp = fopen(server.pidfile,"w");
6076 if (fp) {
6077 fprintf(fp,"%d\n",(int)getpid());
6078 fclose(fp);
6079 }
6080}
6081
/* Detach from the launching process: the process that called fork() exits
 * with status 0 whenever fork() returns non-zero, the survivor starts a new
 * session and redirects the standard descriptors to /dev/null. */
void daemonize(void) {
    if (fork() != 0) exit(0); /* parent exits */
    setsid(); /* create a new session */

    /* Every output goes to /dev/null. If Redis is daemonized but
     * the 'logfile' is set to 'stdout' in the configuration file
     * it will not log at all. */
    int devnull = open("/dev/null", O_RDWR, 0);
    if (devnull == -1) return;

    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);
    /* Close the extra descriptor unless it is one of the standard three. */
    if (devnull > STDERR_FILENO) close(devnull);
}
6098
6099void version(void) {
6100 printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d build=%llx\n",
6101 REDIS_VERSION,
6102 redisGitSHA1(),
6103 atoi(redisGitDirty()) > 0,
6104 ZMALLOC_LIB,
6105 sizeof(long) == 4 ? 32 : 64,
6106 (unsigned long long) redisBuildId());
6107 exit(0);
6108}
6109
/* Print the command line help to stderr and exit with status 1. */
void usage(void) {
    static const char *const help_lines[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options] [-]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n",
        " ./redis-server --check-system\n",
        "\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " echo 'maxmemory 128mb' | ./redis-server -\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --replicaof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose -\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n\n",
        "Sentinel mode:\n",
        " ./redis-server /etc/sentinel.conf --sentinel\n"
    };
    size_t count = sizeof(help_lines)/sizeof(help_lines[0]);

    for (size_t i = 0; i < count; i++)
        fprintf(stderr,"%s",help_lines[i]);
    exit(1);
}
6130
6131void redisAsciiArt(void) {
6132#include "asciilogo.h"
6133 char *buf = zmalloc(1024*16);
6134 char *mode;
6135
6136 if (server.cluster_enabled) mode = "cluster";
6137 else if (server.sentinel_mode) mode = "sentinel";
6138 else mode = "standalone";
6139
6140 /* Show the ASCII logo if: log file is stdout AND stdout is a
6141 * tty AND syslog logging is disabled. Also show logo if the user
6142 * forced us to do so via redis.conf. */
6143 int = ((!server.syslog_enabled &&
6144 server.logfile[0] == '\0' &&
6145 isatty(fileno(stdout))) ||
6146 server.always_show_logo);
6147
6148 if (!show_logo) {
6149 serverLog(LL_NOTICE,
6150 "Running mode=%s, port=%d.",
6151 mode, server.port ? server.port : server.tls_port
6152 );
6153 } else {
6154 snprintf(buf,1024*16,ascii_logo,
6155 REDIS_VERSION,
6156 redisGitSHA1(),
6157 strtol(redisGitDirty(),NULL,10) > 0,
6158 (sizeof(long) == 8) ? "64" : "32",
6159 mode, server.port ? server.port : server.tls_port,
6160 (long) getpid()
6161 );
6162 serverLogRaw(LL_NOTICE|LL_RAW,buf);
6163 }
6164 zfree(buf);
6165}
6166
6167int changeBindAddr(void) {
6168 /* Close old TCP and TLS servers */
6169 closeSocketListeners(&server.ipfd);
6170 closeSocketListeners(&server.tlsfd);
6171
6172 /* Bind to the new port */
6173 if ((server.port != 0 && listenToPort(server.port, &server.ipfd) != C_OK) ||
6174 (server.tls_port != 0 && listenToPort(server.tls_port, &server.tlsfd) != C_OK)) {
6175 serverLog(LL_WARNING, "Failed to bind");
6176
6177 closeSocketListeners(&server.ipfd);
6178 closeSocketListeners(&server.tlsfd);
6179 return C_ERR;
6180 }
6181
6182 /* Create TCP and TLS event handlers */
6183 if (createSocketAcceptHandler(&server.ipfd, acceptTcpHandler) != C_OK) {
6184 serverPanic("Unrecoverable error creating TCP socket accept handler.");
6185 }
6186 if (createSocketAcceptHandler(&server.tlsfd, acceptTLSHandler) != C_OK) {
6187 serverPanic("Unrecoverable error creating TLS socket accept handler.");
6188 }
6189
6190 if (server.set_proc_title) redisSetProcTitle(NULL);
6191
6192 return C_OK;
6193}
6194
6195int changeListenPort(int port, socketFds *sfd, aeFileProc *accept_handler) {
6196 socketFds new_sfd = {{0}};
6197
6198 /* Close old servers */
6199 closeSocketListeners(sfd);
6200
6201 /* Just close the server if port disabled */
6202 if (port == 0) {
6203 if (server.set_proc_title) redisSetProcTitle(NULL);
6204 return C_OK;
6205 }
6206
6207 /* Bind to the new port */
6208 if (listenToPort(port, &new_sfd) != C_OK) {
6209 return C_ERR;
6210 }
6211
6212 /* Create event handlers */
6213 if (createSocketAcceptHandler(&new_sfd, accept_handler) != C_OK) {
6214 closeSocketListeners(&new_sfd);
6215 return C_ERR;
6216 }
6217
6218 /* Copy new descriptors */
6219 sfd->count = new_sfd.count;
6220 memcpy(sfd->fd, new_sfd.fd, sizeof(new_sfd.fd));
6221
6222 if (server.set_proc_title) redisSetProcTitle(NULL);
6223
6224 return C_OK;
6225}
6226
6227static void sigShutdownHandler(int sig) {
6228 char *msg;
6229
6230 switch (sig) {
6231 case SIGINT:
6232 msg = "Received SIGINT scheduling shutdown...";
6233 break;
6234 case SIGTERM:
6235 msg = "Received SIGTERM scheduling shutdown...";
6236 break;
6237 default:
6238 msg = "Received shutdown signal, scheduling shutdown...";
6239 };
6240
6241 /* SIGINT is often delivered via Ctrl+C in an interactive session.
6242 * If we receive the signal the second time, we interpret this as
6243 * the user really wanting to quit ASAP without waiting to persist
6244 * on disk and without waiting for lagging replicas. */
6245 if (server.shutdown_asap && sig == SIGINT) {
6246 serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
6247 rdbRemoveTempFile(getpid(), 1);
6248 exit(1); /* Exit with an error since this was not a clean shutdown. */
6249 } else if (server.loading) {
6250 msg = "Received shutdown signal during loading, scheduling shutdown.";
6251 }
6252
6253 serverLogFromHandler(LL_WARNING, msg);
6254 server.shutdown_asap = 1;
6255 server.last_sig_received = sig;
6256}
6257
6258void setupSignalHandlers(void) {
6259 struct sigaction act;
6260
6261 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
6262 * Otherwise, sa_handler is used. */
6263 sigemptyset(&act.sa_mask);
6264 act.sa_flags = 0;
6265 act.sa_handler = sigShutdownHandler;
6266 sigaction(SIGTERM, &act, NULL);
6267 sigaction(SIGINT, &act, NULL);
6268
6269 sigemptyset(&act.sa_mask);
6270 act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
6271 act.sa_sigaction = sigsegvHandler;
6272 if(server.crashlog_enabled) {
6273 sigaction(SIGSEGV, &act, NULL);
6274 sigaction(SIGBUS, &act, NULL);
6275 sigaction(SIGFPE, &act, NULL);
6276 sigaction(SIGILL, &act, NULL);
6277 sigaction(SIGABRT, &act, NULL);
6278 }
6279 return;
6280}
6281
6282void removeSignalHandlers(void) {
6283 struct sigaction act;
6284 sigemptyset(&act.sa_mask);
6285 act.sa_flags = SA_NODEFER | SA_RESETHAND;
6286 act.sa_handler = SIG_DFL;
6287 sigaction(SIGSEGV, &act, NULL);
6288 sigaction(SIGBUS, &act, NULL);
6289 sigaction(SIGFPE, &act, NULL);
6290 sigaction(SIGILL, &act, NULL);
6291 sigaction(SIGABRT, &act, NULL);
6292}
6293
6294/* This is the signal handler for children process. It is currently useful
6295 * in order to track the SIGUSR1, that we send to a child in order to terminate
6296 * it in a clean way, without the parent detecting an error and stop
6297 * accepting writes because of a write error condition. */
6298static void sigKillChildHandler(int sig) {
6299 UNUSED(sig);
6300 int level = server.in_fork_child == CHILD_TYPE_MODULE? LL_VERBOSE: LL_WARNING;
6301 serverLogFromHandler(level, "Received SIGUSR1 in child, exiting now.");
6302 exitFromChild(SERVER_CHILD_NOERROR_RETVAL);
6303}
6304
6305void setupChildSignalHandlers(void) {
6306 struct sigaction act;
6307
6308 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
6309 * Otherwise, sa_handler is used. */
6310 sigemptyset(&act.sa_mask);
6311 act.sa_flags = 0;
6312 act.sa_handler = sigKillChildHandler;
6313 sigaction(SIGUSR1, &act, NULL);
6314}
6315
6316/* After fork, the child process will inherit the resources
6317 * of the parent process, e.g. fd(socket or flock) etc.
6318 * should close the resources not used by the child process, so that if the
6319 * parent restarts it can bind/lock despite the child possibly still running. */
6320void closeChildUnusedResourceAfterFork() {
6321 closeListeningSockets(0);
6322 if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
6323 close(server.cluster_config_file_lock_fd); /* don't care if this fails */
6324
6325 /* Clear server.pidfile, this is the parent pidfile which should not
6326 * be touched (or deleted) by the child (on exit / crash) */
6327 zfree(server.pidfile);
6328 server.pidfile = NULL;
6329}
6330
6331/* purpose is one of CHILD_TYPE_ types */
6332int redisFork(int purpose) {
6333 if (isMutuallyExclusiveChildType(purpose)) {
6334 if (hasActiveChildProcess()) {
6335 errno = EEXIST;
6336 return -1;
6337 }
6338
6339 openChildInfoPipe();
6340 }
6341
6342 int childpid;
6343 long long start = ustime();
6344 if ((childpid = fork()) == 0) {
6345 /* Child.
6346 *
6347 * The order of setting things up follows some reasoning:
6348 * Setup signal handlers first because a signal could fire at any time.
6349 * Adjust OOM score before everything else to assist the OOM killer if
6350 * memory resources are low.
6351 */
6352 server.in_fork_child = purpose;
6353 setupChildSignalHandlers();
6354 setOOMScoreAdj(CONFIG_OOM_BGCHILD);
6355 dismissMemoryInChild();
6356 closeChildUnusedResourceAfterFork();
6357 } else {
6358 /* Parent */
6359 if (childpid == -1) {
6360 int fork_errno = errno;
6361 if (isMutuallyExclusiveChildType(purpose)) closeChildInfoPipe();
6362 errno = fork_errno;
6363 return -1;
6364 }
6365
6366 server.stat_total_forks++;
6367 server.stat_fork_time = ustime()-start;
6368 server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
6369 latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
6370
6371 /* The child_pid and child_type are only for mutually exclusive children.
6372 * other child types should handle and store their pid's in dedicated variables.
6373 *
6374 * Today, we allows CHILD_TYPE_LDB to run in parallel with the other fork types:
6375 * - it isn't used for production, so it will not make the server be less efficient
6376 * - used for debugging, and we don't want to block it from running while other
6377 * forks are running (like RDB and AOF) */
6378 if (isMutuallyExclusiveChildType(purpose)) {
6379 server.child_pid = childpid;
6380 server.child_type = purpose;
6381 server.stat_current_cow_peak = 0;
6382 server.stat_current_cow_bytes = 0;
6383 server.stat_current_cow_updated = 0;
6384 server.stat_current_save_keys_processed = 0;
6385 server.stat_module_progress = 0;
6386 server.stat_current_save_keys_total = dbTotalServerKeyCount();
6387 }
6388
6389 updateDictResizePolicy();
6390 moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
6391 REDISMODULE_SUBEVENT_FORK_CHILD_BORN,
6392 NULL);
6393 }
6394 return childpid;
6395}
6396
/* Forward 'info_type' and 'pname' to sendChildInfoGeneric(), passing 0 as the
 * second argument (the position sendChildInfo() uses for its 'keys' value)
 * and -1 as the third.
 * NOTE(review): the meaning of the 0 / -1 arguments is defined by
 * sendChildInfoGeneric(), which is not visible here -- confirm there. */
void sendChildCowInfo(childInfoType info_type, char *pname) {
    sendChildInfoGeneric(info_type, 0, -1, pname);
}
6400
/* Forward 'info_type', 'keys' and 'pname' to sendChildInfoGeneric(), passing
 * -1 as the third argument.
 * NOTE(review): the meaning of the -1 argument is defined by
 * sendChildInfoGeneric(), which is not visible here -- confirm there. */
void sendChildInfo(childInfoType info_type, size_t keys, char *pname) {
    sendChildInfoGeneric(info_type, keys, -1, pname);
}
6404
6405/* Try to release pages back to the OS directly (bypassing the allocator),
6406 * in an effort to decrease CoW during fork. For small allocations, we can't
6407 * release any full page, so in an effort to avoid getting the size of the
6408 * allocation from the allocator (malloc_size) when we already know it's small,
6409 * we check the size_hint. If the size is not already known, passing a size_hint
6410 * of 0 will lead the checking the real size of the allocation.
6411 * Also please note that the size may be not accurate, so in order to make this
6412 * solution effective, the judgement for releasing memory pages should not be
6413 * too strict. */
6414void dismissMemory(void* ptr, size_t size_hint) {
6415 if (ptr == NULL) return;
6416
6417 /* madvise(MADV_DONTNEED) can not release pages if the size of memory
6418 * is too small, we try to release only for the memory which the size
6419 * is more than half of page size. */
6420 if (size_hint && size_hint <= server.page_size/2) return;
6421
6422 zmadvise_dontneed(ptr);
6423}
6424
6425/* Dismiss big chunks of memory inside a client structure, see dismissMemory() */
6426void dismissClientMemory(client *c) {
6427 /* Dismiss client query buffer and static reply buffer. */
6428 dismissMemory(c->buf, c->buf_usable_size);
6429 dismissSds(c->querybuf);
6430 /* Dismiss argv array only if we estimate it contains a big buffer. */
6431 if (c->argc && c->argv_len_sum/c->argc >= server.page_size) {
6432 for (int i = 0; i < c->argc; i++) {
6433 dismissObject(c->argv[i], 0);
6434 }
6435 }
6436 if (c->argc) dismissMemory(c->argv, c->argc*sizeof(robj*));
6437
6438 /* Dismiss the reply array only if the average buffer size is bigger
6439 * than a page. */
6440 if (listLength(c->reply) &&
6441 c->reply_bytes/listLength(c->reply) >= server.page_size)
6442 {
6443 listIter li;
6444 listNode *ln;
6445 listRewind(c->reply, &li);
6446 while ((ln = listNext(&li))) {
6447 clientReplyBlock *bulk = listNodeValue(ln);
6448 /* Default bulk size is 16k, actually it has extra data, maybe it
6449 * occupies 20k according to jemalloc bin size if using jemalloc. */
6450 if (bulk) dismissMemory(bulk, bulk->size);
6451 }
6452 }
6453}
6454
6455/* In the child process, we don't need some buffers anymore, and these are
6456 * likely to change in the parent when there's heavy write traffic.
6457 * We dismiss them right away, to avoid CoW.
6458 * see dismissMemeory(). */
6459void dismissMemoryInChild(void) {
6460 /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
6461 if (server.thp_enabled) return;
6462
6463 /* Currently we use zmadvise_dontneed only when we use jemalloc with Linux.
6464 * so we avoid these pointless loops when they're not going to do anything. */
6465#if defined(USE_JEMALLOC) && defined(__linux__)
6466 listIter li;
6467 listNode *ln;
6468
6469 /* Dismiss replication buffer. We don't need to separately dismiss replication
6470 * backlog and replica' output buffer, because they just reference the global
6471 * replication buffer but don't cost real memory. */
6472 listRewind(server.repl_buffer_blocks, &li);
6473 while((ln = listNext(&li))) {
6474 replBufBlock *o = listNodeValue(ln);
6475 dismissMemory(o, o->size);
6476 }
6477
6478 /* Dismiss all clients memory. */
6479 listRewind(server.clients, &li);
6480 while((ln = listNext(&li))) {
6481 client *c = listNodeValue(ln);
6482 dismissClientMemory(c);
6483 }
6484#endif
6485}
6486
6487void memtest(size_t megabytes, int passes);
6488
/* Returns 1 if 'exec_name' contains "redis-sentinel" or if one of the
 * arguments argv[1..argc-1] is exactly "--sentinel"; returns 0 otherwise. */
int checkForSentinelMode(int argc, char **argv, char *exec_name) {
    int found = (strstr(exec_name,"redis-sentinel") != NULL);

    /* argv[0] is deliberately not inspected. */
    for (int idx = 1; !found && idx < argc; idx++)
        found = (strcmp(argv[idx],"--sentinel") == 0);
    return found;
}
6498
/* Function called at startup to load RDB or AOF file in memory.
 *
 * When server.aof_state is AOF_ON the AOF files are loaded; an AOF_FAILED or
 * AOF_OPEN_ERR result terminates the process with exit(1). Otherwise the RDB
 * file server.rdb_filename is loaded and, when it carries replication
 * information, the replication ID / offset state is restored from it. An RDB
 * load failure whose errno is not ENOENT is logged and the process exits
 * with status 1. */
void loadDataFromDisk(void) {
    long long start = ustime();
    if (server.aof_state == AOF_ON) {
        int ret = loadAppendOnlyFiles(server.aof_manifest);
        if (ret == AOF_FAILED || ret == AOF_OPEN_ERR)
            exit(1);
        /* Only report a load time when AOF files actually existed. */
        if (ret != AOF_NOT_EXIST)
            serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000);
    } else {
        rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
        errno = 0; /* Prevent a stale value from affecting error checking */
        int rdb_flags = RDBFLAGS_NONE;
        if (iAmMaster()) {
            /* Master may delete expired keys when loading, we should
             * propagate expire to replication backlog. */
            createReplicationBacklog();
            rdb_flags |= RDBFLAGS_FEED_REPL;
        }
        if (rdbLoad(server.rdb_filename,&rsi,rdb_flags) == C_OK) {
            serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
                (float)(ustime()-start)/1000000);

            /* Restore the replication ID / offset from the RDB file. */
            if (rsi.repl_id_is_set &&
                rsi.repl_offset != -1 &&
                /* Note that older implementations may save a repl_stream_db
                 * of -1 inside the RDB file in a wrong way, see more
                 * information in function rdbPopulateSaveInfo. */
                rsi.repl_stream_db != -1)
            {
                if (!iAmMaster()) {
                    memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
                    server.master_repl_offset = rsi.repl_offset;
                    /* If this is a replica, create a cached master from this
                     * information, in order to allow partial resynchronizations
                     * with masters. */
                    replicationCacheMasterUsingMyself();
                    selectDb(server.cached_master,rsi.repl_stream_db);
                } else {
                    /* If this is a master, we can save the replication info
                     * as secondary ID and offset, in order to allow replicas
                     * to partial resynchronizations with masters. */
                    /* NOTE(review): the copy length is sizeof(server.replid)
                     * although the destination is server.replid2 -- presumably
                     * both arrays have the same size; confirm in server.h. */
                    memcpy(server.replid2,rsi.repl_id,sizeof(server.replid));
                    server.second_replid_offset = rsi.repl_offset+1;
                    /* Rebase master_repl_offset from rsi.repl_offset. */
                    server.master_repl_offset += rsi.repl_offset;
                    serverAssert(server.repl_backlog);
                    server.repl_backlog->offset = server.master_repl_offset -
                              server.repl_backlog->histlen + 1;
                    rebaseReplicationBuffer(rsi.repl_offset);
                    server.repl_no_slaves_since = time(NULL);
                }
            }
        } else if (errno != ENOENT) {
            /* A missing RDB file (ENOENT) is not an error: start empty. */
            serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
            exit(1);
        }

        /* We always create replication backlog if server is a master, we need
         * it because we put DELs in it when loading expired keys in RDB, but
         * if RDB doesn't have replication info or there is no rdb, it is not
         * possible to support partial resynchronization, to avoid extra memory
         * of replication backlog, we drop it. */
        if (server.master_repl_offset == 0 && server.repl_backlog)
            freeReplicationBacklog();
    }
}
6567
/* Out-of-memory callback; main() registers it with
 * zmalloc_set_oom_handler(). It logs the size of the allocation at
 * LL_WARNING level and then calls serverPanic() with the same size.
 *
 * allocation_size: number of bytes reported in both messages. */
void redisOutOfMemoryHandler(size_t allocation_size) {
    serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
        allocation_size);
    serverPanic("Redis aborting for OUT OF MEMORY. Allocating %zu bytes!",
        allocation_size);
}
6574
6575/* Callback for sdstemplate on proc-title-template. See redis.conf for
6576 * supported variables.
6577 */
6578static sds redisProcTitleGetVariable(const sds varname, void *arg)
6579{
6580 if (!strcmp(varname, "title")) {
6581 return sdsnew(arg);
6582 } else if (!strcmp(varname, "listen-addr")) {
6583 if (server.port || server.tls_port)
6584 return sdscatprintf(sdsempty(), "%s:%u",
6585 server.bindaddr_count ? server.bindaddr[0] : "*",
6586 server.port ? server.port : server.tls_port);
6587 else
6588 return sdscatprintf(sdsempty(), "unixsocket:%s", server.unixsocket);
6589 } else if (!strcmp(varname, "server-mode")) {
6590 if (server.cluster_enabled) return sdsnew("[cluster]");
6591 else if (server.sentinel_mode) return sdsnew("[sentinel]");
6592 else return sdsempty();
6593 } else if (!strcmp(varname, "config-file")) {
6594 return sdsnew(server.configfile ? server.configfile : "-");
6595 } else if (!strcmp(varname, "port")) {
6596 return sdscatprintf(sdsempty(), "%u", server.port);
6597 } else if (!strcmp(varname, "tls-port")) {
6598 return sdscatprintf(sdsempty(), "%u", server.tls_port);
6599 } else if (!strcmp(varname, "unixsocket")) {
6600 return sdsnew(server.unixsocket);
6601 } else
6602 return NULL; /* Unknown variable name */
6603}
6604
6605/* Expand the specified proc-title-template string and return a newly
6606 * allocated sds, or NULL. */
6607static sds expandProcTitleTemplate(const char *template, const char *title) {
6608 sds res = sdstemplate(template, redisProcTitleGetVariable, (void *) title);
6609 if (!res)
6610 return NULL;
6611 return sdstrim(res, " ");
6612}
6613/* Validate the specified template, returns 1 if valid or 0 otherwise. */
6614int validateProcTitleTemplate(const char *template) {
6615 int ok = 1;
6616 sds res = expandProcTitleTemplate(template, "");
6617 if (!res)
6618 return 0;
6619 if (sdslen(res) == 0) ok = 0;
6620 sdsfree(res);
6621 return ok;
6622}
6623
6624int redisSetProcTitle(char *title) {
6625#ifdef USE_SETPROCTITLE
6626 if (!title) title = server.exec_argv[0];
6627 sds proc_title = expandProcTitleTemplate(server.proc_title_template, title);
6628 if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */
6629
6630 setproctitle("%s", proc_title);
6631 sdsfree(proc_title);
6632#else
6633 UNUSED(title);
6634#endif
6635
6636 return C_OK;
6637}
6638
/* Pass 'cpulist' to setcpuaffinity() when the build defines
 * USE_SETCPUAFFINITY; otherwise this is a no-op.
 * NOTE(review): the accepted format of 'cpulist' is defined by
 * setcpuaffinity(), which is not visible here. */
void redisSetCpuAffinity(const char *cpulist) {
#ifdef USE_SETCPUAFFINITY
    setcpuaffinity(cpulist);
#else
    UNUSED(cpulist);
#endif
}
6646
/* Send a notify message to systemd. Returns the sd_notify return code, which
 * is a positive number on success. A zero or negative code is logged as a
 * warning. When built without libsystemd support this always returns 0. */
int redisCommunicateSystemd(const char *sd_notify_msg) {
#ifdef HAVE_LIBSYSTEMD
    int rc = sd_notify(0, sd_notify_msg);

    if (rc < 0) {
        serverLog(LL_WARNING, "systemd supervision error: sd_notify: %d", rc);
    } else if (rc == 0) {
        serverLog(LL_WARNING, "systemd supervision error: NOTIFY_SOCKET not found!");
    }
    return rc;
#else
    UNUSED(sd_notify_msg);
    return 0;
#endif
}
6663
6664/* Attempt to set up upstart supervision. Returns 1 if successful. */
6665static int redisSupervisedUpstart(void) {
6666 const char *upstart_job = getenv("UPSTART_JOB");
6667
6668 if (!upstart_job) {
6669 serverLog(LL_WARNING,
6670 "upstart supervision requested, but UPSTART_JOB not found!");
6671 return 0;
6672 }
6673
6674 serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness.");
6675 raise(SIGSTOP);
6676 unsetenv("UPSTART_JOB");
6677 return 1;
6678}
6679
6680/* Attempt to set up systemd supervision. Returns 1 if successful. */
6681static int redisSupervisedSystemd(void) {
6682#ifndef HAVE_LIBSYSTEMD
6683 serverLog(LL_WARNING,
6684 "systemd supervision requested or auto-detected, but Redis is compiled without libsystemd support!");
6685 return 0;
6686#else
6687 if (redisCommunicateSystemd("STATUS=Redis is loading...\n") <= 0)
6688 return 0;
6689 serverLog(LL_NOTICE,
6690 "Supervised by systemd. Please make sure you set appropriate values for TimeoutStartSec and TimeoutStopSec in your service unit.");
6691 return 1;
6692#endif
6693}
6694
6695int redisIsSupervised(int mode) {
6696 int ret = 0;
6697
6698 if (mode == SUPERVISED_AUTODETECT) {
6699 if (getenv("UPSTART_JOB")) {
6700 serverLog(LL_VERBOSE, "Upstart supervision detected.");
6701 mode = SUPERVISED_UPSTART;
6702 } else if (getenv("NOTIFY_SOCKET")) {
6703 serverLog(LL_VERBOSE, "Systemd supervision detected.");
6704 mode = SUPERVISED_SYSTEMD;
6705 }
6706 }
6707
6708 switch (mode) {
6709 case SUPERVISED_UPSTART:
6710 ret = redisSupervisedUpstart();
6711 break;
6712 case SUPERVISED_SYSTEMD:
6713 ret = redisSupervisedSystemd();
6714 break;
6715 default:
6716 break;
6717 }
6718
6719 if (ret)
6720 server.supervised_mode = mode;
6721
6722 return ret;
6723}
6724
6725int iAmMaster(void) {
6726 return ((!server.cluster_enabled && server.masterhost == NULL) ||
6727 (server.cluster_enabled && nodeIsMaster(server.cluster->myself)));
6728}
6729
6730#ifdef REDIS_TEST
6731#include "testhelp.h"
6732
/* Global test counters.
 * NOTE(review): presumably consumed by the macros in testhelp.h -- they are
 * not referenced in the visible part of this file; confirm. */
int __failed_tests = 0;
int __test_num = 0;

/* Signature of a unit test entry point. The flags are the following:
 * --accurate: Runs tests with more iterations.
 * --large-memory: Enables tests that consume more than 100mb. */
typedef int redisTestProc(int argc, char **argv, int flags);

/* Table of the built-in unit tests, selectable by name from the command
 * line ("test <name>" or "test all", see main()). */
struct redisTest {
    char *name;          /* Name matched case-insensitively by getTestProcByName(). */
    redisTestProc *proc; /* Test entry point. */
    int failed;          /* Set by main() when running "all": 1 if proc returned non-zero. */
} redisTests[] = {
    {"ziplist", ziplistTest},
    {"quicklist", quicklistTest},
    {"intset", intsetTest},
    {"zipmap", zipmapTest},
    {"sha1test", sha1Test},
    {"util", utilTest},
    {"endianconv", endianconvTest},
    {"crc64", crc64Test},
    {"zmalloc", zmalloc_test},
    {"sds", sdsTest},
    {"dict", dictTest},
    {"listpack", listpackTest}
};
6758redisTestProc *getTestProcByName(const char *name) {
6759 int numtests = sizeof(redisTests)/sizeof(struct redisTest);
6760 for (int j = 0; j < numtests; j++) {
6761 if (!strcasecmp(name,redisTests[j].name)) {
6762 return redisTests[j].proc;
6763 }
6764 }
6765 return NULL;
6766}
6767#endif
6768
6769int main(int argc, char **argv) {
6770 struct timeval tv;
6771 int j;
6772 char config_from_stdin = 0;
6773
6774#ifdef REDIS_TEST
6775 if (argc >= 3 && !strcasecmp(argv[1], "test")) {
6776 int flags = 0;
6777 for (j = 3; j < argc; j++) {
6778 char *arg = argv[j];
6779 if (!strcasecmp(arg, "--accurate")) flags |= REDIS_TEST_ACCURATE;
6780 else if (!strcasecmp(arg, "--large-memory")) flags |= REDIS_TEST_LARGE_MEMORY;
6781 }
6782
6783 if (!strcasecmp(argv[2], "all")) {
6784 int numtests = sizeof(redisTests)/sizeof(struct redisTest);
6785 for (j = 0; j < numtests; j++) {
6786 redisTests[j].failed = (redisTests[j].proc(argc,argv,flags) != 0);
6787 }
6788
6789 /* Report tests result */
6790 int failed_num = 0;
6791 for (j = 0; j < numtests; j++) {
6792 if (redisTests[j].failed) {
6793 failed_num++;
6794 printf("[failed] Test - %s\n", redisTests[j].name);
6795 } else {
6796 printf("[ok] Test - %s\n", redisTests[j].name);
6797 }
6798 }
6799
6800 printf("%d tests, %d passed, %d failed\n", numtests,
6801 numtests-failed_num, failed_num);
6802
6803 return failed_num == 0 ? 0 : 1;
6804 } else {
6805 redisTestProc *proc = getTestProcByName(argv[2]);
6806 if (!proc) return -1; /* test not found */
6807 return proc(argc,argv,flags);
6808 }
6809
6810 return 0;
6811 }
6812#endif
6813
6814 /* We need to initialize our libraries, and the server configuration. */
6815#ifdef INIT_SETPROCTITLE_REPLACEMENT
6816 spt_init(argc, argv);
6817#endif
6818 setlocale(LC_COLLATE,"");
6819 tzset(); /* Populates 'timezone' global. */
6820 zmalloc_set_oom_handler(redisOutOfMemoryHandler);
6821
6822 /* To achieve entropy, in case of containers, their time() and getpid() can
6823 * be the same. But value of tv_usec is fast enough to make the difference */
6824 gettimeofday(&tv,NULL);
6825 srand(time(NULL)^getpid()^tv.tv_usec);
6826 srandom(time(NULL)^getpid()^tv.tv_usec);
6827 init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
6828 crc64_init();
6829
6830 /* Store umask value. Because umask(2) only offers a set-and-get API we have
6831 * to reset it and restore it back. We do this early to avoid a potential
6832 * race condition with threads that could be creating files or directories.
6833 */
6834 umask(server.umask = umask(0777));
6835
6836 uint8_t hashseed[16];
6837 getRandomBytes(hashseed,sizeof(hashseed));
6838 dictSetHashFunctionSeed(hashseed);
6839
6840 char *exec_name = strrchr(argv[0], '/');
6841 if (exec_name == NULL) exec_name = argv[0];
6842 server.sentinel_mode = checkForSentinelMode(argc,argv, exec_name);
6843 initServerConfig();
6844 ACLInit(); /* The ACL subsystem must be initialized ASAP because the
6845 basic networking code and client creation depends on it. */
6846 moduleInitModulesSystem();
6847 tlsInit();
6848
6849 /* Store the executable path and arguments in a safe place in order
6850 * to be able to restart the server later. */
6851 server.executable = getAbsolutePath(argv[0]);
6852 server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
6853 server.exec_argv[argc] = NULL;
6854 for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
6855
6856 /* We need to init sentinel right now as parsing the configuration file
6857 * in sentinel mode will have the effect of populating the sentinel
6858 * data structures with master nodes to monitor. */
6859 if (server.sentinel_mode) {
6860 initSentinelConfig();
6861 initSentinel();
6862 }
6863
6864 /* Check if we need to start in redis-check-rdb/aof mode. We just execute
6865 * the program main. However the program is part of the Redis executable
6866 * so that we can easily execute an RDB check on loading errors. */
6867 if (strstr(exec_name,"redis-check-rdb") != NULL)
6868 redis_check_rdb_main(argc,argv,NULL);
6869 else if (strstr(exec_name,"redis-check-aof") != NULL)
6870 redis_check_aof_main(argc,argv);
6871
6872 if (argc >= 2) {
6873 j = 1; /* First option to parse in argv[] */
6874 sds options = sdsempty();
6875
6876 /* Handle special options --help and --version */
6877 if (strcmp(argv[1], "-v") == 0 ||
6878 strcmp(argv[1], "--version") == 0) version();
6879 if (strcmp(argv[1], "--help") == 0 ||
6880 strcmp(argv[1], "-h") == 0) usage();
6881 if (strcmp(argv[1], "--test-memory") == 0) {
6882 if (argc == 3) {
6883 memtest(atoi(argv[2]),50);
6884 exit(0);
6885 } else {
6886 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
6887 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
6888 exit(1);
6889 }
6890 } if (strcmp(argv[1], "--check-system") == 0) {
6891 exit(syscheck() ? 0 : 1);
6892 }
6893 /* Parse command line options
6894 * Precedence wise, File, stdin, explicit options -- last config is the one that matters.
6895 *
6896 * First argument is the config file name? */
6897 if (argv[1][0] != '-') {
6898 /* Replace the config file in server.exec_argv with its absolute path. */
6899 server.configfile = getAbsolutePath(argv[1]);
6900 zfree(server.exec_argv[1]);
6901 server.exec_argv[1] = zstrdup(server.configfile);
6902 j = 2; // Skip this arg when parsing options
6903 }
6904 sds *argv_tmp;
6905 int argc_tmp;
6906 int handled_last_config_arg = 1;
6907 while(j < argc) {
6908 /* Either first or last argument - Should we read config from stdin? */
6909 if (argv[j][0] == '-' && argv[j][1] == '\0' && (j == 1 || j == argc-1)) {
6910 config_from_stdin = 1;
6911 }
6912 /* All the other options are parsed and conceptually appended to the
6913 * configuration file. For instance --port 6380 will generate the
6914 * string "port 6380\n" to be parsed after the actual config file
6915 * and stdin input are parsed (if they exist).
6916 * Only consider that if the last config has at least one argument. */
6917 else if (handled_last_config_arg && argv[j][0] == '-' && argv[j][1] == '-') {
6918 /* Option name */
6919 if (sdslen(options)) options = sdscat(options,"\n");
6920 /* argv[j]+2 for removing the preceding `--` */
6921 options = sdscat(options,argv[j]+2);
6922 options = sdscat(options," ");
6923
6924 argv_tmp = sdssplitargs(argv[j], &argc_tmp);
6925 if (argc_tmp == 1) {
6926 /* Means that we only have one option name, like --port or "--port " */
6927 handled_last_config_arg = 0;
6928
6929 if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
6930 !strcasecmp(argv[j], "--save"))
6931 {
6932 /* Special case: handle some things like `--save --config value`.
6933 * In this case, if next argument starts with `--`, we will reset
6934 * handled_last_config_arg flag and append an empty "" config value
6935 * to the options, so it will become `--save "" --config value`.
6936 * We are doing it to be compatible with pre 7.0 behavior (which we
6937 * break it in #10660, 7.0.1), since there might be users who generate
6938 * a command line from an array and when it's empty that's what they produce. */
6939 options = sdscat(options, "\"\"");
6940 handled_last_config_arg = 1;
6941 }
6942 else if ((j == argc-1) && !strcasecmp(argv[j], "--save")) {
6943 /* Special case: when empty save is the last argument.
6944 * In this case, we append an empty "" config value to the options,
6945 * so it will become `--save ""` and will follow the same reset thing. */
6946 options = sdscat(options, "\"\"");
6947 }
6948 } else {
6949 /* Means that we are passing both config name and it's value in the same arg,
6950 * like "--port 6380", so we need to reset handled_last_config_arg flag. */
6951 handled_last_config_arg = 1;
6952 }
6953 sdsfreesplitres(argv_tmp, argc_tmp);
6954 } else {
6955 /* Option argument */
6956 options = sdscatrepr(options,argv[j],strlen(argv[j]));
6957 options = sdscat(options," ");
6958 handled_last_config_arg = 1;
6959 }
6960 j++;
6961 }
6962
6963 loadServerConfig(server.configfile, config_from_stdin, options);
6964 if (server.sentinel_mode) loadSentinelConfigFromQueue();
6965 sdsfree(options);
6966 }
6967 if (server.sentinel_mode) sentinelCheckConfigFile();
6968 server.supervised = redisIsSupervised(server.supervised_mode);
6969 int background = server.daemonize && !server.supervised;
6970 if (background) daemonize();
6971
6972 serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
6973 serverLog(LL_WARNING,
6974 "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
6975 REDIS_VERSION,
6976 (sizeof(long) == 8) ? 64 : 32,
6977 redisGitSHA1(),
6978 strtol(redisGitDirty(),NULL,10) > 0,
6979 (int)getpid());
6980
6981 if (argc == 1) {
6982 serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/redis.conf", argv[0]);
6983 } else {
6984 serverLog(LL_WARNING, "Configuration loaded");
6985 }
6986
6987 initServer();
6988 if (background || server.pidfile) createPidFile();
6989 if (server.set_proc_title) redisSetProcTitle(NULL);
6990 redisAsciiArt();
6991 checkTcpBacklogSettings();
6992
6993 if (!server.sentinel_mode) {
6994 /* Things not needed when running in Sentinel mode. */
6995 serverLog(LL_WARNING,"Server initialized");
6996 #ifdef __linux__
6997 linuxMemoryWarnings();
6998 sds err_msg = NULL;
6999 if (checkXenClocksource(&err_msg) < 0) {
7000 serverLog(LL_WARNING, "WARNING %s", err_msg);
7001 sdsfree(err_msg);
7002 }
7003 #if defined (__arm64__)
7004 int ret;
7005 if ((ret = checkLinuxMadvFreeForkBug(&err_msg)) <= 0) {
7006 if (ret < 0) {
7007 serverLog(LL_WARNING, "WARNING %s", err_msg);
7008 sdsfree(err_msg);
7009 } else
7010 serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
7011 "Your system could be affected, please report this error.");
7012 if (!checkIgnoreWarning("ARM64-COW-BUG")) {
7013 serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
7014 "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
7015 exit(1);
7016 }
7017 }
7018 #endif /* __arm64__ */
7019 #endif /* __linux__ */
7020 moduleInitModulesSystemLast();
7021 moduleLoadFromQueue();
7022 ACLLoadUsersAtStartup();
7023 InitServerLast();
7024 aofLoadManifestFromDisk();
7025 loadDataFromDisk();
7026 aofOpenIfNeededOnServerStart();
7027 aofDelHistoryFiles();
7028 if (server.cluster_enabled) {
7029 if (verifyClusterConfigWithData() == C_ERR) {
7030 serverLog(LL_WARNING,
7031 "You can't have keys in a DB different than DB 0 when in "
7032 "Cluster mode. Exiting.");
7033 exit(1);
7034 }
7035 }
7036 if (server.ipfd.count > 0 || server.tlsfd.count > 0)
7037 serverLog(LL_NOTICE,"Ready to accept connections");
7038 if (server.sofd > 0)
7039 serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
7040 if (server.supervised_mode == SUPERVISED_SYSTEMD) {
7041 if (!server.masterhost) {
7042 redisCommunicateSystemd("STATUS=Ready to accept connections\n");
7043 } else {
7044 redisCommunicateSystemd("STATUS=Ready to accept connections in read-only mode. Waiting for MASTER <-> REPLICA sync\n");
7045 }
7046 redisCommunicateSystemd("READY=1\n");
7047 }
7048 } else {
7049 ACLLoadUsersAtStartup();
7050 InitServerLast();
7051 sentinelIsRunning();
7052 if (server.supervised_mode == SUPERVISED_SYSTEMD) {
7053 redisCommunicateSystemd("STATUS=Ready to accept connections\n");
7054 redisCommunicateSystemd("READY=1\n");
7055 }
7056 }
7057
7058 /* Warning the user about suspicious maxmemory setting. */
7059 if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
7060 serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
7061 }
7062
7063 redisSetCpuAffinity(server.server_cpulist);
7064 setOOMScoreAdj(-1);
7065
7066 aeMain(server.el);
7067 aeDeleteEventLoop(server.el);
7068 return 0;
7069}
7070
7071/* The End */
7072