object.c source code [redis/src/object.c]

1	/ Redis Object implementation.*
2	*
3	* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions are met:
8	*
9	* * Redistributions of source code must retain the above copyright notice,
10	* this list of conditions and the following disclaimer.
11	* * Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* * Neither the name of Redis nor the names of its contributors may be used
15	* to endorse or promote products derived from this software without
16	* specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28	* POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	#include "server.h"
32	#include "functions.h"
33	#include <math.h>
34	#include <ctype.h>
35
36	#ifdef __CYGWIN__
37	#define strtold(a,b) ((long double)strtod((a),(b)))
38	#endif
39
40	/ ===================== Creation and parsing of objects ==================== /
41
42	robj createObject(int* type, void *ptr) {
43	robj o = zmalloc(sizeof(o));
44	o->type = type;
45	o->encoding = OBJ_ENCODING_RAW;
46	o->ptr = ptr;
47	o->refcount = `1`;
48
49	/ Set the LRU to the current lruclock (minutes resolution), or*
50	* alternatively the LFU counter. */
51	if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
52	o->lru = (LFUGetTimeInMinutes()<<`8`) \| LFU_INIT_VAL;
53	} else {
54	o->lru = LRU_CLOCK();
55	}
56	return o;
57	}
58
59	/ Set a special refcount in the object to make it "shared":*
60	* incrRefCount and decrRefCount() will test for this special refcount
61	* and will not touch the object. This way it is free to access shared
62	* objects such as small integers from different threads without any
63	* mutex.
64	*
65	* A common patter to create shared objects:
66	*
67	* robj *myobject = makeObjectShared(createObject(...));
68	*
69	*/
70	robj makeObjectShared(robj o) {
71	serverAssert(o->refcount == `1`);
72	o->refcount = OBJ_SHARED_REFCOUNT;
73	return o;
74	}
75
76	/ Create a string object with encoding OBJ_ENCODING_RAW, that is a plain*
77	* string object where o->ptr points to a proper sds string. */
78	robj createRawStringObject(const* char *ptr, size_t len) {
79	return createObject(OBJ_STRING, sdsnewlen(ptr,len));
80	}
81
82	/ Create a string object with encoding OBJ_ENCODING_EMBSTR, that is*
83	* an object where the sds string is actually an unmodifiable string
84	* allocated in the same chunk as the object itself. */
85	robj createEmbeddedStringObject(const* char *ptr, size_t len) {
86	robj o = zmalloc(sizeof(robj)+sizeof(struct* sdshdr8)+len+`1`);
87	struct sdshdr8 sh = (void**)(o+`1`);
88
89	o->type = OBJ_STRING;
90	o->encoding = OBJ_ENCODING_EMBSTR;
91	o->ptr = sh+`1`;
92	o->refcount = `1`;
93	if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
94	o->lru = (LFUGetTimeInMinutes()<<`8`) \| LFU_INIT_VAL;
95	} else {
96	o->lru = LRU_CLOCK();
97	}
98
99	sh->len = len;
100	sh->alloc = len;
101	sh->flags = SDS_TYPE_8;
102	if (ptr == SDS_NOINIT)
103	sh->buf[len] = `'\0'`;
104	else if (ptr) {
105	memcpy(sh->buf,ptr,len);
106	sh->buf[len] = `'\0'`;
107	} else {
108	memset(sh->buf,`0`,len+`1`);
109	}
110	return o;
111	}
112
113	/ Create a string object with EMBSTR encoding if it is smaller than*
114	* OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
115	* used.
116	*
117	* The current limit of 44 is chosen so that the biggest string object
118	* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
119	#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
120	robj createStringObject(const* char *ptr, size_t len) {
121	if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
122	return createEmbeddedStringObject(ptr,len);
123	else
124	return createRawStringObject(ptr,len);
125	}
126
127	/ Same as CreateRawStringObject, can return NULL if allocation fails /
128	robj tryCreateRawStringObject(const* char *ptr, size_t len) {
129	sds str = sdstrynewlen(ptr,len);
130	if (!str) return NULL;
131	return createObject(OBJ_STRING, str);
132	}
133
134	/ Same as createStringObject, can return NULL if allocation fails /
135	robj tryCreateStringObject(const* char *ptr, size_t len) {
136	if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
137	return createEmbeddedStringObject(ptr,len);
138	else
139	return tryCreateRawStringObject(ptr,len);
140	}
141
142	/ Create a string object from a long long value. When possible returns a*
143	* shared integer object, or at least an integer encoded one.
144	*
145	* If valueobj is non zero, the function avoids returning a shared
146	* integer, because the object is going to be used as value in the Redis key
147	* space (for instance when the INCR command is used), so we want LFU/LRU
148	* values specific for each key. */
149	robj createStringObjectFromLongLongWithOptions(long* long value, int valueobj) {
150	robj *o;
151
152	if (server.maxmemory == `0` \|\|
153	!(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
154	{
155	/ If the maxmemory policy permits, we can still return shared integers*
156	* even if valueobj is true. */
157	valueobj = `0`;
158	}
159
160	if (value >= `0` && value < OBJ_SHARED_INTEGERS && valueobj == `0`) {
161	incrRefCount(shared.integers[value]);
162	o = shared.integers[value];
163	} else {
164	if (value >= LONG_MIN && value <= LONG_MAX) {
165	o = createObject(OBJ_STRING, NULL);
166	o->encoding = OBJ_ENCODING_INT;
167	o->ptr = (void)((long*)value);
168	} else {
169	o = createObject(OBJ_STRING,sdsfromlonglong(value));
170	}
171	}
172	return o;
173	}
174
175	/ Wrapper for createStringObjectFromLongLongWithOptions() always demanding*
176	* to create a shared object if possible. */
177	robj createStringObjectFromLongLong(long* long value) {
178	return createStringObjectFromLongLongWithOptions(value,`0`);
179	}
180
181	/ Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared*
182	* object when LFU/LRU info are needed, that is, when the object is used
183	* as a value in the key space, and Redis is configured to evict based on
184	* LFU/LRU. */
185	robj createStringObjectFromLongLongForValue(long* long value) {
186	return createStringObjectFromLongLongWithOptions(value,`1`);
187	}
188
189	/ Create a string object from a long double. If humanfriendly is non-zero*
190	* it does not use exponential format and trims trailing zeroes at the end,
191	* however this results in loss of precision. Otherwise exp format is used
192	* and the output of snprintf() is not modified.
193	*
194	* The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
195	robj createStringObjectFromLongDouble(long* double value, int humanfriendly) {
196	char buf[MAX_LONG_DOUBLE_CHARS];
197	int len = ld2string(buf,sizeof(buf),value,humanfriendly? LD_STR_HUMAN: LD_STR_AUTO);
198	return createStringObject(buf,len);
199	}
200
201	/ Duplicate a string object, with the guarantee that the returned object*
202	* has the same encoding as the original one.
203	*
204	* This function also guarantees that duplicating a small integer object
205	* (or a string object that contains a representation of a small integer)
206	* will always result in a fresh object that is unshared (refcount == 1).
207	*
208	* The resulting object always has refcount set to 1. */
209	robj dupStringObject(const* robj *o) {
210	robj *d;
211
212	serverAssert(o->type == OBJ_STRING);
213
214	switch(o->encoding) {
215	case OBJ_ENCODING_RAW:
216	return createRawStringObject(o->ptr,sdslen(o->ptr));
217	case OBJ_ENCODING_EMBSTR:
218	return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
219	case OBJ_ENCODING_INT:
220	d = createObject(OBJ_STRING, NULL);
221	d->encoding = OBJ_ENCODING_INT;
222	d->ptr = o->ptr;
223	return d;
224	default:
225	serverPanic("Wrong encoding.");
226	break;
227	}
228	}
229
230	robj createQuicklistObject(void*) {
231	quicklist *l = quicklistCreate();
232	robj *o = createObject(OBJ_LIST,l);
233	o->encoding = OBJ_ENCODING_QUICKLIST;
234	return o;
235	}
236
237	robj createSetObject(void*) {
238	dict *d = dictCreate(&setDictType);
239	robj *o = createObject(OBJ_SET,d);
240	o->encoding = OBJ_ENCODING_HT;
241	return o;
242	}
243
244	robj createIntsetObject(void*) {
245	intset *is = intsetNew();
246	robj *o = createObject(OBJ_SET,is);
247	o->encoding = OBJ_ENCODING_INTSET;
248	return o;
249	}
250
251	robj createHashObject(void*) {
252	unsigned char *zl = lpNew(`0`);
253	robj *o = createObject(OBJ_HASH, zl);
254	o->encoding = OBJ_ENCODING_LISTPACK;
255	return o;
256	}
257
258	robj createZsetObject(void*) {
259	zset zs = zmalloc(sizeof(zs));
260	robj *o;
261
262	zs->dict = dictCreate(&zsetDictType);
263	zs->zsl = zslCreate();
264	o = createObject(OBJ_ZSET,zs);
265	o->encoding = OBJ_ENCODING_SKIPLIST;
266	return o;
267	}
268
269	robj createZsetListpackObject(void*) {
270	unsigned char *lp = lpNew(`0`);
271	robj *o = createObject(OBJ_ZSET,lp);
272	o->encoding = OBJ_ENCODING_LISTPACK;
273	return o;
274	}
275
276	robj createStreamObject(void*) {
277	stream *s = streamNew();
278	robj *o = createObject(OBJ_STREAM,s);
279	o->encoding = OBJ_ENCODING_STREAM;
280	return o;
281	}
282
283	robj createModuleObject(moduleType mt, void *value) {
284	moduleValue mv = zmalloc(sizeof(mv));
285	mv->type = mt;
286	mv->value = value;
287	return createObject(OBJ_MODULE,mv);
288	}
289
290	void freeStringObject(robj *o) {
291	if (o->encoding == OBJ_ENCODING_RAW) {
292	sdsfree(o->ptr);
293	}
294	}
295
296	void freeListObject(robj *o) {
297	if (o->encoding == OBJ_ENCODING_QUICKLIST) {
298	quicklistRelease(o->ptr);
299	} else {
300	serverPanic("Unknown list encoding type");
301	}
302	}
303
304	void freeSetObject(robj *o) {
305	switch (o->encoding) {
306	case OBJ_ENCODING_HT:
307	dictRelease((dict*) o->ptr);
308	break;
309	case OBJ_ENCODING_INTSET:
310	zfree(o->ptr);
311	break;
312	default:
313	serverPanic("Unknown set encoding type");
314	}
315	}
316
317	void freeZsetObject(robj *o) {
318	zset *zs;
319	switch (o->encoding) {
320	case OBJ_ENCODING_SKIPLIST:
321	zs = o->ptr;
322	dictRelease(zs->dict);
323	zslFree(zs->zsl);
324	zfree(zs);
325	break;
326	case OBJ_ENCODING_LISTPACK:
327	zfree(o->ptr);
328	break;
329	default:
330	serverPanic("Unknown sorted set encoding");
331	}
332	}
333
334	void freeHashObject(robj *o) {
335	switch (o->encoding) {
336	case OBJ_ENCODING_HT:
337	dictRelease((dict*) o->ptr);
338	break;
339	case OBJ_ENCODING_LISTPACK:
340	lpFree(o->ptr);
341	break;
342	default:
343	serverPanic("Unknown hash encoding type");
344	break;
345	}
346	}
347
348	void freeModuleObject(robj *o) {
349	moduleValue *mv = o->ptr;
350	mv->type->free(mv->value);
351	zfree(mv);
352	}
353
354	void freeStreamObject(robj *o) {
355	freeStream(o->ptr);
356	}
357
358	void incrRefCount(robj *o) {
359	if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT) {
360	o->refcount++;
361	} else {
362	if (o->refcount == OBJ_SHARED_REFCOUNT) {
363	/ Nothing to do: this refcount is immutable. /
364	} else if (o->refcount == OBJ_STATIC_REFCOUNT) {
365	serverPanic("You tried to retain an object allocated in the stack");
366	}
367	}
368	}
369
370	void decrRefCount(robj *o) {
371	if (o->refcount == `1`) {
372	switch(o->type) {
373	case OBJ_STRING: freeStringObject(o); break;
374	case OBJ_LIST: freeListObject(o); break;
375	case OBJ_SET: freeSetObject(o); break;
376	case OBJ_ZSET: freeZsetObject(o); break;
377	case OBJ_HASH: freeHashObject(o); break;
378	case OBJ_MODULE: freeModuleObject(o); break;
379	case OBJ_STREAM: freeStreamObject(o); break;
380	default: serverPanic("Unknown object type"); break;
381	}
382	zfree(o);
383	} else {
384	if (o->refcount <= `0`) serverPanic("decrRefCount against refcount <= 0");
385	if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
386	}
387	}
388
389	/ See dismissObject() /
390	void dismissSds(sds s) {
391	dismissMemory(sdsAllocPtr(s), sdsAllocSize(s));
392	}
393
394	/ See dismissObject() /
395	void dismissStringObject(robj *o) {
396	if (o->encoding == OBJ_ENCODING_RAW) {
397	dismissSds(o->ptr);
398	}
399	}
400
401	/ See dismissObject() /
402	void dismissListObject(robj *o, size_t size_hint) {
403	if (o->encoding == OBJ_ENCODING_QUICKLIST) {
404	quicklist *ql = o->ptr;
405	serverAssert(ql->len != `0`);
406	/ We iterate all nodes only when average node size is bigger than a*
407	* page size, and there's a high chance we'll actually dismiss something. */
408	if (size_hint / ql->len >= server.page_size) {
409	quicklistNode *node = ql->head;
410	while (node) {
411	if (quicklistNodeIsCompressed(node)) {
412	dismissMemory(node->entry, ((quicklistLZF*)node->entry)->sz);
413	} else {
414	dismissMemory(node->entry, node->sz);
415	}
416	node = node->next;
417	}
418	}
419	} else {
420	serverPanic("Unknown list encoding type");
421	}
422	}
423
424	/ See dismissObject() /
425	void dismissSetObject(robj *o, size_t size_hint) {
426	if (o->encoding == OBJ_ENCODING_HT) {
427	dict *set = o->ptr;
428	serverAssert(dictSize(set) != `0`);
429	/ We iterate all nodes only when average member size is bigger than a*
430	* page size, and there's a high chance we'll actually dismiss something. */
431	if (size_hint / dictSize(set) >= server.page_size) {
432	dictEntry *de;
433	dictIterator *di = dictGetIterator(set);
434	while ((de = dictNext(di)) != NULL) {
435	dismissSds(dictGetKey(de));
436	}
437	dictReleaseIterator(di);
438	}
439
440	/ Dismiss hash table memory. /
441	dismissMemory(set->ht_table[`0`], DICTHT_SIZE(set->ht_size_exp[`0`])*sizeof(dictEntry*));
442	dismissMemory(set->ht_table[`1`], DICTHT_SIZE(set->ht_size_exp[`1`])*sizeof(dictEntry*));
443	} else if (o->encoding == OBJ_ENCODING_INTSET) {
444	dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr));
445	} else {
446	serverPanic("Unknown set encoding type");
447	}
448	}
449
450	/ See dismissObject() /
451	void dismissZsetObject(robj *o, size_t size_hint) {
452	if (o->encoding == OBJ_ENCODING_SKIPLIST) {
453	zset *zs = o->ptr;
454	zskiplist *zsl = zs->zsl;
455	serverAssert(zsl->length != `0`);
456	/ We iterate all nodes only when average member size is bigger than a*
457	* page size, and there's a high chance we'll actually dismiss something. */
458	if (size_hint / zsl->length >= server.page_size) {
459	zskiplistNode *zn = zsl->tail;
460	while (zn != NULL) {
461	dismissSds(zn->ele);
462	zn = zn->backward;
463	}
464	}
465
466	/ Dismiss hash table memory. /
467	dict *d = zs->dict;
468	dismissMemory(d->ht_table[`0`], DICTHT_SIZE(d->ht_size_exp[`0`])*sizeof(dictEntry*));
469	dismissMemory(d->ht_table[`1`], DICTHT_SIZE(d->ht_size_exp[`1`])*sizeof(dictEntry*));
470	} else if (o->encoding == OBJ_ENCODING_LISTPACK) {
471	dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr));
472	} else {
473	serverPanic("Unknown zset encoding type");
474	}
475	}
476
477	/ See dismissObject() /
478	void dismissHashObject(robj *o, size_t size_hint) {
479	if (o->encoding == OBJ_ENCODING_HT) {
480	dict *d = o->ptr;
481	serverAssert(dictSize(d) != `0`);
482	/ We iterate all fields only when average field/value size is bigger than*
483	* a page size, and there's a high chance we'll actually dismiss something. */
484	if (size_hint / dictSize(d) >= server.page_size) {
485	dictEntry *de;
486	dictIterator *di = dictGetIterator(d);
487	while ((de = dictNext(di)) != NULL) {
488	/ Only dismiss values memory since the field size*
489	* usually is small. */
490	dismissSds(dictGetVal(de));
491	}
492	dictReleaseIterator(di);
493	}
494
495	/ Dismiss hash table memory. /
496	dismissMemory(d->ht_table[`0`], DICTHT_SIZE(d->ht_size_exp[`0`])*sizeof(dictEntry*));
497	dismissMemory(d->ht_table[`1`], DICTHT_SIZE(d->ht_size_exp[`1`])*sizeof(dictEntry*));
498	} else if (o->encoding == OBJ_ENCODING_LISTPACK) {
499	dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr));
500	} else {
501	serverPanic("Unknown hash encoding type");
502	}
503	}
504
505	/ See dismissObject() /
506	void dismissStreamObject(robj *o, size_t size_hint) {
507	stream *s = o->ptr;
508	rax *rax = s->rax;
509	if (raxSize(rax) == `0`) return;
510
511	/ Iterate only on stream entries, although size_hint may include serialized*
512	* consumer groups info, but usually, stream entries take up most of
513	* the space. */
514	if (size_hint / raxSize(rax) >= server.page_size) {
515	raxIterator ri;
516	raxStart(&ri,rax);
517	raxSeek(&ri,"^",NULL,`0`);
518	while (raxNext(&ri)) {
519	dismissMemory(ri.data, lpBytes(ri.data));
520	}
521	raxStop(&ri);
522	}
523	}
524
525	/ When creating a snapshot in a fork child process, the main process and child*
526	* process share the same physical memory pages, and if / when the parent
527	* modifies any keys due to write traffic, it'll cause CoW which consume
528	* physical memory. In the child process, after serializing the key and value,
529	* the data is definitely not accessed again, so to avoid unnecessary CoW, we
530	* try to release their memory back to OS. see dismissMemory().
531	*
532	* Because of the cost of iterating all node/field/member/entry of complex data
533	* types, we iterate and dismiss them only when approximate average we estimate
534	* the size of an individual allocation is more than a page size of OS.
535	* 'size_hint' is the size of serialized value. This method is not accurate, but
536	* it can reduce unnecessary iteration for complex data types that are probably
537	* not going to release any memory. */
538	void dismissObject(robj *o, size_t size_hint) {
539	/ madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. /
540	if (server.thp_enabled) return;
541
542	/ Currently we use zmadvise_dontneed only when we use jemalloc with Linux.*
543	* so we avoid these pointless loops when they're not going to do anything. */
544	#if defined(USE_JEMALLOC) && defined(__linux__)
545	if (o->refcount != `1`) return;
546	switch(o->type) {
547	case OBJ_STRING: dismissStringObject(o); break;
548	case OBJ_LIST: dismissListObject(o, size_hint); break;
549	case OBJ_SET: dismissSetObject(o, size_hint); break;
550	case OBJ_ZSET: dismissZsetObject(o, size_hint); break;
551	case OBJ_HASH: dismissHashObject(o, size_hint); break;
552	case OBJ_STREAM: dismissStreamObject(o, size_hint); break;
553	default: break;
554	}
555	#else
556	UNUSED(o); UNUSED(size_hint);
557	#endif
558	}
559
560	/ This variant of decrRefCount() gets its argument as void, and is useful*
561	* as free method in data structures that expect a 'void free_object(void*)'
562	* prototype for the free method. */
563	void decrRefCountVoid(void *o) {
564	decrRefCount(o);
565	}
566
567	int checkType(client c, robj o, int type) {
568	/ A NULL is considered an empty key /
569	if (o && o->type != type) {
570	addReplyErrorObject(c,shared.wrongtypeerr);
571	return `1`;
572	}
573	return `0`;
574	}
575
576	int isSdsRepresentableAsLongLong(sds s, long long *llval) {
577	return string2ll(s,sdslen(s),llval) ? C_OK : C_ERR;
578	}
579
580	int isObjectRepresentableAsLongLong(robj o, long* long *llval) {
581	serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
582	if (o->encoding == OBJ_ENCODING_INT) {
583	if (llval) llval = (long*) o->ptr;
584	return C_OK;
585	} else {
586	return isSdsRepresentableAsLongLong(o->ptr,llval);
587	}
588	}
589
590	/ Optimize the SDS string inside the string object to require little space,*
591	* in case there is more than 10% of free space at the end of the SDS
592	* string. This happens because SDS strings tend to overallocate to avoid
593	* wasting too much time in allocations when appending to the string. */
594	void trimStringObjectIfNeeded(robj *o) {
595	if (o->encoding == OBJ_ENCODING_RAW &&
596	sdsavail(o->ptr) > sdslen(o->ptr)/`10`)
597	{
598	o->ptr = sdsRemoveFreeSpace(o->ptr);
599	}
600	}
601
602	/ Try to encode a string object in order to save space /
603	robj tryObjectEncoding(robj o) {
604	long value;
605	sds s = o->ptr;
606	size_t len;
607
608	/ Make sure this is a string object, the only type we encode*
609	* in this function. Other types use encoded memory efficient
610	* representations but are handled by the commands implementing
611	* the type. */
612	serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
613
614	/ We try some specialized encoding only for objects that are*
615	* RAW or EMBSTR encoded, in other words objects that are still
616	* in represented by an actually array of chars. */
617	if (!sdsEncodedObject(o)) return o;
618
619	/ It's not safe to encode shared objects: shared objects can be shared*
620	* everywhere in the "object space" of Redis and may end in places where
621	* they are not handled. We handle them only as values in the keyspace. */
622	if (o->refcount > `1`) return o;
623
624	/ Check if we can represent this string as a long integer.*
625	* Note that we are sure that a string larger than 20 chars is not
626	* representable as a 32 nor 64 bit integer. */
627	len = sdslen(s);
628	if (len <= `20` && string2l(s,len,&value)) {
629	/ This object is encodable as a long. Try to use a shared object.*
630	* Note that we avoid using shared integers when maxmemory is used
631	* because every object needs to have a private LRU field for the LRU
632	* algorithm to work well. */
633	if ((server.maxmemory == `0` \|\|
634	!(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
635	value >= `0` &&
636	value < OBJ_SHARED_INTEGERS)
637	{
638	decrRefCount(o);
639	incrRefCount(shared.integers[value]);
640	return shared.integers[value];
641	} else {
642	if (o->encoding == OBJ_ENCODING_RAW) {
643	sdsfree(o->ptr);
644	o->encoding = OBJ_ENCODING_INT;
645	o->ptr = (void*) value;
646	return o;
647	} else if (o->encoding == OBJ_ENCODING_EMBSTR) {
648	decrRefCount(o);
649	return createStringObjectFromLongLongForValue(value);
650	}
651	}
652	}
653
654	/ If the string is small and is still RAW encoded,*
655	* try the EMBSTR encoding which is more efficient.
656	* In this representation the object and the SDS string are allocated
657	* in the same chunk of memory to save space and cache misses. */
658	if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
659	robj *emb;
660
661	if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
662	emb = createEmbeddedStringObject(s,sdslen(s));
663	decrRefCount(o);
664	return emb;
665	}
666
667	/ We can't encode the object...*
668	*
669	* Do the last try, and at least optimize the SDS string inside
670	* the string object to require little space, in case there
671	* is more than 10% of free space at the end of the SDS string.
672	*
673	* We do that only for relatively large strings as this branch
674	* is only entered if the length of the string is greater than
675	* OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
676	trimStringObjectIfNeeded(o);
677
678	/ Return the original object. /
679	return o;
680	}
681
682	/ Get a decoded version of an encoded object (returned as a new object).*
683	* If the object is already raw-encoded just increment the ref count. */
684	robj getDecodedObject(robj o) {
685	robj *dec;
686
687	if (sdsEncodedObject(o)) {
688	incrRefCount(o);
689	return o;
690	}
691	if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
692	char buf[`32`];
693
694	ll2string(buf,`32`,(long)o->ptr);
695	dec = createStringObject(buf,strlen(buf));
696	return dec;
697	} else {
698	serverPanic("Unknown encoding type");
699	}
700	}
701
702	/ Compare two string objects via strcmp() or strcoll() depending on flags.*
703	* Note that the objects may be integer-encoded. In such a case we
704	* use ll2string() to get a string representation of the numbers on the stack
705	* and compare the strings, it's much faster than calling getDecodedObject().
706	*
707	* Important note: when REDIS_COMPARE_BINARY is used a binary-safe comparison
708	* is used. */
709
710	#define REDIS_COMPARE_BINARY (1<<0)
711	#define REDIS_COMPARE_COLL (1<<1)
712
713	int compareStringObjectsWithFlags(robj a, robj b, int flags) {
714	serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
715	char bufa[`128`], bufb[`128`], astr, bstr;
716	size_t alen, blen, minlen;
717
718	if (a == b) return `0`;
719	if (sdsEncodedObject(a)) {
720	astr = a->ptr;
721	alen = sdslen(astr);
722	} else {
723	alen = ll2string(bufa,sizeof(bufa),(long) a->ptr);
724	astr = bufa;
725	}
726	if (sdsEncodedObject(b)) {
727	bstr = b->ptr;
728	blen = sdslen(bstr);
729	} else {
730	blen = ll2string(bufb,sizeof(bufb),(long) b->ptr);
731	bstr = bufb;
732	}
733	if (flags & REDIS_COMPARE_COLL) {
734	return strcoll(astr,bstr);
735	} else {
736	int cmp;
737
738	minlen = (alen < blen) ? alen : blen;
739	cmp = memcmp(astr,bstr,minlen);
740	if (cmp == `0`) return alen-blen;
741	return cmp;
742	}
743	}
744
745	/ Wrapper for compareStringObjectsWithFlags() using binary comparison. /
746	int compareStringObjects(robj a, robj b) {
747	return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_BINARY);
748	}
749
750	/ Wrapper for compareStringObjectsWithFlags() using collation. /
751	int collateStringObjects(robj a, robj b) {
752	return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_COLL);
753	}
754
755	/ Equal string objects return 1 if the two objects are the same from the*
756	* point of view of a string comparison, otherwise 0 is returned. Note that
757	* this function is faster then checking for (compareStringObject(a,b) == 0)
758	* because it can perform some more optimization. */
759	int equalStringObjects(robj a, robj b) {
760	if (a->encoding == OBJ_ENCODING_INT &&
761	b->encoding == OBJ_ENCODING_INT){
762	/ If both strings are integer encoded just check if the stored*
763	* long is the same. */
764	return a->ptr == b->ptr;
765	} else {
766	return compareStringObjects(a,b) == `0`;
767	}
768	}
769
770	size_t stringObjectLen(robj *o) {
771	serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
772	if (sdsEncodedObject(o)) {
773	return sdslen(o->ptr);
774	} else {
775	return sdigits10((long)o->ptr);
776	}
777	}
778
779	int getDoubleFromObject(const robj o, double* *target) {
780	double value;
781
782	if (o == NULL) {
783	value = `0`;
784	} else {
785	serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
786	if (sdsEncodedObject(o)) {
787	if (!string2d(o->ptr, sdslen(o->ptr), &value))
788	return C_ERR;
789	} else if (o->encoding == OBJ_ENCODING_INT) {
790	value = (long)o->ptr;
791	} else {
792	serverPanic("Unknown string encoding");
793	}
794	}
795	*target = value;
796	return C_OK;
797	}
798
799	int getDoubleFromObjectOrReply(client c, robj o, double target, const* char *msg) {
800	double value;
801	if (getDoubleFromObject(o, &value) != C_OK) {
802	if (msg != NULL) {
803	addReplyError(c,(char*)msg);
804	} else {
805	addReplyError(c,"value is not a valid float");
806	}
807	return C_ERR;
808	}
809	*target = value;
810	return C_OK;
811	}
812
813	int getLongDoubleFromObject(robj o, long* double *target) {
814	long double value;
815
816	if (o == NULL) {
817	value = `0`;
818	} else {
819	serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
820	if (sdsEncodedObject(o)) {
821	if (!string2ld(o->ptr, sdslen(o->ptr), &value))
822	return C_ERR;
823	} else if (o->encoding == OBJ_ENCODING_INT) {
824	value = (long)o->ptr;
825	} else {
826	serverPanic("Unknown string encoding");
827	}
828	}
829	*target = value;
830	return C_OK;
831	}
832
833	int getLongDoubleFromObjectOrReply(client c, robj o, long double target, const* char *msg) {
834	long double value;
835	if (getLongDoubleFromObject(o, &value) != C_OK) {
836	if (msg != NULL) {
837	addReplyError(c,(char*)msg);
838	} else {
839	addReplyError(c,"value is not a valid float");
840	}
841	return C_ERR;
842	}
843	*target = value;
844	return C_OK;
845	}
846
847	int getLongLongFromObject(robj o, long* long *target) {
848	long long value;
849
850	if (o == NULL) {
851	value = `0`;
852	} else {
853	serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
854	if (sdsEncodedObject(o)) {
855	if (string2ll(o->ptr,sdslen(o->ptr),&value) == `0`) return C_ERR;
856	} else if (o->encoding == OBJ_ENCODING_INT) {
857	value = (long)o->ptr;
858	} else {
859	serverPanic("Unknown string encoding");
860	}
861	}
862	if (target) *target = value;
863	return C_OK;
864	}
865
866	int getLongLongFromObjectOrReply(client c, robj o, long long target, const* char *msg) {
867	long long value;
868	if (getLongLongFromObject(o, &value) != C_OK) {
869	if (msg != NULL) {
870	addReplyError(c,(char*)msg);
871	} else {
872	addReplyError(c,"value is not an integer or out of range");
873	}
874	return C_ERR;
875	}
876	*target = value;
877	return C_OK;
878	}
879
880	int getLongFromObjectOrReply(client c, robj o, long target, const* char *msg) {
881	long long value;
882
883	if (getLongLongFromObjectOrReply(c, o, &value, msg) != C_OK) return C_ERR;
884	if (value < LONG_MIN \|\| value > LONG_MAX) {
885	if (msg != NULL) {
886	addReplyError(c,(char*)msg);
887	} else {
888	addReplyError(c,"value is out of range");
889	}
890	return C_ERR;
891	}
892	*target = value;
893	return C_OK;
894	}
895
896	int getRangeLongFromObjectOrReply(client c, robj o, long min, long max, long target, const* char *msg) {
897	if (getLongFromObjectOrReply(c, o, target, msg) != C_OK) return C_ERR;
898	if (target < min \|\| target > max) {
899	if (msg != NULL) {
900	addReplyError(c,(char*)msg);
901	} else {
902	addReplyErrorFormat(c,"value is out of range, value must between %ld and %ld", min, max);
903	}
904	return C_ERR;
905	}
906	return C_OK;
907	}
908
909	int getPositiveLongFromObjectOrReply(client c, robj o, long target, const* char *msg) {
910	if (msg) {
911	return getRangeLongFromObjectOrReply(c, o, `0`, LONG_MAX, target, msg);
912	} else {
913	return getRangeLongFromObjectOrReply(c, o, `0`, LONG_MAX, target, "value is out of range, must be positive");
914	}
915	}
916
917	int getIntFromObjectOrReply(client c, robj o, int target, const* char *msg) {
918	long value;
919
920	if (getRangeLongFromObjectOrReply(c, o, INT_MIN, INT_MAX, &value, msg) != C_OK)
921	return C_ERR;
922
923	*target = value;
924	return C_OK;
925	}
926
927	char strEncoding(int* encoding) {
928	switch(encoding) {
929	case OBJ_ENCODING_RAW: return "raw";
930	case OBJ_ENCODING_INT: return "int";
931	case OBJ_ENCODING_HT: return "hashtable";
932	case OBJ_ENCODING_QUICKLIST: return "quicklist";
933	case OBJ_ENCODING_LISTPACK: return "listpack";
934	case OBJ_ENCODING_INTSET: return "intset";
935	case OBJ_ENCODING_SKIPLIST: return "skiplist";
936	case OBJ_ENCODING_EMBSTR: return "embstr";
937	case OBJ_ENCODING_STREAM: return "stream";
938	default: return "unknown";
939	}
940	}
941
942	/ =========================== Memory introspection ========================= /
943
944
945	/ This is a helper function with the goal of estimating the memory*
946	* size of a radix tree that is used to store Stream IDs.
947	*
948	* Note: to guess the size of the radix tree is not trivial, so we
949	* approximate it considering 16 bytes of data overhead for each
950	* key (the ID), and then adding the number of bare nodes, plus some
951	* overhead due by the data and child pointers. This secret recipe
952	* was obtained by checking the average radix tree created by real
953	* workloads, and then adjusting the constants to get numbers that
954	* more or less match the real memory usage.
955	*
956	* Actually the number of nodes and keys may be different depending
957	* on the insertion speed and thus the ability of the radix tree
958	* to compress prefixes. */
959	size_t streamRadixTreeMemoryUsage(rax *rax) {
960	size_t size = sizeof(*rax);
961	size = rax->numele * sizeof(streamID);
962	size += rax->numnodes * sizeof(raxNode);
963	/ Add a fixed overhead due to the aux data pointer, children, ... /
964	size += rax->numnodes * sizeof(long)*`30`;
965	return size;
966	}
967
968	/ Returns the size in bytes consumed by the key's value in RAM.*
969	* Note that the returned value is just an approximation, especially in the
970	* case of aggregated data types where only "sample_size" elements
971	* are checked and averaged to estimate the total size. */
972	#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
973	size_t objectComputeSize(robj key, robj o, size_t sample_size, int dbid) {
974	sds ele, ele2;
975	dict *d;
976	dictIterator *di;
977	struct dictEntry *de;
978	size_t asize = `0`, elesize = `0`, samples = `0`;
979
980	if (o->type == OBJ_STRING) {
981	if(o->encoding == OBJ_ENCODING_INT) {
982	asize = sizeof(*o);
983	} else if(o->encoding == OBJ_ENCODING_RAW) {
984	asize = sdsZmallocSize(o->ptr)+sizeof(*o);
985	} else if(o->encoding == OBJ_ENCODING_EMBSTR) {
986	asize = zmalloc_size((void *)o);
987	} else {
988	serverPanic("Unknown string encoding");
989	}
990	} else if (o->type == OBJ_LIST) {
991	if (o->encoding == OBJ_ENCODING_QUICKLIST) {
992	quicklist *ql = o->ptr;
993	quicklistNode *node = ql->head;
994	asize = sizeof(o)+sizeof*(quicklist);
995	do {
996	elesize += sizeof(quicklistNode)+zmalloc_size(node->entry);
997	samples++;
998	} while ((node = node->next) && samples < sample_size);
999	asize += (double)elesize/samples*ql->len;
1000	} else {
1001	serverPanic("Unknown list encoding");
1002	}
1003	} else if (o->type == OBJ_SET) {
1004	if (o->encoding == OBJ_ENCODING_HT) {
1005	d = o->ptr;
1006	di = dictGetIterator(d);
1007	asize = sizeof(o)+sizeof(dict)+(sizeof(struct* dictEntry)dictSlots(d));
1008	while((de = dictNext(di)) != NULL && samples < sample_size) {
1009	ele = dictGetKey(de);
1010	elesize += sizeof(struct dictEntry) + sdsZmallocSize(ele);
1011	samples++;
1012	}
1013	dictReleaseIterator(di);
1014	if (samples) asize += (double)elesize/samples*dictSize(d);
1015	} else if (o->encoding == OBJ_ENCODING_INTSET) {
1016	asize = sizeof(*o)+zmalloc_size(o->ptr);
1017	} else {
1018	serverPanic("Unknown set encoding");
1019	}
1020	} else if (o->type == OBJ_ZSET) {
1021	if (o->encoding == OBJ_ENCODING_LISTPACK) {
1022	asize = sizeof(*o)+zmalloc_size(o->ptr);
1023	} else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
1024	d = ((zset*)o->ptr)->dict;
1025	zskiplist zsl = ((zset)o->ptr)->zsl;
1026	zskiplistNode *znode = zsl->header->level[`0`].forward;
1027	asize = sizeof(o)+sizeof(zset)+sizeof(zskiplist)+sizeof*(dict)+
1028	(sizeof(struct dictEntry)dictSlots(d))+
1029	zmalloc_size(zsl->header);
1030	while(znode != NULL && samples < sample_size) {
1031	elesize += sdsZmallocSize(znode->ele);
1032	elesize += sizeof(struct dictEntry)+zmalloc_size(znode);
1033	samples++;
1034	znode = znode->level[`0`].forward;
1035	}
1036	if (samples) asize += (double)elesize/samples*dictSize(d);
1037	} else {
1038	serverPanic("Unknown sorted set encoding");
1039	}
1040	} else if (o->type == OBJ_HASH) {
1041	if (o->encoding == OBJ_ENCODING_LISTPACK) {
1042	asize = sizeof(*o)+zmalloc_size(o->ptr);
1043	} else if (o->encoding == OBJ_ENCODING_HT) {
1044	d = o->ptr;
1045	di = dictGetIterator(d);
1046	asize = sizeof(o)+sizeof(dict)+(sizeof(struct* dictEntry)dictSlots(d));
1047	while((de = dictNext(di)) != NULL && samples < sample_size) {
1048	ele = dictGetKey(de);
1049	ele2 = dictGetVal(de);
1050	elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
1051	elesize += sizeof(struct dictEntry);
1052	samples++;
1053	}
1054	dictReleaseIterator(di);
1055	if (samples) asize += (double)elesize/samples*dictSize(d);
1056	} else {
1057	serverPanic("Unknown hash encoding");
1058	}
1059	} else if (o->type == OBJ_STREAM) {
1060	stream *s = o->ptr;
1061	asize = sizeof(o)+sizeof(s);
1062	asize += streamRadixTreeMemoryUsage(s->rax);
1063
1064	/ Now we have to add the listpacks. The last listpack is often non*
1065	* complete, so we estimate the size of the first N listpacks, and
1066	* use the average to compute the size of the first N-1 listpacks, and
1067	* finally add the real size of the last node. */
1068	raxIterator ri;
1069	raxStart(&ri,s->rax);
1070	raxSeek(&ri,"^",NULL,`0`);
1071	size_t lpsize = `0`, samples = `0`;
1072	while(samples < sample_size && raxNext(&ri)) {
1073	unsigned char *lp = ri.data;
1074	lpsize += lpBytes(lp);
1075	samples++;
1076	}
1077	if (s->rax->numele <= samples) {
1078	asize += lpsize;
1079	} else {
1080	if (samples) lpsize /= samples; / Compute the average. /
1081	asize += lpsize * (s->rax->numele-`1`);
1082	/ No need to check if seek succeeded, we enter this branch only*
1083	* if there are a few elements in the radix tree. */
1084	raxSeek(&ri,"$",NULL,`0`);
1085	raxNext(&ri);
1086	asize += lpBytes(ri.data);
1087	}
1088	raxStop(&ri);
1089
1090	/ Consumer groups also have a non trivial memory overhead if there*
1091	* are many consumers and many groups, let's count at least the
1092	* overhead of the pending entries in the groups and consumers
1093	* PELs. */
1094	if (s->cgroups) {
1095	raxStart(&ri,s->cgroups);
1096	raxSeek(&ri,"^",NULL,`0`);
1097	while(raxNext(&ri)) {
1098	streamCG *cg = ri.data;
1099	asize += sizeof(*cg);
1100	asize += streamRadixTreeMemoryUsage(cg->pel);
1101	asize += sizeof(streamNACK)*raxSize(cg->pel);
1102
1103	/ For each consumer we also need to add the basic data*
1104	* structures and the PEL memory usage. */
1105	raxIterator cri;
1106	raxStart(&cri,cg->consumers);
1107	raxSeek(&cri,"^",NULL,`0`);
1108	while(raxNext(&cri)) {
1109	streamConsumer *consumer = cri.data;
1110	asize += sizeof(*consumer);
1111	asize += sdslen(consumer->name);
1112	asize += streamRadixTreeMemoryUsage(consumer->pel);
1113	/ Don't count NACKs again, they are shared with the*
1114	* consumer group PEL. */
1115	}
1116	raxStop(&cri);
1117	}
1118	raxStop(&ri);
1119	}
1120	} else if (o->type == OBJ_MODULE) {
1121	asize = moduleGetMemUsage(key, o, sample_size, dbid);
1122	} else {
1123	serverPanic("Unknown object type");
1124	}
1125	return asize;
1126	}
1127
1128	/ Release data obtained with getMemoryOverheadData(). /
1129	void freeMemoryOverheadData(struct redisMemOverhead *mh) {
1130	zfree(mh->db);
1131	zfree(mh);
1132	}
1133
1134	/ Return a struct redisMemOverhead filled with memory overhead*
1135	* information used for the MEMORY OVERHEAD and INFO command. The returned
1136	* structure pointer should be freed calling freeMemoryOverheadData(). */
1137	struct redisMemOverhead getMemoryOverheadData(void*) {
1138	int j;
1139	size_t mem_total = `0`;
1140	size_t mem = `0`;
1141	size_t zmalloc_used = zmalloc_used_memory();
1142	struct redisMemOverhead mh = zcalloc(sizeof(mh));
1143
1144	mh->total_allocated = zmalloc_used;
1145	mh->startup_allocated = server.initial_memory_usage;
1146	mh->peak_allocated = server.stat_peak_memory;
1147	mh->total_frag =
1148	(float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
1149	mh->total_frag_bytes =
1150	server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
1151	mh->allocator_frag =
1152	(float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
1153	mh->allocator_frag_bytes =
1154	server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
1155	mh->allocator_rss =
1156	(float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
1157	mh->allocator_rss_bytes =
1158	server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
1159	mh->rss_extra =
1160	(float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
1161	mh->rss_extra_bytes =
1162	server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
1163
1164	mem_total += server.initial_memory_usage;
1165
1166	/ Replication backlog and replicas share one global replication buffer,*
1167	* only if replication buffer memory is more than the repl backlog setting,
1168	* we consider the excess as replicas' memory. Otherwise, replication buffer
1169	* memory is the consumption of repl backlog. */
1170	if (listLength(server.slaves) &&
1171	(long long)server.repl_buffer_mem > server.repl_backlog_size)
1172	{
1173	mh->clients_slaves = server.repl_buffer_mem - server.repl_backlog_size;
1174	mh->repl_backlog = server.repl_backlog_size;
1175	} else {
1176	mh->clients_slaves = `0`;
1177	mh->repl_backlog = server.repl_buffer_mem;
1178	}
1179	if (server.repl_backlog) {
1180	/ The approximate memory of rax tree for indexed blocks. /
1181	mh->repl_backlog +=
1182	server.repl_backlog->blocks_index->numnodes * sizeof(raxNode) +
1183	raxSize(server.repl_backlog->blocks_index) * sizeof(void*);
1184	}
1185	mem_total += mh->repl_backlog;
1186	mem_total += mh->clients_slaves;
1187
1188	/ Computing the memory used by the clients would be O(N) if done*
1189	* here online. We use our values computed incrementally by
1190	* updateClientMemUsage(). */
1191	mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_MASTER]+
1192	server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB]+
1193	server.stat_clients_type_memory[CLIENT_TYPE_NORMAL];
1194	mem_total += mh->clients_normal;
1195
1196	mh->cluster_links = server.stat_cluster_links_memory;
1197	mem_total += mh->cluster_links;
1198
1199	mem = `0`;
1200	if (server.aof_state != AOF_OFF) {
1201	mem += sdsZmallocSize(server.aof_buf);
1202	}
1203	mh->aof_buffer = mem;
1204	mem_total+=mem;
1205
1206	mem = evalScriptsMemory();
1207	mh->lua_caches = mem;
1208	mem_total+=mem;
1209	mh->functions_caches = functionsMemoryOverhead();
1210	mem_total+=mh->functions_caches;
1211
1212	for (j = `0`; j < server.dbnum; j++) {
1213	redisDb *db = server.db+j;
1214	long long keyscount = dictSize(db->dict);
1215	if (keyscount==`0`) continue;
1216
1217	mh->total_keys += keyscount;
1218	mh->db = zrealloc(mh->db,sizeof(mh->db[`0`])*(mh->num_dbs+`1`));
1219	mh->db[mh->num_dbs].dbid = j;
1220
1221	mem = dictSize(db->dict) * sizeof(dictEntry) +
1222	dictSlots(db->dict) * sizeof(dictEntry*) +
1223	dictSize(db->dict) * sizeof(robj);
1224	mh->db[mh->num_dbs].overhead_ht_main = mem;
1225	mem_total+=mem;
1226
1227	mem = dictSize(db->expires) * sizeof(dictEntry) +
1228	dictSlots(db->expires) * sizeof(dictEntry*);
1229	mh->db[mh->num_dbs].overhead_ht_expires = mem;
1230	mem_total+=mem;
1231
1232	/ Account for the slot to keys map in cluster mode /
1233	mem = dictSize(db->dict) * dictMetadataSize(db->dict);
1234	mh->db[mh->num_dbs].overhead_ht_slot_to_keys = mem;
1235	mem_total+=mem;
1236
1237	mh->num_dbs++;
1238	}
1239
1240	mh->overhead_total = mem_total;
1241	mh->dataset = zmalloc_used - mem_total;
1242	mh->peak_perc = (float)zmalloc_used*`100`/mh->peak_allocated;
1243
1244	/ Metrics computed after subtracting the startup memory from*
1245	* the total memory. */
1246	size_t net_usage = `1`;
1247	if (zmalloc_used > mh->startup_allocated)
1248	net_usage = zmalloc_used - mh->startup_allocated;
1249	mh->dataset_perc = (float)mh->dataset*`100`/net_usage;
1250	mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : `0`;
1251
1252	return mh;
1253	}
1254
1255	/ Helper for "MEMORY allocator-stats", used as a callback for the jemalloc*
1256	* stats output. */
1257	void inputCatSds(void result, const* char *str) {
1258	/ result is actually a (sds ), so re-cast it here /*
1259	sds info = (sds )result;
1260	info = sdscat(info, str);
1261	}
1262
1263	/ This implements MEMORY DOCTOR. An human readable analysis of the Redis*
1264	* memory condition. */
1265	sds getMemoryDoctorReport(void) {
1266	int empty = `0`; / Instance is empty or almost empty. /
1267	int big_peak = `0`; / Memory peak is much larger than used mem. /
1268	int high_frag = `0`; / High fragmentation. /
1269	int high_alloc_frag = `0`;/ High allocator fragmentation. /
1270	int high_proc_rss = `0`; / High process rss overhead. /
1271	int high_alloc_rss = `0`; / High rss overhead. /
1272	int big_slave_buf = `0`; / Slave buffers are too big. /
1273	int big_client_buf = `0`; / Client buffers are too big. /
1274	int many_scripts = `0`; / Script cache has too many scripts. /
1275	int num_reports = `0`;
1276	struct redisMemOverhead *mh = getMemoryOverheadData();
1277
1278	if (mh->total_allocated < (`1024``1024``5`)) {
1279	empty = `1`;
1280	num_reports++;
1281	} else {
1282	/ Peak is > 150% of current used memory? /
1283	if (((float)mh->peak_allocated / mh->total_allocated) > `1.5`) {
1284	big_peak = `1`;
1285	num_reports++;
1286	}
1287
1288	/ Fragmentation is higher than 1.4 and 10MB ?/
1289	if (mh->total_frag > `1.4` && mh->total_frag_bytes > `10`<<`20`) {
1290	high_frag = `1`;
1291	num_reports++;
1292	}
1293
1294	/ External fragmentation is higher than 1.1 and 10MB? /
1295	if (mh->allocator_frag > `1.1` && mh->allocator_frag_bytes > `10`<<`20`) {
1296	high_alloc_frag = `1`;
1297	num_reports++;
1298	}
1299
1300	/ Allocator rss is higher than 1.1 and 10MB ? /
1301	if (mh->allocator_rss > `1.1` && mh->allocator_rss_bytes > `10`<<`20`) {
1302	high_alloc_rss = `1`;
1303	num_reports++;
1304	}
1305
1306	/ Non-Allocator rss is higher than 1.1 and 10MB ? /
1307	if (mh->rss_extra > `1.1` && mh->rss_extra_bytes > `10`<<`20`) {
1308	high_proc_rss = `1`;
1309	num_reports++;
1310	}
1311
1312	/ Clients using more than 200k each average? /
1313	long numslaves = listLength(server.slaves);
1314	long numclients = listLength(server.clients)-numslaves;
1315	if (mh->clients_normal / numclients > (`1024`*`200`)) {
1316	big_client_buf = `1`;
1317	num_reports++;
1318	}
1319
1320	/ Slaves using more than 10 MB each? /
1321	if (numslaves > `0` && mh->clients_slaves > (`1024``1024``10`)) {
1322	big_slave_buf = `1`;
1323	num_reports++;
1324	}
1325
1326	/ Too many scripts are cached? /
1327	if (dictSize(evalScriptsDict()) > `1000`) {
1328	many_scripts = `1`;
1329	num_reports++;
1330	}
1331	}
1332
1333	sds s;
1334	if (num_reports == `0`) {
1335	s = sdsnew(
1336	"Hi Sam, I can't find any memory issue in your instance. "
1337	"I can only account for what occurs on this base.\n");
1338	} else if (empty == `1`) {
1339	s = sdsnew(
1340	"Hi Sam, this instance is empty or is using very little memory, "
1341	"my issues detector can't be used in these conditions. "
1342	"Please, leave for your mission on Earth and fill it with some data. "
1343	"The new Sam and I will be back to our programming as soon as I "
1344	"finished rebooting.\n");
1345	} else {
1346	s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
1347	if (big_peak) {
1348	s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
1349	}
1350	if (high_frag) {
1351	s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
1352	}
1353	if (high_alloc_frag) {
1354	s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
1355	}
1356	if (high_alloc_rss) {
1357	s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
1358	}
1359	if (high_proc_rss) {
1360	s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
1361	}
1362	if (big_slave_buf) {
1363	s = sdscat(s," * Big replica buffers: The replica output buffers in this instance are greater than 10MB for each replica (on average). This likely means that there is some replica instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what replica is not receiving data correctly and why. You can use the INFO output in order to check the replicas delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
1364	}
1365	if (big_client_buf) {
1366	s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
1367	}
1368	if (many_scripts) {
1369	s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
1370	}
1371	s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
1372	}
1373	freeMemoryOverheadData(mh);
1374	return s;
1375	}
1376
1377	/ Set the object LRU/LFU depending on server.maxmemory_policy.*
1378	* The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
1379	* The lru_idle and lru_clock args are only relevant if policy
1380	* is MAXMEMORY_FLAG_LRU.
1381	* Either or both of them may be <0, in that case, nothing is set. */
1382	int objectSetLRUOrLFU(robj val, long* long lfu_freq, long long lru_idle,
1383	long long lru_clock, int lru_multiplier) {
1384	if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1385	if (lfu_freq >= `0`) {
1386	serverAssert(lfu_freq <= `255`);
1387	val->lru = (LFUGetTimeInMinutes()<<`8`) \| lfu_freq;
1388	return `1`;
1389	}
1390	} else if (lru_idle >= `0`) {
1391	/ Provided LRU idle time is in seconds. Scale*
1392	* according to the LRU clock resolution this Redis
1393	* instance was compiled with (normally 1000 ms, so the
1394	* below statement will expand to lru_idle1000/1000. /
1395	lru_idle = lru_idle*lru_multiplier/LRU_CLOCK_RESOLUTION;
1396	long lru_abs = lru_clock - lru_idle; / Absolute access time. /
1397	/ If the LRU field underflows (since lru_clock is a wrapping clock),*
1398	* we need to make it positive again. This be handled by the unwrapping
1399	* code in estimateObjectIdleTime. I.e. imagine a day when lru_clock
1400	* wrap arounds (happens once in some 6 months), and becomes a low
1401	* value, like 10, an lru_idle of 1000 should be near LRU_CLOCK_MAX. */
1402	if (lru_abs < `0`)
1403	lru_abs += LRU_CLOCK_MAX;
1404	val->lru = lru_abs;
1405	return `1`;
1406	}
1407	return `0`;
1408	}
1409
1410	/ ======================= The OBJECT and MEMORY commands =================== /
1411
1412	/ This is a helper function for the OBJECT command. We need to lookup keys*
1413	* without any modification of LRU or other parameters. */
1414	robj objectCommandLookup(client c, robj *key) {
1415	return lookupKeyReadWithFlags(c->db,key,LOOKUP_NOTOUCH\|LOOKUP_NONOTIFY);
1416	}
1417
1418	robj objectCommandLookupOrReply(client c, robj key, robj reply) {
1419	robj *o = objectCommandLookup(c,key);
1420	if (!o) addReplyOrErrorObject(c, reply);
1421	return o;
1422	}
1423
1424	/ Object command allows to inspect the internals of a Redis Object.*
1425	* Usage: OBJECT <refcount\|encoding\|idletime\|freq> <key> */
1426	void objectCommand(client *c) {
1427	robj *o;
1428
1429	if (c->argc == `2` && !strcasecmp(c->argv[`1`]->ptr,"help")) {
1430	const char *help[] = {
1431	"ENCODING <key>",
1432	" Return the kind of internal representation used in order to store the value",
1433	" associated with a <key>.",
1434	"FREQ <key>",
1435	" Return the access frequency index of the <key>. The returned integer is",
1436	" proportional to the logarithm of the recent access frequency of the key.",
1437	"IDLETIME <key>",
1438	" Return the idle time of the <key>, that is the approximated number of",
1439	" seconds elapsed since the last access to the key.",
1440	"REFCOUNT <key>",
1441	" Return the number of references of the value associated with the specified",
1442	" <key>.",
1443	NULL
1444	};
1445	addReplyHelp(c, help);
1446	} else if (!strcasecmp(c->argv[`1`]->ptr,"refcount") && c->argc == `3`) {
1447	if ((o = objectCommandLookupOrReply(c,c->argv[`2`],shared.null[c->resp]))
1448	== NULL) return;
1449	addReplyLongLong(c,o->refcount);
1450	} else if (!strcasecmp(c->argv[`1`]->ptr,"encoding") && c->argc == `3`) {
1451	if ((o = objectCommandLookupOrReply(c,c->argv[`2`],shared.null[c->resp]))
1452	== NULL) return;
1453	addReplyBulkCString(c,strEncoding(o->encoding));
1454	} else if (!strcasecmp(c->argv[`1`]->ptr,"idletime") && c->argc == `3`) {
1455	if ((o = objectCommandLookupOrReply(c,c->argv[`2`],shared.null[c->resp]))
1456	== NULL) return;
1457	if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1458	addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1459	return;
1460	}
1461	addReplyLongLong(c,estimateObjectIdleTime(o)/`1000`);
1462	} else if (!strcasecmp(c->argv[`1`]->ptr,"freq") && c->argc == `3`) {
1463	if ((o = objectCommandLookupOrReply(c,c->argv[`2`],shared.null[c->resp]))
1464	== NULL) return;
1465	if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
1466	addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1467	return;
1468	}
1469	/ LFUDecrAndReturn should be called*
1470	* in case of the key has not been accessed for a long time,
1471	* because we update the access time only
1472	* when the key is read or overwritten. */
1473	addReplyLongLong(c,LFUDecrAndReturn(o));
1474	} else {
1475	addReplySubcommandSyntaxError(c);
1476	}
1477	}
1478
1479	/ The memory command will eventually be a complete interface for the*
1480	* memory introspection capabilities of Redis.
1481	*
1482	* Usage: MEMORY usage <key> */
1483	void memoryCommand(client *c) {
1484	if (!strcasecmp(c->argv[`1`]->ptr,"help") && c->argc == `2`) {
1485	const char *help[] = {
1486	"DOCTOR",
1487	" Return memory problems reports.",
1488	"MALLOC-STATS",
1489	" Return internal statistics report from the memory allocator.",
1490	"PURGE",
1491	" Attempt to purge dirty pages for reclamation by the allocator.",
1492	"STATS",
1493	" Return information about the memory usage of the server.",
1494	"USAGE <key> [SAMPLES <count>]",
1495	" Return memory in bytes used by <key> and its value. Nested values are",
1496	" sampled up to <count> times (default: 5, 0 means sample all).",
1497	NULL
1498	};
1499	addReplyHelp(c, help);
1500	} else if (!strcasecmp(c->argv[`1`]->ptr,"usage") && c->argc >= `3`) {
1501	dictEntry *de;
1502	long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
1503	for (int j = `3`; j < c->argc; j++) {
1504	if (!strcasecmp(c->argv[j]->ptr,"samples") &&
1505	j+`1` < c->argc)
1506	{
1507	if (getLongLongFromObjectOrReply(c,c->argv[j+`1`],&samples,NULL)
1508	== C_ERR) return;
1509	if (samples < `0`) {
1510	addReplyErrorObject(c,shared.syntaxerr);
1511	return;
1512	}
1513	if (samples == `0`) samples = LLONG_MAX;
1514	j++; / skip option argument. /
1515	} else {
1516	addReplyErrorObject(c,shared.syntaxerr);
1517	return;
1518	}
1519	}
1520	if ((de = dictFind(c->db->dict,c->argv[`2`]->ptr)) == NULL) {
1521	addReplyNull(c);
1522	return;
1523	}
1524	size_t usage = objectComputeSize(c->argv[`2`],dictGetVal(de),samples,c->db->id);
1525	usage += sdsZmallocSize(dictGetKey(de));
1526	usage += sizeof(dictEntry);
1527	usage += dictMetadataSize(c->db->dict);
1528	addReplyLongLong(c,usage);
1529	} else if (!strcasecmp(c->argv[`1`]->ptr,"stats") && c->argc == `2`) {
1530	struct redisMemOverhead *mh = getMemoryOverheadData();
1531
1532	addReplyMapLen(c,`27`+mh->num_dbs);
1533
1534	addReplyBulkCString(c,"peak.allocated");
1535	addReplyLongLong(c,mh->peak_allocated);
1536
1537	addReplyBulkCString(c,"total.allocated");
1538	addReplyLongLong(c,mh->total_allocated);
1539
1540	addReplyBulkCString(c,"startup.allocated");
1541	addReplyLongLong(c,mh->startup_allocated);
1542
1543	addReplyBulkCString(c,"replication.backlog");
1544	addReplyLongLong(c,mh->repl_backlog);
1545
1546	addReplyBulkCString(c,"clients.slaves");
1547	addReplyLongLong(c,mh->clients_slaves);
1548
1549	addReplyBulkCString(c,"clients.normal");
1550	addReplyLongLong(c,mh->clients_normal);
1551
1552	addReplyBulkCString(c,"cluster.links");
1553	addReplyLongLong(c,mh->cluster_links);
1554
1555	addReplyBulkCString(c,"aof.buffer");
1556	addReplyLongLong(c,mh->aof_buffer);
1557
1558	addReplyBulkCString(c,"lua.caches");
1559	addReplyLongLong(c,mh->lua_caches);
1560
1561	addReplyBulkCString(c,"functions.caches");
1562	addReplyLongLong(c,mh->functions_caches);
1563
1564	for (size_t j = `0`; j < mh->num_dbs; j++) {
1565	char dbname[`32`];
1566	snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
1567	addReplyBulkCString(c,dbname);
1568	addReplyMapLen(c,`3`);
1569
1570	addReplyBulkCString(c,"overhead.hashtable.main");
1571	addReplyLongLong(c,mh->db[j].overhead_ht_main);
1572
1573	addReplyBulkCString(c,"overhead.hashtable.expires");
1574	addReplyLongLong(c,mh->db[j].overhead_ht_expires);
1575
1576	addReplyBulkCString(c,"overhead.hashtable.slot-to-keys");
1577	addReplyLongLong(c,mh->db[j].overhead_ht_slot_to_keys);
1578	}
1579
1580
1581	addReplyBulkCString(c,"overhead.total");
1582	addReplyLongLong(c,mh->overhead_total);
1583
1584	addReplyBulkCString(c,"keys.count");
1585	addReplyLongLong(c,mh->total_keys);
1586
1587	addReplyBulkCString(c,"keys.bytes-per-key");
1588	addReplyLongLong(c,mh->bytes_per_key);
1589
1590	addReplyBulkCString(c,"dataset.bytes");
1591	addReplyLongLong(c,mh->dataset);
1592
1593	addReplyBulkCString(c,"dataset.percentage");
1594	addReplyDouble(c,mh->dataset_perc);
1595
1596	addReplyBulkCString(c,"peak.percentage");
1597	addReplyDouble(c,mh->peak_perc);
1598
1599	addReplyBulkCString(c,"allocator.allocated");
1600	addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
1601
1602	addReplyBulkCString(c,"allocator.active");
1603	addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
1604
1605	addReplyBulkCString(c,"allocator.resident");
1606	addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
1607
1608	addReplyBulkCString(c,"allocator-fragmentation.ratio");
1609	addReplyDouble(c,mh->allocator_frag);
1610
1611	addReplyBulkCString(c,"allocator-fragmentation.bytes");
1612	addReplyLongLong(c,mh->allocator_frag_bytes);
1613
1614	addReplyBulkCString(c,"allocator-rss.ratio");
1615	addReplyDouble(c,mh->allocator_rss);
1616
1617	addReplyBulkCString(c,"allocator-rss.bytes");
1618	addReplyLongLong(c,mh->allocator_rss_bytes);
1619
1620	addReplyBulkCString(c,"rss-overhead.ratio");
1621	addReplyDouble(c,mh->rss_extra);
1622
1623	addReplyBulkCString(c,"rss-overhead.bytes");
1624	addReplyLongLong(c,mh->rss_extra_bytes);
1625
1626	addReplyBulkCString(c,"fragmentation"); / this is the total RSS overhead, including fragmentation /
1627	addReplyDouble(c,mh->total_frag); / it is kept here for backwards compatibility /
1628
1629	addReplyBulkCString(c,"fragmentation.bytes");
1630	addReplyLongLong(c,mh->total_frag_bytes);
1631
1632	freeMemoryOverheadData(mh);
1633	} else if (!strcasecmp(c->argv[`1`]->ptr,"malloc-stats") && c->argc == `2`) {
1634	#if defined(USE_JEMALLOC)
1635	sds info = sdsempty();
1636	je_malloc_stats_print(inputCatSds, &info, NULL);
1637	addReplyVerbatim(c,info,sdslen(info),"txt");
1638	sdsfree(info);
1639	#else
1640	addReplyBulkCString(c,"Stats not supported for the current allocator");
1641	#endif
1642	} else if (!strcasecmp(c->argv[`1`]->ptr,"doctor") && c->argc == `2`) {
1643	sds report = getMemoryDoctorReport();
1644	addReplyVerbatim(c,report,sdslen(report),"txt");
1645	sdsfree(report);
1646	} else if (!strcasecmp(c->argv[`1`]->ptr,"purge") && c->argc == `2`) {
1647	if (jemalloc_purge() == `0`)
1648	addReply(c, shared.ok);
1649	else
1650	addReplyError(c, "Error purging dirty pages");
1651	} else {
1652	addReplySubcommandSyntaxError(c);
1653	}
1654	}
1655

Browse the source code of redis/src/object.c