作者:陈科
联系方式:chenke1818@gmail.com
转载请说明出处:http://www.dumpcache.com/wiki/doku.php?id=about_redis_6
上一章我们在分析keys相关命令的时候发现,Redis最终对数据的存储都是基于redisDb的实现,所以我们这章先来分析下redisDb的实现,同时把keys相关命令的最后两个SORT和SCAN的实现分析一下。
redisDb最初的初始化是在initServer的时候:
server.db = zmalloc(sizeof(redisDb)*server.dbnum);
dbnum默认为16:
server.dbnum = REDIS_DEFAULT_DBNUM; #define REDIS_DEFAULT_DBNUM 16
我们再来看下redisDb的结构:
typedef struct redisDb { dict *dict; /* The keyspace for this DB */ dict *expires; /* Timeout of keys with a timeout set */ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ dict *ready_keys; /* Blocked keys that received a PUSH */ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ int id; long long avg_ttl; /* Average TTL, just for stats */ } redisDb;
redis的注释写的还是蛮清楚的,我们可以发现,redisDb其实就是对dict字典的封装。
dict *dict存储了key-value的映射。
dict *expires存储了key和过期时间的映射。
至于dict的实现,这里先不展开,我们只要知道它其实就是一个类似hashmap的实现即可。
命令格式为:
SORT key [BY pattern] [LIMIT offset count] [GET pattern [GET pattern …]] [ASC | DESC] [ALPHA] [STORE destination]
具体玩法可以参考:
我们重点来分析源码:
/* The SORT command is the most complex command in Redis. Warning: this code * is optimized for speed and a bit less for readability */ void sortCommand(redisClient *c) { list *operations; unsigned int outputlen = 0; int desc = 0, alpha = 0; long limit_start = 0, limit_count = -1, start, end; int j, dontsort = 0, vectorlen; int getop = 0; /* GET operation counter */ int int_convertion_error = 0; robj *sortval, *sortby = NULL, *storekey = NULL; redisSortObject *vector; /* Resulting vector to sort */ /* Lookup the key to sort. It must be of the right types */ sortval = lookupKeyRead(c->db,c->argv[1]); if (sortval && sortval->type != REDIS_SET && sortval->type != REDIS_LIST && sortval->type != REDIS_ZSET) { addReply(c,shared.wrongtypeerr); return; } /* Create a list of operations to perform for every sorted element. * Operations can be GET/DEL/INCR/DECR */ operations = listCreate(); listSetFreeMethod(operations,zfree); j = 2; /* options start at argv[2] */ /* Now we need to protect sortval incrementing its count, in the future * SORT may have options able to overwrite/delete keys during the sorting * and the sorted key itself may get destroyed */ if (sortval) incrRefCount(sortval); else sortval = createListObject(); /* The SORT command has an SQL-alike syntax, parse it */ while(j < c->argc) { int leftargs = c->argc-j-1; if (!strcasecmp(c->argv[j]->ptr,"asc")) { desc = 0; } else if (!strcasecmp(c->argv[j]->ptr,"desc")) { desc = 1; } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) { alpha = 1; } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) { if ((getLongFromObjectOrReply(c, c->argv[j+1], &limit_start, NULL) != REDIS_OK) || (getLongFromObjectOrReply(c, c->argv[j+2], &limit_count, NULL) != REDIS_OK)) { decrRefCount(sortval); listRelease(operations); return; } j+=2; } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) { storekey = c->argv[j+1]; j++; } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) { sortby = c->argv[j+1]; /* If the BY pattern does not contain '*', i.e. it is constant, * we don't need to sort nor to lookup the weight keys. */ if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1; j++; } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) { listAddNodeTail(operations,createSortOperation( REDIS_SORT_GET,c->argv[j+1])); getop++; j++; } else { decrRefCount(sortval); listRelease(operations); addReply(c,shared.syntaxerr); return; } j++; } /* When sorting a set with no sort specified, we must sort the output * so the result is consistent across scripting and replication. * * The other types (list, sorted set) will retain their native order * even if no sort order is requested, so they remain stable across * scripting and replication. */ if (dontsort && sortval->type == REDIS_SET && (storekey || c->flags & REDIS_LUA_CLIENT)) { /* Force ALPHA sorting */ dontsort = 0; alpha = 1; sortby = NULL; } /* Destructively convert encoded sorted sets for SORT. */ if (sortval->type == REDIS_ZSET) zsetConvert(sortval, REDIS_ENCODING_SKIPLIST); /* Objtain the length of the object to sort. */ switch(sortval->type) { case REDIS_LIST: vectorlen = listTypeLength(sortval); break; case REDIS_SET: vectorlen = setTypeSize(sortval); break; case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break; default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ } /* Perform LIMIT start,count sanity checking. */ start = (limit_start < 0) ? 0 : limit_start; end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1; if (start >= vectorlen) { start = vectorlen-1; end = vectorlen-2; } if (end >= vectorlen) end = vectorlen-1; /* Optimization: * * 1) if the object to sort is a sorted set. * 2) There is nothing to sort as dontsort is true (BY <constant string>). * 3) We have a LIMIT option that actually reduces the number of elements * to fetch. * * In this case to load all the objects in the vector is a huge waste of * resources. We just allocate a vector that is big enough for the selected * range length, and make sure to load just this part in the vector. */ if (sortval->type == REDIS_ZSET && dontsort && (start != 0 || end != vectorlen-1)) { vectorlen = end-start+1; } /* Load the sorting vector with all the objects to sort */ vector = zmalloc(sizeof(redisSortObject)*vectorlen); j = 0; if (sortval->type == REDIS_LIST) { listTypeIterator *li = listTypeInitIterator(sortval,0,REDIS_TAIL); listTypeEntry entry; while(listTypeNext(li,&entry)) { vector[j].obj = listTypeGet(&entry); vector[j].u.score = 0; vector[j].u.cmpobj = NULL; j++; } listTypeReleaseIterator(li); } else if (sortval->type == REDIS_SET) { setTypeIterator *si = setTypeInitIterator(sortval); robj *ele; while((ele = setTypeNextObject(si)) != NULL) { vector[j].obj = ele; vector[j].u.score = 0; vector[j].u.cmpobj = NULL; j++; } setTypeReleaseIterator(si); } else if (sortval->type == REDIS_ZSET && dontsort) { /* Special handling for a sorted set, if 'dontsort' is true. * This makes sure we return elements in the sorted set original * ordering, accordingly to DESC / ASC options. * * Note that in this case we also handle LIMIT here in a direct * way, just getting the required range, as an optimization. */ zset *zs = sortval->ptr; zskiplist *zsl = zs->zsl; zskiplistNode *ln; robj *ele; int rangelen = vectorlen; /* Check if starting point is trivial, before doing log(N) lookup. */ if (desc) { long zsetlen = dictSize(((zset*)sortval->ptr)->dict); ln = zsl->tail; if (start > 0) ln = zslGetElementByRank(zsl,zsetlen-start); } else { ln = zsl->header->level[0].forward; if (start > 0) ln = zslGetElementByRank(zsl,start+1); } while(rangelen--) { redisAssertWithInfo(c,sortval,ln != NULL); ele = ln->obj; vector[j].obj = ele; vector[j].u.score = 0; vector[j].u.cmpobj = NULL; j++; ln = desc ? ln->backward : ln->level[0].forward; } /* The code producing the output does not know that in the case of * sorted set, 'dontsort', and LIMIT, we are able to get just the * range, already sorted, so we need to adjust "start" and "end" * to make sure start is set to 0. */ end -= start; start = 0; } else if (sortval->type == REDIS_ZSET) { dict *set = ((zset*)sortval->ptr)->dict; dictIterator *di; dictEntry *setele; di = dictGetIterator(set); while((setele = dictNext(di)) != NULL) { vector[j].obj = dictGetKey(setele); vector[j].u.score = 0; vector[j].u.cmpobj = NULL; j++; } dictReleaseIterator(di); } else { redisPanic("Unknown type"); } redisAssertWithInfo(c,sortval,j == vectorlen); /* Now it's time to load the right scores in the sorting vector */ if (dontsort == 0) { for (j = 0; j < vectorlen; j++) { robj *byval; if (sortby) { /* lookup value to sort by */ byval = lookupKeyByPattern(c->db,sortby,vector[j].obj); if (!byval) continue; } else { /* use object itself to sort by */ byval = vector[j].obj; } if (alpha) { if (sortby) vector[j].u.cmpobj = getDecodedObject(byval); } else { if (byval->encoding == REDIS_ENCODING_RAW) { char *eptr; vector[j].u.score = strtod(byval->ptr,&eptr); if (eptr[0] != '\0' || errno == ERANGE || isnan(vector[j].u.score)) { int_convertion_error = 1; } } else if (byval->encoding == REDIS_ENCODING_INT) { /* Don't need to decode the object if it's * integer-encoded (the only encoding supported) so * far. We can just cast it */ vector[j].u.score = (long)byval->ptr; } else { redisAssertWithInfo(c,sortval,1 != 1); } } /* when the object was retrieved using lookupKeyByPattern, * its refcount needs to be decreased. */ if (sortby) { decrRefCount(byval); } } } if (dontsort == 0) { server.sort_desc = desc; server.sort_alpha = alpha; server.sort_bypattern = sortby ? 1 : 0; server.sort_store = storekey ? 1 : 0; if (sortby && (start != 0 || end != vectorlen-1)) pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end); else qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare); } /* Send command output to the output buffer, performing the specified * GET/DEL/INCR/DECR operations if any. */ outputlen = getop ? getop*(end-start+1) : end-start+1; if (int_convertion_error) { addReplyError(c,"One or more scores can't be converted into double"); } else if (storekey == NULL) { /* STORE option not specified, sent the sorting result to client */ addReplyMultiBulkLen(c,outputlen); for (j = start; j <= end; j++) { listNode *ln; listIter li; if (!getop) addReplyBulk(c,vector[j].obj); listRewind(operations,&li); while((ln = listNext(&li))) { redisSortOperation *sop = ln->value; robj *val = lookupKeyByPattern(c->db,sop->pattern, vector[j].obj); if (sop->type == REDIS_SORT_GET) { if (!val) { addReply(c,shared.nullbulk); } else { addReplyBulk(c,val); decrRefCount(val); } } else { /* Always fails */ redisAssertWithInfo(c,sortval,sop->type == REDIS_SORT_GET); } } } } else { robj *sobj = createZiplistObject(); /* STORE option specified, set the sorting result as a List object */ for (j = start; j <= end; j++) { listNode *ln; listIter li; if (!getop) { listTypePush(sobj,vector[j].obj,REDIS_TAIL); } else { listRewind(operations,&li); while((ln = listNext(&li))) { redisSortOperation *sop = ln->value; robj *val = lookupKeyByPattern(c->db,sop->pattern, vector[j].obj); if (sop->type == REDIS_SORT_GET) { if (!val) val = createStringObject("",0); /* listTypePush does an incrRefCount, so we should take care * care of the incremented refcount caused by either * lookupKeyByPattern or createStringObject("",0) */ listTypePush(sobj,val,REDIS_TAIL); decrRefCount(val); } else { /* Always fails */ redisAssertWithInfo(c,sortval,sop->type == REDIS_SORT_GET); } } } } if (outputlen) { setKey(c->db,storekey,sobj); notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"sortstore",storekey, c->db->id); server.dirty += outputlen; } else if (dbDelete(c->db,storekey)) { signalModifiedKey(c->db,storekey); notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",storekey,c->db->id); server.dirty++; } decrRefCount(sobj); addReplyLongLong(c,outputlen); } /* Cleanup */ if (sortval->type == REDIS_LIST || sortval->type == REDIS_SET) for (j = 0; j < vectorlen; j++) decrRefCount(vector[j].obj); decrRefCount(sortval); listRelease(operations); for (j = 0; j < vectorlen; j++) { if (alpha && vector[j].u.cmpobj) decrRefCount(vector[j].u.cmpobj); } zfree(vector); }
代码很长,我们细细来品味:
前面的注释作者也提到了,代码为了性能优化,牺牲了一点可读性。
1.先查找key对应的value.
2.假如value类型不是set/list/zset这种可排序的列表类型的,则返回客户端wrongtypeerr
3.开始解析key后面的参数列表,类似sql语句的asc/desc/limit/by这样的语法。
4.如果by语法后跟的参数不带'*',并且value类型为REDIS_SET,则根据字符串进行排序(ALPHA)。
5.获取需要排序对象集合的长度。
6.接下来的过程针对以下几种情况做了优化:
a)如果要排序的集合已经排序过了。
b)如果使用了by的集合dontsort为true
c)使用了LIMIT选项降低了需要排序的元素数量。
以上3种场景只会分配足够大小的内存,减少内存的浪费。
7.根据vectorlen初始化vector
8.把需要排序的数据转载到vector中
9.初始化vector[j].u.score,将by后面跟的key的值转换成浮点数。
10.调用pqsort或者qsort进行排序。(这里的pqsort使用了bsd的qsort实现,可以对指定的子集进行排序)
11.把要输出的内容写到buf中。
12.如果有STORE选项,把排序完的结果设置成list object.
13.清理vector.
格式为:
SCAN cursor [MATCH pattern] [COUNT count]
SCAN 命令及其相关的 SSCAN 命令、 HSCAN 命令和 ZSCAN 命令都用于增量地迭代(incrementally iterate)一集元素(a collection of elements):
SCAN 命令用于迭代当前数据库中的数据库键。
SSCAN 命令用于迭代集合键中的元素。
HSCAN 命令用于迭代哈希键中的键值对。
ZSCAN 命令用于迭代有序集合中的元素(包括元素成员和元素分值)。
我们看最终的代码:
/* This command implements SCAN, HSCAN and SSCAN commands. * If object 'o' is passed, then it must be a Hash or Set object, otherwise * if 'o' is NULL the command will operate on the dictionary associated with * the current database. * * When 'o' is not NULL the function assumes that the first argument in * the client arguments vector is a key so it skips it before iterating * in order to parse options. * * In the case of a Hash object the function returns both the field and value * of every element on the Hash. */ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) { int i, j; list *keys = listCreate(); listNode *node, *nextnode; long count = 10; sds pat; int patlen, use_pattern = 0; dict *ht; /* Object must be NULL (to iterate keys names), or the type of the object * must be Set, Sorted Set, or Hash. */ redisAssert(o == NULL || o->type == REDIS_SET || o->type == REDIS_HASH || o->type == REDIS_ZSET); /* Set i to the first option argument. The previous one is the cursor. */ i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */ /* Step 1: Parse options. */ while (i < c->argc) { j = c->argc - i; if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) { if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL) != REDIS_OK) { goto cleanup; } if (count < 1) { addReply(c,shared.syntaxerr); goto cleanup; } i += 2; } else if (!strcasecmp(c->argv[i]->ptr, "match") && j >= 2) { pat = c->argv[i+1]->ptr; patlen = sdslen(pat); /* The pattern always matches if it is exactly "*", so it is * equivalent to disabling it. */ use_pattern = !(pat[0] == '*' && patlen == 1); i += 2; } else { addReply(c,shared.syntaxerr); goto cleanup; } } /* Step 2: Iterate the collection. * * Note that if the object is encoded with a ziplist, intset, or any other * representation that is not a hash table, we are sure that it is also * composed of a small number of elements. So to avoid taking state we * just return everything inside the object in a single call, setting the * cursor to zero to signal the end of the iteration. */ /* Handle the case of a hash table. */ ht = NULL; if (o == NULL) { ht = c->db->dict; } else if (o->type == REDIS_SET && o->encoding == REDIS_ENCODING_HT) { ht = o->ptr; } else if (o->type == REDIS_HASH && o->encoding == REDIS_ENCODING_HT) { ht = o->ptr; count *= 2; /* We return key / value for this type. */ } else if (o->type == REDIS_ZSET && o->encoding == REDIS_ENCODING_SKIPLIST) { zset *zs = o->ptr; ht = zs->dict; count *= 2; /* We return key / value for this type. */ } if (ht) { void *privdata[2]; /* We set the max number of iterations to ten times the specified * COUNT, so if the hash table is in a pathological state (very * sparsely populated) we avoid to block too much time at the cost * of returning no or very few elements. */ long maxiterations = count*10; /* We pass two pointers to the callback: the list to which it will * add new elements, and the object containing the dictionary so that * it is possible to fetch more data in a type-dependent way. */ privdata[0] = keys; privdata[1] = o; do { cursor = dictScan(ht, cursor, scanCallback, privdata); } while (cursor && maxiterations-- && listLength(keys) < (unsigned long)count); } else if (o->type == REDIS_SET) { int pos = 0; int64_t ll; while(intsetGet(o->ptr,pos++,&ll)) listAddNodeTail(keys,createStringObjectFromLongLong(ll)); cursor = 0; } else if (o->type == REDIS_HASH || o->type == REDIS_ZSET) { unsigned char *p = ziplistIndex(o->ptr,0); unsigned char *vstr; unsigned int vlen; long long vll; while(p) { ziplistGet(p,&vstr,&vlen,&vll); listAddNodeTail(keys, (vstr != NULL) ? createStringObject((char*)vstr,vlen) : createStringObjectFromLongLong(vll)); p = ziplistNext(o->ptr,p); } cursor = 0; } else { redisPanic("Not handled encoding in SCAN."); } /* Step 3: Filter elements. */ node = listFirst(keys); while (node) { robj *kobj = listNodeValue(node); nextnode = listNextNode(node); int filter = 0; /* Filter element if it does not match the pattern. */ if (!filter && use_pattern) { if (kobj->encoding == REDIS_ENCODING_INT) { char buf[REDIS_LONGSTR_SIZE]; int len; redisAssert(kobj->encoding == REDIS_ENCODING_INT); len = ll2string(buf,sizeof(buf),(long)kobj->ptr); if (!stringmatchlen(pat, patlen, buf, len, 0)) filter = 1; } else { if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0)) filter = 1; } } /* Filter element if it is an expired key. */ if (!filter && o == NULL && expireIfNeeded(c->db, kobj)) filter = 1; /* Remove the element and its associted value if needed. */ if (filter) { decrRefCount(kobj); listDelNode(keys, node); } /* If this is a hash or a sorted set, we have a flat list of * key-value elements, so if this element was filtered, remove the * value, or skip it if it was not filtered: we only match keys. */ if (o && (o->type == REDIS_ZSET || o->type == REDIS_HASH)) { node = nextnode; nextnode = listNextNode(node); if (filter) { kobj = listNodeValue(node); decrRefCount(kobj); listDelNode(keys, node); } } node = nextnode; } /* Step 4: Reply to the client. */ addReplyMultiBulkLen(c, 2); addReplyBulkLongLong(c,cursor); addReplyMultiBulkLen(c, listLength(keys)); while ((node = listFirst(keys)) != NULL) { robj *kobj = listNodeValue(node); addReplyBulk(c, kobj); decrRefCount(kobj); listDelNode(keys, node); } cleanup: listSetFreeMethod(keys,decrRefCountVoid); listRelease(keys); }
1.解析参数,假如match后面跟的为'*',那么use_pattern被禁用,相当于所有的元素都符合要求。
2.开始遍历元素
3.过滤元素
4.返回结果给客户端