Skip to content

Commit

Permalink
Improve free-list management
Browse files Browse the repository at this point in the history
  • Loading branch information
kriszyp committed Feb 12, 2024
1 parent 1eb9973 commit 98c4deb
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 20 deletions.
106 changes: 95 additions & 11 deletions dependencies/lmdb/libraries/liblmdb/mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1581,7 +1581,9 @@ typedef struct MDB_xcursor {
/** State of FreeDB old pages, stored in the MDB_env */
typedef struct MDB_pgstate {
pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */
/* Layout of the block-size cache, as used by mdb_page_alloc():
 * slot 0 holds the highest usable index (allocated length - 1);
 * slot n (n >= 1) holds the starting page number of a cached contiguous
 * run of n free pages, or 0 when no run of that size is cached.
 * The array is allocated lazily on first allocation and is NULL before that.
 */
pgno_t *mf_block_size_cache; /**< Cache of contiguous free blocks, indexed by block size */
txnid_t mf_pglast; /**< ID of last used record, or 0 if !mf_pghead */
/* NOTE(review): confirm where this position is read and updated. */
unsigned mf_position; /**< Position in the free page list, so that allocation can keep trying to write to the same block if possible */
} MDB_pgstate;
/*<lmdb-js>*/
struct MDB_last_map {
Expand Down Expand Up @@ -1642,6 +1644,8 @@ struct MDB_env {
MDB_pgstate me_pgstate; /**< state of old pages from freeDB */
# define me_pglast me_pgstate.mf_pglast
# define me_pghead me_pgstate.mf_pghead
# define me_block_size_cache me_pgstate.mf_block_size_cache
# define me_freelist_position me_pgstate.mf_position
MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */
/** IDL of pages that became unused in a write txn */
MDB_IDL me_free_pgs;
Expand Down Expand Up @@ -2718,6 +2722,13 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
goto fail;
}

if (!env->me_block_size_cache) {
env->me_block_size_cache = calloc(32, sizeof(pgno_t));
env->me_block_size_cache[0] = 31;
}
unsigned cache_size = env->me_block_size_cache[0];
pgno_t best_fit_start = 0; // this is a block we will use if we don't find an exact fit
pgno_t best_fit_size = -1;
for (op = MDB_FIRST;; op = MDB_NEXT) {
MDB_val key, data;
MDB_node *leaf;
Expand All @@ -2726,13 +2737,71 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
pgno_t block_start;
fprintf(stderr, "looking for block of size %u\n", num);
if (cache_size > num) {
block_start = env->me_block_size_cache[num];
if (block_start > 0) {
fprintf(stderr, "found block %u of right size %u\n", block_start, num);
// we found a block of the right size
env->me_block_size_cache[num] = 0; // clear it out, since it will be used (or it is invalid)
pgno = mdb_midl_search(env->me_pghead, block_start);
fprintf(stderr, "does it checkout %u == %u\n", block_start, pgno);
if (pgno == block_start && pgno + num <= mop_len) { // double check it
goto search_done;
}
}
}
block_start = 0;
unsigned block_size = 0;
ssize_t entry;
// TODO: Skip this on the first iteration, since we already checked the cache
if (mop_len > n2) {
i = mop_len;
do {
pgno = mop[i];
if (mop[i-n2] == pgno+n2)
goto search_done;
} while (--i > n2);
entry = i == 0 ? 0 : mop[i];
fprintf(stderr, "pgno %u next would be %u\n", entry, block_start + block_size);
if (entry == 0) continue;
if (entry > 0) {
pgno = entry;
block_size = 1;
} else {
block_size = -entry;
pgno = mop[--i];
}
if (pgno == block_start + block_size) {
block_size++; // count current contiguous block size
} else {
if (block_size >= num) {
if (block_size == num) {
// we found a block of the right size
pgno = block_start;
goto search_done;
} else if (block_size < best_fit_size || best_fit_size == 0) {
best_fit_start = block_start;
best_fit_size = block_size;
}
}
if (block_size > 0) {
// cache this block size
if (block_size >= 2<<30) block_size = (2<<30) - 1;
unsigned cache_size = env->me_block_size_cache[0];
if (block_size > cache_size) {
fprintf(stderr, "expand block size cache to %u\n", block_size << 1);
env->me_block_size_cache = realloc(env->me_block_size_cache, (block_size << 1) * sizeof(pgno_t));
env->me_block_size_cache[0] = (block_size << 1) - 1;
memset(env->me_block_size_cache + cache_size + 1, 0, (env->me_block_size_cache[0] - cache_size) * sizeof(pgno_t));
cache_size = env->me_block_size_cache[0];
}
env->me_block_size_cache[block_size] = block_start;
fprintf(stderr, "cached block %u of size %u\n", block_start, block_size);
}
block_start = pgno;
block_size = 1;
}
//if (mop[i-n2] == pgno+n2)
// goto search_done;
} while (--i >= 0);
if (--retry < 0)
break;
}
Expand Down Expand Up @@ -2813,10 +2882,23 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
DPRINTF(("IDL %"Yu, idl[j]));
#endif
/* Merge in descending sorted order */
mdb_midl_xmerge(mop, idl);
fprintf(stderr, "merge\n");
for (unsigned i = i; i < idl[0]; i++) {
if (mdb_midl_insert(mop, idl[i]) == -3) {
if ((rc = mdb_midl_need(&env->me_pghead, idl[0])) != 0)
goto fail;
mop = env->me_pghead;
}
//mdb_midl_xmerge(mop, idl);
}
mop_len = mop[0];
}

if (best_fit_start > 0) {
pgno = best_fit_start;
fprintf(stderr, "using best fit at %u size %u of %u\n", pgno, num, best_fit_size);
env->me_block_size_cache[best_fit_size] = 0; // clear this out of the cache (TODO: could move it)
goto search_done;
}
/* Use new pages from the map when nothing suitable in the freeDB */
i = 0;
pgno = txn->mt_next_pgno;
Expand All @@ -2841,6 +2923,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
#endif

search_done:
fprintf(stderr, "alloc pgno %u\n", pgno);
if (env->me_flags & MDB_WRITEMAP) {
np = (MDB_page *)(env->me_map + env->me_psize * pgno);
} else {
Expand Down Expand Up @@ -3766,8 +3849,9 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
mdb_midl_shrink(&txn->mt_free_pgs);
env->me_free_pgs = txn->mt_free_pgs;
/* me_pgstate: */
env->me_pghead = NULL;
env->me_pglast = 0;
fprintf(stderr, "txn_end env->me_pghead %p", env->me_pghead);
//env->me_pghead = NULL;
//env->me_pglast = 0;

env->me_txn = NULL;
mode = 0; /* txn == env->me_txn0, do not free() it */
Expand All @@ -3782,7 +3866,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
mdb_midl_free(txn->mt_free_pgs);
free(txn->mt_u.dirty_list);
}
mdb_midl_free(pghead);
//mdb_midl_free(pghead);
}
#if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags) && !txn->mt_parent) {
Expand Down Expand Up @@ -4610,8 +4694,8 @@ mdb_txn_commit(MDB_txn *txn)
if (rc)
goto fail;

mdb_midl_free(env->me_pghead);
env->me_pghead = NULL;
//mdb_midl_free(env->me_pghead);
//env->me_pghead = NULL;
mdb_midl_shrink(&txn->mt_free_pgs);

#if (MDB_DEBUG) > 2
Expand Down
67 changes: 58 additions & 9 deletions dependencies/lmdb/libraries/liblmdb/midl.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
return cursor;
}

#if 0 /* superseded by append/sort */
/* Formerly compiled out as superseded by append/sort; now enabled again and used by the free-list merge in mdb_page_alloc(). */
int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
{
unsigned x, i;
Expand All @@ -91,15 +91,51 @@ int mdb_midl_insert( MDB_IDL ids, MDB_ID id )
return -2;

} else {
/* insert id */
for (i=ids[0]; i>x; i--)
ids[i] = ids[i-1];
ids[x] = id;
if (x >= ids[0]) return -3; // at the end
MDB_ID next_id = ids[x];
if (id < 0) next_id = ids[x + 1];
if (id - 1 == next_id) {
// connected to next entry
ids[x]--; // increment negatively, as we have just expanded a block
ids[x + 1] = id;
return 0;
}
unsigned before = x;
while (!ids[--before] && before >= 0){} // move past empty entries
if (before >= 0) {
MDB_ID next_id = before > 0 ? ids[before] : 0;
int count = before > 1 ? -ids[before - 1] : 0;
if (count < 1) count = 1;
if (next_id - count == id) {
// connected to previous entry
if (count > 1) {
ids[before - 1]--; // can just update the count to include this id
return 0;
} else {
// TODO: need to make space for this one
}
}
}
if (before + 1 < x) {
// there is an empty slot we can use, find a place in the middle
ids[(before + x) >> 1] = id;
return 0;
}
// move items to try to make room
MDB_ID last_id = id;
i = x;
do {
MDB_ID next_id = ids[i];
ids[i++] = last_id;
last_id = next_id;
} while(next_id);
if (x == ids[0] || // if it is full
x - i > ids[0] >> 3) // or too many moves. TODO: This threshold should actually be more like the square root of the length
return -3; // request to grow
}

return 0;
}
#endif

MDB_IDL mdb_midl_alloc(int num)
{
Expand Down Expand Up @@ -146,10 +182,23 @@ int mdb_midl_need( MDB_IDL *idp, unsigned num )
num += ids[0];
if (num > ids[-1]) {
num = (num + num/4 + (256 + 2)) & -256;
if (!(ids = realloc(ids-1, num * sizeof(MDB_ID))))
MDB_IDL new_ids;
if (!(new_ids = alloc(ids-1, num * sizeof(MDB_ID))))
return ENOMEM;
*ids++ = num - 2;
*idp = ids;
*new_ids++ = num - 2;
unsigned j = 0;
// re-spread out the entries with gaps for growth
for (unsigned i = 1; i < ids[0]; i++) {
new_ids[j++] = 0; // empty slot for growth
ssize_t entry;
while (!(entry = ids[i])) { i++; }
new_ids[j++] = entry;
if (entry < 0) new_ids[j++] = ids[i++]; // this was a block with a length
}
// now shrink (or grow) back to appropriate size
new_ids = alloc(new_ids - 1, (j + (j >> 3)) * sizeof(MDB_ID));
new_ids++;
*idp = new_ids;
}
return 0;
}
Expand Down
7 changes: 7 additions & 0 deletions dependencies/lmdb/libraries/liblmdb/midl.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ void mdb_midl_shrink(MDB_IDL *idp);
*/
int mdb_midl_need(MDB_IDL *idp, unsigned num);

/** Insert an ID into a sorted IDL, in place.
 * The list is passed by value because this function never reallocates it;
 * when more room is needed the caller is asked to grow the list instead.
 * @param[in,out] ids	The IDL to insert into.
 * @param[in] id	The ID to insert.
 * @return 0 on success, -3 to request that the caller grow the list
 * (see #mdb_midl_need()), or -2 on other failure.
 */
int mdb_midl_insert( MDB_IDL ids, MDB_ID id );

/** Append an ID onto an IDL.
* @param[in,out] idp Address of the IDL to append to.
* @param[in] id The ID to append.
Expand Down

0 comments on commit 98c4deb

Please sign in to comment.