Skip to content

Commit

Permalink
NSFS : S3 rm with --recursive option does not delete all the objects
Browse files Browse the repository at this point in the history
Signed-off-by: naveenpaul1 <[email protected]>
  • Loading branch information
naveenpaul1 committed Nov 7, 2024
1 parent 4379cbf commit c849725
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 33 deletions.
64 changes: 34 additions & 30 deletions src/sdk/namespace_fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
'use strict';

const _ = require('lodash');
const fs = require('fs');
const path = require('path');
const util = require('util');
const mime = require('mime');
Expand All @@ -14,6 +13,7 @@ const dbg = require('../util/debug_module')(__filename);
const config = require('../../config');
const crypto = require('crypto');
const s3_utils = require('../endpoint/s3/s3_utils');
const js_utils = require('../util/js_utils');
const error_utils = require('../util/error_utils');
const stream_utils = require('../util/stream_utils');
const buffer_utils = require('../util/buffer_utils');
Expand Down Expand Up @@ -250,24 +250,6 @@ function is_sparse_file(stat) {
return (stat.blocks * 512 < stat.size);
}

/**
* @param {fs.Dirent} e
* @returns {string}
*/
function get_entry_name(e) {
return e.name;
}

/**
* @param {string} name
* @returns {fs.Dirent}
*/
function make_named_dirent(name) {
const entry = new fs.Dirent();
entry.name = name;
return entry;
}

function to_xattr(fs_xattr) {
const xattr = _.mapKeys(fs_xattr, (val, key) => {
// Prioritize ignore list
Expand Down Expand Up @@ -680,14 +662,30 @@ class NamespaceFS {
const marker_dir = key_marker.slice(0, dir_key.length);
const marker_ent = key_marker.slice(dir_key.length);
// marker is after dir so no keys in this dir can apply
if (dir_key < marker_dir) {
if (is_first_dir_under_second_dir(dir_key, marker_dir)) {
// dbg.log0(`marker is after dir so no keys in this dir can apply: dir_key=${dir_key} marker_dir=${marker_dir}`);
return;
}
// when the dir portion of the marker is completely below the current dir
// then every key in this dir satisfies the marker and marker_ent should not be used.
const marker_curr = (marker_dir < dir_key) ? '' : marker_ent;
const marker_curr = is_first_dir_under_second_dir(marker_dir, dir_key) ? '' : marker_ent;
// dbg.log0(`process_dir: dir_key=${dir_key} prefix_ent=${prefix_ent} marker_curr=${marker_curr}`);

// compares dir path. returns true if dir1 is under second dir, false if otherwise
// replasing slash with space to make sure secial charecters are not getting
// more priory than backslash while comparing.
/**
* @param {string} first_dir
* @param {string} second_dir
*/
function is_first_dir_under_second_dir(first_dir, second_dir) {
// first_dir and second_dir comparison will fail when there are folder with structure slimier to
// "dir_prfix.12345/" and "dir_prfix.12345.backup/" because "/" considered lesser than "." to fix this
// all the backslash is replaced with space. This updated first_dir and second_dir used only for comparison.
first_dir = first_dir.replaceAll('/', ' ');
second_dir = second_dir.replaceAll('/', ' ');
return first_dir < second_dir;
}
/**
* @typedef {{
* key: string,
Expand All @@ -696,12 +694,19 @@ class NamespaceFS {
*/
const insert_entry_to_results_arr = async r => {
let pos;

const r_key_updated = r.key.replaceAll('/', ' ');
// Since versions are arranged next to latest object in the latest first order,
// no need to find the sorted last index. Push the ".versions/#VERSION_OBJECT" as
// they are in order
if (results.length && r.key < results[results.length - 1].key &&
if (results.length && r_key_updated < results[results.length - 1].key &&
!this._is_hidden_version_path(r.key)) {
pos = _.sortedLastIndexBy(results, r, a => a.key);
pos = js_utils.sortedLastIndexBy(results,
curr => {
const curr_key = r_key_updated.includes('/') ? curr.key : curr.key.replaceAll('/', ' ');
return curr_key < r_key_updated;
},
);
} else {
pos = results.length;
}
Expand Down Expand Up @@ -731,7 +736,7 @@ class NamespaceFS {
const process_entry = async ent => {
// dbg.log0('process_entry', dir_key, ent.name);
if ((!ent.name.startsWith(prefix_ent) ||
ent.name < marker_curr ||
ent.name < marker_curr.split('/')[0] ||
ent.name === this.get_bucket_tmpdir_name() ||
ent.name === config.NSFS_FOLDER_OBJECT_NAME) ||
this._is_hidden_version_path(ent.name)) {
Expand Down Expand Up @@ -774,7 +779,7 @@ class NamespaceFS {
// insert dir object to objects list if its key is lexicographicly bigger than the key marker &&
// no delimiter OR prefix is the current directory entry
const is_dir_content = cached_dir.stat.xattr && cached_dir.stat.xattr[XATTR_DIR_CONTENT];
if (is_dir_content && dir_key > key_marker && (!delimiter || dir_key === prefix)) {
if (is_dir_content && is_first_dir_under_second_dir(marker_dir, dir_key) && (!delimiter || dir_key === prefix)) {
const r = { key: dir_key, common_prefix: false };
await insert_entry_to_results_arr(r);
}
Expand All @@ -797,10 +802,9 @@ class NamespaceFS {
{name: start_marker}
) + 1;
} else {
marker_index = _.sortedLastIndexBy(
sorted_entries,
make_named_dirent(marker_curr),
get_entry_name
const marker_curr_updated = marker_curr.split('/')[0].replaceAll('/', ' ');
marker_index = js_utils.sortedLastIndexBy(sorted_entries,
curr => curr.name.replaceAll('/', ' ') <= marker_curr_updated,
);
}

Expand Down Expand Up @@ -3360,6 +3364,7 @@ class NamespaceFS {
}
}


/** @type {PersistentLogger} */
NamespaceFS._migrate_wal = null;

Expand All @@ -3368,4 +3373,3 @@ NamespaceFS._restore_wal = null;

module.exports = NamespaceFS;
module.exports.multi_buffer_pool = multi_buffer_pool;

227 changes: 227 additions & 0 deletions src/test/unit_tests/jest_tests/test_nsfs_list_object.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/* Copyright (C) 2016 NooBaa */
/* eslint-disable no-undef */
'use strict';
const path = require('path');
const fs_utils = require('../../../util/fs_utils');
const { TMP_PATH } = require('../../system_tests/test_utils');
const NamespaceFS = require('../../../sdk/namespace_fs');
const buffer_utils = require('../../../util/buffer_utils');
const tmp_ns_nsfs_path = path.join(TMP_PATH, 'test_nsfs_namespace_list');
const nsfs_src_bkt = 'nsfs_src';
const ns_nsfs_tmp_bucket_path = `${tmp_ns_nsfs_path}/${nsfs_src_bkt}`;
const list_bkt = 'test_namespace_list_object';
const timeout = 50000;
const files_without_folders_to_upload = make_keys(264, i => `file_without_folder${i}`);
const folders_to_upload = make_keys(264, i => `folder${i}/`);
const files_in_folders_to_upload = make_keys(264, i => `folder1/file${i}`);
const files_in_inner_folders_to_upload = make_keys(264, i => `folder1/inner_folder/file${i}`);
const files_in_inner_backup_folders_to_upload = make_keys(264, i => `folder1/inner_folder.backup/file${i}`);
const files_in_inner_all_backup_folders_to_upload = make_keys(264, i => `folder1/inner.folder.all.backup/file${i}`);
const files_in_inner_all_folders_to_upload = make_keys(264, i => `folder1/inner_folder.all/file${i}`);
const dummy_object_sdk = make_dummy_object_sdk();
const ns_nsfs_tmp = new NamespaceFS({ bucket_path: ns_nsfs_tmp_bucket_path, bucket_id: '3', namespace_resource_id: undefined });
let sorted_all_dir_entries;
function make_dummy_object_sdk() {
return {
requesting_account: {
force_md5_etag: false,
nsfs_account_config: {
uid: process.getuid(),
gid: process.getgid(),
}
},
abort_controller: new AbortController(),
throw_if_aborted() {
if (this.abort_controller.signal.aborted) throw new Error('request aborted signal');
}
};
}
function sort_entries_by_name(a, b) {
if (a.replaceAll('/', ' ') < b.replaceAll('/', ' ')) return -1;
if (a.replaceAll('/', ' ') > b.replaceAll('/', ' ')) return 1;
return 0;
}
// eslint-disable-next-line max-lines-per-function
describe('manage sorted list flow', () => {
describe('list objects - pagination', () => {
beforeAll(async () => {
await fs_utils.create_fresh_path(ns_nsfs_tmp_bucket_path);
await create_keys(list_bkt, ns_nsfs_tmp, [
...files_without_folders_to_upload,
...folders_to_upload,
...files_in_folders_to_upload,
...files_in_inner_folders_to_upload,
...files_in_inner_backup_folders_to_upload,
...files_in_inner_all_backup_folders_to_upload,
...files_in_inner_all_folders_to_upload,
]);
sorted_all_dir_entries = [
...files_without_folders_to_upload,
...folders_to_upload,
...files_in_folders_to_upload,
...files_in_inner_folders_to_upload,
...files_in_inner_backup_folders_to_upload,
...files_in_inner_all_backup_folders_to_upload,
...files_in_inner_all_folders_to_upload,
];
sorted_all_dir_entries.sort(sort_entries_by_name);
}, timeout);
afterAll(async function() {
await delete_keys(list_bkt, ns_nsfs_tmp, [
...folders_to_upload,
...files_in_folders_to_upload,
...files_without_folders_to_upload,
...files_in_inner_folders_to_upload,
...files_in_inner_backup_folders_to_upload,
...files_in_inner_all_backup_folders_to_upload,
...files_in_inner_all_folders_to_upload,
]);
await fs_utils.folder_delete(`${ns_nsfs_tmp_bucket_path}`);
});
it('page=1000', async () => {
let r;
let total_items = 0;
let object_item_start_index = 0;
for (;;) {
r = await ns_nsfs_tmp.list_objects({
bucket: list_bkt,
key_marker: r ? r.next_marker : "",
}, dummy_object_sdk);
object_item_start_index = total_items;
total_items += r.objects.length;
await validat_pagination(r, total_items, object_item_start_index);
if (!r.next_marker) {
break;
}
}
}, timeout);
it('page=500', async () => {
let r;
let total_items = 0;
let object_item_start_index = 0;
for (;;) {
r = await ns_nsfs_tmp.list_objects({
bucket: list_bkt,
limit: 500,
key_marker: r ? r.next_marker : "",
}, dummy_object_sdk);
object_item_start_index = total_items;
total_items += r.objects.length;
await validat_pagination(r, total_items, object_item_start_index);
if (!r.next_marker) {
break;
}
}
}, timeout);
it('page=264', async () => {
let r;
let total_items = 0;
let object_item_start_index = 0;
for (;;) {
r = await ns_nsfs_tmp.list_objects({
bucket: list_bkt,
limit: 264,
key_marker: r ? r.next_marker : "",
}, dummy_object_sdk);
object_item_start_index = total_items;
total_items += r.objects.length;
await validat_pagination(r, total_items, object_item_start_index);
if (!r.next_marker) {
break;
}
}
}, timeout);
it('page=250', async () => {
let r;
let total_items = 0;
let object_item_start_index = 0;
for (;;) {
r = await ns_nsfs_tmp.list_objects({
bucket: list_bkt,
limit: 250,
key_marker: r ? r.next_marker : "",
}, dummy_object_sdk);
object_item_start_index = total_items;
total_items += r.objects.length;
await validat_pagination(r, total_items, object_item_start_index);
if (!r.next_marker) {
break;
}
}
}, timeout);
it('page=100', async () => {
let r;
let total_items = 0;
let object_item_start_index = 0;
for (;;) {
r = await ns_nsfs_tmp.list_objects({
bucket: list_bkt,
limit: 100,
key_marker: r ? r.next_marker : "",
}, dummy_object_sdk);
object_item_start_index = total_items;
total_items += r.objects.length;
await validat_pagination(r, total_items, object_item_start_index);
if (!r.next_marker) {
break;
}
}
}, timeout);
it('page=10', async () => {
let r;
let total_items = 0;
let object_item_start_index = 0;
for (;;) {
r = await ns_nsfs_tmp.list_objects({
bucket: list_bkt,
limit: 10,
key_marker: r ? r.next_marker : "",
}, dummy_object_sdk);
object_item_start_index = total_items;
total_items += r.objects.length;
await validat_pagination(r, total_items, object_item_start_index);
if (!r.next_marker) {
break;
}
}
}, timeout);
});
});
async function validat_pagination(r, total_items, object_item_start_index) {
if (r.next_marker) {
expect(r.is_truncated).toStrictEqual(true);
expect(r.objects.map(it => it.key)).toEqual(sorted_all_dir_entries.slice(object_item_start_index, total_items));
} else {
expect(total_items).toEqual(1848);
expect(r.is_truncated).toStrictEqual(false);
}
}
/**
* @param {number} count
* @param {(i:number)=>string} gen
* @returns {string[]}
*/
function make_keys(count, gen) {
const arr = new Array(count);
for (let i = 0; i < count; ++i) arr[i] = gen(i);
arr.sort();
Object.freeze(arr);
return arr;
}
async function delete_keys(bkt, nsfs_delete_tmp, keys) {
await nsfs_delete_tmp.delete_multiple_objects({
bucket: bkt,
objects: keys.map(key => ({ key })),
}, dummy_object_sdk);
}
async function create_keys(bkt, nsfs_create_tmp, keys) {
return Promise.all(keys.map(async key => {
await nsfs_create_tmp.upload_object({
bucket: bkt,
key,
content_type: 'application/octet-stream',
source_stream: buffer_utils.buffer_to_read_stream(null),
size: 0
}, dummy_object_sdk);
}));
}
14 changes: 11 additions & 3 deletions src/test/unit_tests/test_namespace_fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -142,19 +142,27 @@ mocha.describe('namespace_fs', function() {

function assert_sorted_list(res) {
let prev_key = '';
const regex = /[^A-Za-z0-9]/;
for (const { key } of res.objects) {
if (res.next_marker) {
assert(key <= res.next_marker, 'bad next_marker at key ' + key);
}
assert(prev_key <= key, 'objects not sorted at key ' + key);
// String comparison will fail when with special character and backslash cases,
// Where special characters such as dot, hyphen are listed before backslash in sorted list.
// compare string only if key contains no special characters
if (!regex.test(key)) {
assert(prev_key <= key, 'objects not sorted at key ' + key + " prev_key: " + prev_key);
}
prev_key = key;
}
prev_key = '';
for (const key of res.common_prefixes) {
if (res.next_marker) {
assert(key <= res.next_marker, 'next_marker at key ' + key);
}
assert(prev_key <= key, 'prefixes not sorted at key ' + key);

if (!regex.test(key)) {
assert(prev_key <= key, 'prefixes not sorted at key ' + key + " prev_key: " + prev_key);
}
prev_key = key;
}
}
Expand Down
Loading

0 comments on commit c849725

Please sign in to comment.