diff --git a/.github/scripts/hypo/syncrand.py b/.github/scripts/hypo/syncrand.py new file mode 100644 index 000000000000..ae72586db196 --- /dev/null +++ b/.github/scripts/hypo/syncrand.py @@ -0,0 +1,142 @@ +import os +import subprocess +import json +import common +try: + __import__("hypothesis") +except ImportError: + subprocess.check_call(["pip", "install", "hypothesis"]) +from hypothesis import assume, strategies as st, settings, Verbosity +from hypothesis.stateful import rule, precondition, RuleBasedStateMachine, Bundle, initialize, multiple, consumes, invariant +from hypothesis import Phase, seed +from strategy import * +from fs_op import FsOperation +import random + +st_entry_name = st.text(alphabet='abc', min_size=1, max_size=3) +st_patterns = st.text(alphabet='abc?/*', min_size=1, max_size=5).\ + filter(lambda s: s.find('***') == -1 or s.endswith('/***')) +st_patterns = st.lists(st.sampled_from(['a','?','/','*', '/***']), min_size=1, max_size=10)\ + .map(''.join).filter(lambda s: s.find('***') == -1 or (s.count('/***')==1 and s.endswith('a/***'))) +st_patterns = st.lists(st.sampled_from(['a','?','/','*']), min_size=1, max_size=10)\ + .map(''.join).filter(lambda s: s.find('***') == -1 ) +st_patterns = st.lists(st.sampled_from(['a','?','/','*']), min_size=1, max_size=10)\ + .map(''.join).filter(lambda s: s.find('**') == -1 ) + +st_option = st.fixed_dictionaries({ + "option": st.just("--include") | st.just("--exclude"), + "pattern": st_patterns +}) +st_options = st.lists(st_option, min_size=1, max_size=10).\ + filter(lambda self: any(item["pattern"].endswith('/***') for item in self)) +st_options = st.lists(st_option, min_size=1, max_size=10) + +SEED=int(os.environ.get('SEED', random.randint(0, 1000000000))) +@seed(SEED) +class SyncMachine(RuleBasedStateMachine): + Files = Bundle('files') + Folders = Bundle('folders') + ROOT_DIR1 = '/tmp/sync_src' + ROOT_DIR2 = '/tmp/sync_src2' + DEST_RSYNC = '/tmp/rsync' + DEST_JUICESYNC = '/tmp/juicesync' + log_level = 
os.environ.get('LOG_LEVEL', 'INFO') + logger = common.setup_logger(f'./syncrand.log', 'syncrand_logger', log_level) + fsop = FsOperation(logger) + + @initialize(target=Folders) + def init_folders(self): + if not os.path.exists(self.ROOT_DIR1): + os.makedirs(self.ROOT_DIR1) + if not os.path.exists(self.ROOT_DIR2): + os.makedirs(self.ROOT_DIR2) + common.clean_dir(self.ROOT_DIR1) + common.clean_dir(self.ROOT_DIR2) + return '' + + def __init__(self): + super(SyncMachine, self).__init__() + + def equal(self, result1, result2): + if type(result1) != type(result2): + return False + if isinstance(result1, Exception): + r1 = str(result1).replace(self.ROOT_DIR1, '') + r2 = str(result2).replace(self.ROOT_DIR2, '') + return r1 == r2 + elif isinstance(result1, tuple): + return result1 == result2 + elif isinstance(result1, str): + r1 = str(result1).replace(self.ROOT_DIR1, '') + r2 = str(result2).replace(self.ROOT_DIR2, '') + return r1 == r2 + else: + return result1 == result2 + + @rule(target=Files, + parent = Folders.filter(lambda x: x != multiple()), + file_name = st_entry_name, + mode = st_open_mode, + content = st_content, + umask = st_umask, + ) + def create_file(self, parent, file_name, content, mode='x', user='root', umask=0o022): + result1 = self.fsop.do_create_file(self.ROOT_DIR1, parent, file_name, mode, content, user, umask) + result2 = self.fsop.do_create_file(self.ROOT_DIR2, parent, file_name, mode, content, user, umask) + assert self.equal(result1, result2), f'\033[31mcreate_file:\nresult1 is {result1}\nresult2 is {result2}\033[0m' + if isinstance(result1, Exception): + return multiple() + else: + return os.path.join(parent, file_name) + + @rule( target = Folders, + parent = Folders.filter(lambda x: x != multiple()), + subdir = st_entry_name, + mode = st_entry_mode, + umask = st_umask, + ) + def mkdir(self, parent, subdir, mode, user='root', umask=0o022): + result1 = self.fsop.do_mkdir(self.ROOT_DIR1, parent, subdir, mode, user, umask) + result2 = 
self.fsop.do_mkdir(self.ROOT_DIR2, parent, subdir, mode, user, umask) + assert self.equal(result1, result2), f'\033[31mmkdir:\nresult1 is {result1}\nresult2 is {result2}\033[0m' + if isinstance(result1, Exception): + return multiple() + else: + return os.path.join(parent, subdir) + + @rule(options = st_options + ) + def sync(self, options): + subprocess.check_call(['rm', '-rf', self.DEST_RSYNC]) + subprocess.check_call(['rm', '-rf', self.DEST_JUICESYNC]) + options = ' '.join([f'{item["option"]} {item["pattern"]}' for item in options]) + self.logger.info(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options}') + subprocess.check_call(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + self.logger.info(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options}') + subprocess.check_call(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + try: + subprocess.check_call(['diff', '-r', self.DEST_RSYNC, self.DEST_JUICESYNC]) + except subprocess.CalledProcessError as e: + print(f'\033[31m{e}\033[0m') + raise e + self.fsop.stats.success('do_sync') + + def teardown(self): + pass + +if __name__ == '__main__': + MAX_EXAMPLE=int(os.environ.get('MAX_EXAMPLE', '1000')) + STEP_COUNT=int(os.environ.get('STEP_COUNT', '50')) + settings.register_profile("dev", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, + print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \ + report_multiple_bugs=False, + phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain]) + settings.register_profile("ci", max_examples=MAX_EXAMPLE, verbosity=Verbosity.normal, + print_blob=False, stateful_step_count=STEP_COUNT, deadline=None, \ + report_multiple_bugs=False, + phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain]) + profile = os.environ.get('PROFILE', 
'dev') + settings.load_profile(profile) + juicefs_machine = SyncMachine.TestCase() + juicefs_machine.runTest() + print(json.dumps(FsOperation.stats.get(), sort_keys=True, indent=4)) diff --git a/.github/scripts/hypo/syncrand_test.py b/.github/scripts/hypo/syncrand_test.py new file mode 100644 index 000000000000..fc0cd9176bf1 --- /dev/null +++ b/.github/scripts/hypo/syncrand_test.py @@ -0,0 +1,47 @@ +import unittest +from syncrand import SyncMachine + +class TestFsrand2(unittest.TestCase): + + def test_sync1(self): + state = SyncMachine() + v1 = state.init_folders() + v2 = state.mkdir(mode=0, parent=v1, subdir='a', umask=0) + v3 = state.create_file(content=b'', file_name=v2, mode='w', parent=v2, umask=0) + state.sync(options=[{'option': '--include', 'pattern': 'aa/***'}, + {'option': '--exclude', 'pattern': 'a?**'}]) + state.teardown() + + def test_sync2(self): + state = SyncMachine() + v1 = state.init_folders() + v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0) + state.sync(options=[{'option': '--exclude', 'pattern': '**/***'}]) + state.teardown() + + def test_sync3(self): + state = SyncMachine() + v1 = state.init_folders() + v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0) + state.sync(options=[{'option': '--exclude', 'pattern': '/***'}]) + state.teardown() + + def test_sync4(self): + state = SyncMachine() + v1 = state.init_folders() + v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0) + state.sync(options=[{'option': '--exclude', 'pattern': '*/***'}]) + state.teardown() + + def test_sync5(self): + state = SyncMachine() + v1 = state.init_folders() + state.sync(options=[{'option': '--include', 'pattern': 'a'}]) + v2 = state.mkdir(mode=0, parent=v1, subdir='a', umask=0) + v3 = state.create_file(content=b'', file_name=v2, mode='w', parent=v2, umask=0) + state.sync(options=[{'option': '--include', 'pattern': 'aa'}, + {'option': '--exclude', 'pattern': 'a?**'}]) + 
state.teardown() + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/.github/scripts/command/sync.sh b/.github/scripts/sync/sync.sh similarity index 95% rename from .github/scripts/command/sync.sh rename to .github/scripts/sync/sync.sh index efe4018dedea..0a5fe573568d 100755 --- a/.github/scripts/command/sync.sh +++ b/.github/scripts/sync/sync.sh @@ -76,8 +76,8 @@ test_sync_with_loop_link(){ ./juicefs format $META_URL myjfs ./juicefs mount -d $META_URL /jfs ln -s looplink jfs_source/looplink - ./juicefs sync jfs_source/ /jfs/jfs_source/ $options > err.log 2>&1 || true - grep "Failed to handle 1 objects" err.log + ./juicefs sync jfs_source/ /jfs/jfs_source/ $options 2>&1 | tee err.log || true + grep -i "failed to handle 1 objects" err.log || (echo "grep failed" && exit 1) rm -rf jfs_source/looplink } @@ -91,7 +91,7 @@ test_sync_with_deep_link(){ ln -s symlink_$i jfs_source/symlink_$((i+1)) done ./juicefs sync jfs_source/ /jfs/jfs_source/ $options 2>&1 | tee err.log || true - grep "Failed to handle 1 objects" err.log + grep -i "failed to handle 1 objects" err.log || (echo "grep failed" && exit 1) rm -rf jfs_source/symlink_* } @@ -116,12 +116,6 @@ do_test_sync_fsrand_with_mount_point(){ diff -ur --no-dereference fsrand/ /jfs/fsrand/ } -test_sync_randomly(){ - prepare_test - [[ ! 
-d jfs_source ]] && git clone https://github.com/juicedata/juicefs.git jfs_source - META_URL=$META_URL python3 .github/scripts/testSync.py -} - test_sync_include_exclude_option(){ prepare_test ./juicefs format --trash-days 0 $META_URL myjfs diff --git a/.github/scripts/command/sync_cluster.sh b/.github/scripts/sync/sync_cluster.sh similarity index 100% rename from .github/scripts/command/sync_cluster.sh rename to .github/scripts/sync/sync_cluster.sh diff --git a/.github/scripts/command/sync_fsrand.sh b/.github/scripts/sync/sync_fsrand.sh similarity index 100% rename from .github/scripts/command/sync_fsrand.sh rename to .github/scripts/sync/sync_fsrand.sh index 17275c4ad19c..bc02e6daa828 100755 --- a/.github/scripts/command/sync_fsrand.sh +++ b/.github/scripts/sync/sync_fsrand.sh @@ -99,10 +99,11 @@ test_update(){ sudo -u $USER PROFILE=generate EXCLUDE_RULES=$EXCLUDE_RULES MAX_EXAMPLE=$MAX_EXAMPLE SEED=$SEED ROOT_DIR1=$SOURCE_DIR1 ROOT_DIR2=$SOURCE_DIR2 python3 .github/scripts/hypo/fsrand2.py || true # chmod 777 $SOURCE_DIR1 # chmod 777 $SOURCE_DIR2 + do_copy $sync_option for i in {1..5}; do sync_option+=" --update --delete-dst" - sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync $SOURCE_DIR1 jfs://meta_url/fsrand1/ $sync_option 2>&1| tee sync.log || true echo sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync $SOURCE_DIR1 jfs://meta_url/fsrand1/ $sync_option + sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync $SOURCE_DIR1 jfs://meta_url/fsrand1/ $sync_option 2>&1| tee sync.log || true if grep -q "Failed to delete" sync.log; then echo "failed to delete, retry sync" else @@ -110,7 +111,6 @@ test_update(){ break fi done - do_copy $sync_option check_diff $DEST_DIR1 $DEST_DIR2 } diff --git a/.github/scripts/command/sync_minio.sh b/.github/scripts/sync/sync_minio.sh similarity index 100% rename from .github/scripts/command/sync_minio.sh rename to .github/scripts/sync/sync_minio.sh diff --git 
a/.github/scripts/testSync.py b/.github/scripts/testSync.py deleted file mode 100644 index a5d876e9ef67..000000000000 --- a/.github/scripts/testSync.py +++ /dev/null @@ -1,140 +0,0 @@ -import subprocess -try: - __import__("hypothesis") -except ImportError: - subprocess.check_call(["pip", "install", "hypothesis"]) -import random -import shutil -try: - __import__("hypothesis") -except ImportError: - subprocess.check_call(["pip", "install", "hypothesis"]) - -from hypothesis import given, strategies as st, settings, example -import os - -JFS_SOURCE_DIR='/Users/chengzhou/Documents/juicefs/pkg/' -JFS_SOURCE_DIR='jfs_source/pkg/' -MOUNT_POINT='/jfs/' -JFS_BIN='./juicefs-1.0.0-beta1' -JFS_BIN='./juicefs-1.0.0-beta2' -JFS_BIN='./juicefs-1.0.0-beta3' -JFS_BIN='./juicefs' -MAX_EXAMPLE=100 - -def setup(): - meta_url = os.environ.get('META_URL', 'sqlite3://test.db') - volume_name='myjfs' - if os.path.isfile('test.db'): - os.remove('test.db') - if os.path.exists(MOUNT_POINT): - os.system('umount %s'%MOUNT_POINT) - cache_dir = os.path.expanduser('~/.juicefs/local/%s/'%volume_name) - if os.path.exists(cache_dir): - try: - shutil.rmtree(cache_dir) - except OSError as e: - print("Error: %s : %s" % (cache_dir, e.strerror)) - subprocess.check_call([JFS_BIN, 'format', meta_url, volume_name]) - subprocess.check_call([JFS_BIN, 'mount', '-d', meta_url, MOUNT_POINT]) - subprocess.check_call([JFS_BIN, 'sync', JFS_SOURCE_DIR, MOUNT_POINT+'jfs_source/']) - -def generate_all_entries(root_dir): - entries = set() - for root, dirs, files in os.walk(root_dir): - # print(root) - for d in dirs: - entries.add(d+'/') - for file in files: - entries.add(file) - file_path = os.path.join(root, file)[len(root_dir):] - entries.add(file_path) - print(len(entries)) - return entries - -def generate_nested_dir(root_dir): - result = [] - for root, dirs, files in os.walk(root_dir): - for d in dirs: - dir = os.path.join(root, d)[len(root_dir):] - li = dir.split('/') - entries = [] - for i in range(0, len(li)): - 
entries.append('/'.join(li[i:])+'/') - for i in range(0, len(entries)): - result.append(random.sample(entries, random.randint(0, min(len(entries), 5)) )) - print(result) - return result - -def change_entry(entries): - # entries = random.sample( entries, random.randint(0, min(len(entries), 5)) ) - options = [] - for entry in entries: - type = random.choice(['--include', '--exclude']) - value = entry.replace(random.choice(entry), random.choice(['*', '?']), random.randint(0,2)) - # print(type+' '+value) - options.append( (type, "'%s'"%value) ) - # print(options) - return options - -all_entry = generate_all_entries(JFS_SOURCE_DIR) -st_all_entry = st.lists(st.sampled_from(list(all_entry))).map(lambda x: change_entry(x)).filter(lambda x: len(x) != 0) -nested_dir = generate_nested_dir(JFS_SOURCE_DIR) -st_nested_dir = st.sampled_from(nested_dir).map(lambda x: change_entry(x)).filter(lambda x: len(x) != 0) -valid_name = st.text(st.characters(max_codepoint=1000, blacklist_categories=('Cc', 'Cs')), min_size=2).map(lambda s: s.strip()).filter(lambda s: len(s) > 0) -st_random_text = st.lists(valid_name).map(lambda x: change_entry(x)).filter(lambda x: len(x) != 0) - -@given(sync_options=st_random_text) -@example([['--include', '[*'] ]) -@settings(max_examples=MAX_EXAMPLE, deadline=None) -def test_sync_with_random_text(sync_options): - print(sync_options) - compare_rsync_and_juicesync(sync_options) - -@given(sync_options=st_all_entry) -@settings(max_examples=MAX_EXAMPLE, deadline=None) -def test_sync_with_path_entry(sync_options): - compare_rsync_and_juicesync(sync_options) - -@given(sync_options=st_nested_dir) -@example([ ['--include', 'chu*/'], ['--exclude', 'pk*/'], ['--exclude', '*.go'] ]) -@settings(max_examples=MAX_EXAMPLE, deadline=None) -def test_sync_with_nested_dir(sync_options): - compare_rsync_and_juicesync(sync_options) - -def compare_rsync_and_juicesync(sync_options): - assert sync_options != 0 - sync_options = [item for sublist in sync_options for item in sublist] 
- do_rsync(MOUNT_POINT+'jfs_source/', 'rsync_dir/', sync_options) - do_juicesync(MOUNT_POINT+'jfs_source/', 'juicesync_dir/', sync_options) - diff_result = os.system('diff -ur juicesync_dir rsync_dir') - assert diff_result==0 - -def do_juicesync(source_dir, dest_dir, sync_options): - if os.path.exists(dest_dir): - shutil.rmtree(dest_dir) - os.makedirs(dest_dir) - juicesync_cmd = [JFS_BIN , 'sync', '--dirs', source_dir, dest_dir]+sync_options - print('juicesync_cmd: '+' '.join(juicesync_cmd)) - try: - subprocess.check_call(juicesync_cmd) - except Exception as e: - assert False - -def do_rsync(source_dir, dest_dir, sync_options): - if os.path.exists(dest_dir): - shutil.rmtree(dest_dir) - os.makedirs(dest_dir) - rsync_cmd = ['rsync', '-a', '-r' , source_dir, dest_dir]+sync_options - print('rsync_cmd: '+ ' '.join(rsync_cmd)) - try: - subprocess.check_call(rsync_cmd) - except Exception as e: - assert False - -if __name__ == "__main__": - setup() - test_sync_with_random_text() - test_sync_with_nested_dir() - test_sync_with_path_entry() - diff --git a/.github/workflows/command.yml b/.github/workflows/command.yml index 1a0045305dd2..a51f689b42ce 100644 --- a/.github/workflows/command.yml +++ b/.github/workflows/command.yml @@ -150,69 +150,11 @@ jobs: # if: failure() timeout-minutes: 60 uses: lhotari/action-upterm@v1 - - command_test3: - needs: [build-matrix] - strategy: - fail-fast: true - matrix: - meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }} - runs-on: ubuntu-20.04 - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 1 - - - name: Build - uses: ./.github/actions/build - # with: - # useBeta: true - - - name: Test Sync - timeout-minutes: 30 - run: | - sudo META=${{matrix.meta}} .github/scripts/command/sync.sh - - - name: Test Sync with fsrand - timeout-minutes: 30 - run: | - sudo META=${{matrix.meta}} .github/scripts/command/sync_fsrand.sh - - - name: Test Sync with mino - timeout-minutes: 30 - run: | - sudo META=${{matrix.meta}} 
.github/scripts/command/sync_minio.sh - - - name: Test Sync with multi nodes - timeout-minutes: 30 - run: | - if [ "${{matrix.meta}}" != "redis" ]; then - echo "skip sync with multi nodes when meta is not redis" - exit 0 - fi - # not supported algo: "dsa" "ecdsa-sk" "ed25519-sk" - types=("ecdsa" "ed25519" "rsa") - random_type=${types[$RANDOM % ${#types[@]}]} - sudo CI=true META=${{matrix.meta}} KEY_TYPE=$random_type .github/scripts/command/sync_cluster.sh - - - name: Log - if: always() - run: | - echo "juicefs log" - sudo tail -n 1000 /var/log/juicefs.log - grep ":" /var/log/juicefs.log && exit 1 || true - - - name: Setup upterm session - if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1) - # if: failure() - timeout-minutes: 60 - uses: lhotari/action-upterm@v1 success-all-test: runs-on: ubuntu-latest - needs: [command_test1, command_test2, command_test3] + needs: [command_test1, command_test2] if: always() steps: - uses: technote-space/workflow-conclusion-action@v3 diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 000000000000..af3596d5c00c --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,80 @@ +name: "sync" + +on: + push: + branches: + - 'main' + - 'release-**' + paths-ignore: + - '**/sync.yml' + - '**/syncrand_test.py' + - '**/syncrand.py' + - '.github/scripts/sync/*.sh' + pull_request: + branches: + - 'main' + - 'release-**' + paths: + - '**/sync.yml' + - '**/syncrand_test.py' + - '**/syncrand.py' + - '.github/scripts/sync/*.sh' + schedule: + - cron: '30 20 * * *' + + workflow_dispatch: + inputs: + debug: + type: boolean + description: "Run the build with tmate debugging enabled" + required: false + default: false + +jobs: + sync: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Build + uses: ./.github/actions/build + # with: + # useBeta: true + + - name: Test Sync + timeout-minutes: 30 + run: | + sudo 
META=redis .github/scripts/sync/sync.sh + + - name: Test Sync with fsrand + timeout-minutes: 30 + run: | + sudo META=redis .github/scripts/sync/sync_fsrand.sh + + - name: Test Sync with minio + timeout-minutes: 30 + run: | + sudo META=redis .github/scripts/sync/sync_minio.sh + + - name: Test Sync with multi workers + timeout-minutes: 30 + run: | + # not supported algo: "dsa" "ecdsa-sk" "ed25519-sk" + types=("ecdsa" "ed25519" "rsa") + random_type=${types[$RANDOM % ${#types[@]}]} + sudo CI=true META=redis KEY_TYPE=$random_type .github/scripts/sync/sync_cluster.sh + + - name: Test sync include/exclude option + timeout-minutes: 30 + run: | + # sudo python3 .github/scripts/hypo/syncrand_test.py + sudo LOG_LEVEL=WARNING PROFILE=ci python3 .github/scripts/hypo/syncrand.py + + - name: Setup upterm session + if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1) + # if: failure() + timeout-minutes: 60 + uses: lhotari/action-upterm@v1