From ab039f91068e65b17c312cf0af62a228de274d7c Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Wed, 19 Aug 2020 13:58:13 -0500 Subject: [PATCH 1/5] Try 2 xrootd caches before going back to the redirector --- stashcp/__init__.py | 46 ++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/stashcp/__init__.py b/stashcp/__init__.py index 8268d31..c80d0ae 100755 --- a/stashcp/__init__.py +++ b/stashcp/__init__.py @@ -325,36 +325,52 @@ def download_xrootd(sourceFile, destination, debug, payload): xrd_exit = timed_transfer(filename=sourceFile, debug=debug, cache=cache, destination=destination) payload['xrdexit1']=xrd_exit + tries = 1 + payload['cache1'] = cache if xrd_exit=='0': #worked first try logging.debug("Transfer success using %s", nearest_cache) - payload['tries'] = 1 + status = "First Cache Success" payload['cache'] = cache - else: #pull from origin - logging.info("XrdCP from cache failed on %s, pulling from main redirector", nearest_cache) + if xrd_exit != '0': # pull from second nearest cache + cache = nearest_cache_list[1] + logging.info("XrdCP from cache failed on %s, pulling from second nearest cache %s", nearest_cache, cache) + xrd_exit = timed_transfer(filename=sourceFile, debug=debug, cache=cache, destination=destination) + payload['xrdexit2']=xrd_exit + payload['cache2'] = cache + + if xrd_exit=='0': + logging.info("Second Cache Success") + status = 'Second Cache Success' + payload['cache'] = cache + tries=2 + + if xrd_exit != '0': # pull from the origin + logging.info("XrdCP from cache failed on %s, pulling from main redirector", cache) cache = main_redirector xrd_exit=timed_transfer(filename=sourceFile, cache=cache, debug=debug, destination=destination) + payload['xrdexit3']=xrd_exit + payload['cache3'] = cache if xrd_exit=='0': logging.info("Trunk Success") status = 'Trunk Sucess' - tries=2 + payload['cache'] = cache + tries=3 else: - logging.info("stashcp failed after 2 xrootd attempts") + logging.info("stashcp failed after 3 xrootd attempts") status = 'Timeout' - tries = 2 + tries = 3 - payload['status']=status - payload['xrdexit2']=xrd_exit - payload['tries']=tries - payload['cache'] = cache + payload['status']=status + + payload['tries']=tries - if xrd_exit == '0': - return True - else: - return False - return True + if xrd_exit == '0': + return True + else: + return False def check_for_xrootd(): """ From f7df5a4dabf6634dba84b60977f177397ab656b9 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Wed, 19 Aug 2020 14:12:54 -0500 Subject: [PATCH 2/5] Check that there are more caches to try --- stashcp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stashcp/__init__.py b/stashcp/__init__.py index c80d0ae..1fa5aae 100755 --- a/stashcp/__init__.py +++ b/stashcp/__init__.py @@ -333,7 +333,7 @@ def download_xrootd(sourceFile, destination, debug, payload): status = "First Cache Success" payload['cache'] = cache - if xrd_exit != '0': # pull from second nearest cache + if xrd_exit != '0' and len(nearest_cache_list) >= 2: # pull from second nearest cache cache = nearest_cache_list[1] logging.info("XrdCP from cache failed on %s, pulling from second nearest cache %s", nearest_cache, cache) xrd_exit = timed_transfer(filename=sourceFile, debug=debug, cache=cache, destination=destination) From f47ee676ef25fc0f5bb9b3ace2b26e0f628d8237 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 20 Aug 2020 10:47:54 -0500 Subject: [PATCH 3/5] Try 3 caches for xrootd, then fail --- stashcp/__init__.py | 58 +++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 41 deletions(-) diff --git a/stashcp/__init__.py b/stashcp/__init__.py index 1fa5aae..17f0f6d 100755 --- a/stashcp/__init__.py +++ b/stashcp/__init__.py @@ -319,52 +319,28 @@ def download_xrootd(sourceFile, destination, debug, payload): if not nearest_cache: logging.error("No cache found") return False - cache = nearest_cache - logging.debug("Using Cache %s", nearest_cache) - xrd_exit = timed_transfer(filename=sourceFile, debug=debug, cache=cache, destination=destination) - - payload['xrdexit1']=xrd_exit - tries = 1 - payload['cache1'] = cache - - if xrd_exit=='0': #worked first try - logging.debug("Transfer success using %s", nearest_cache) - status = "First Cache Success" - payload['cache'] = cache - - if xrd_exit != '0' and len(nearest_cache_list) >= 2: # pull from second nearest cache - cache = nearest_cache_list[1] - logging.info("XrdCP from cache failed on %s, pulling from second nearest cache %s", nearest_cache, cache) + # Try 3 times to download from the 3 nearest caches + num_available_caches = len(nearest_cache_list) + tries = 0 + xrd_exit = "" + for cache_idx in range(min(3, num_available_caches)): # try 3 caches, or how ever many caches are in the list + tries = cache_idx+1 + cache = nearest_cache_list[cache_idx] + logging.debug("Using Cache %s", cache) xrd_exit = timed_transfer(filename=sourceFile, debug=debug, cache=cache, destination=destination) - payload['xrdexit2']=xrd_exit - payload['cache2'] = cache - - if xrd_exit=='0': - logging.info("Second Cache Success") - status = 'Second Cache Success' - payload['cache'] = cache - tries=2 - - if xrd_exit != '0': # pull from the origin - logging.info("XrdCP from cache failed on %s, pulling from main redirector", cache) - cache = main_redirector - xrd_exit=timed_transfer(filename=sourceFile, cache=cache, debug=debug, destination=destination) - payload['xrdexit3']=xrd_exit - payload['cache3'] = cache - - if xrd_exit=='0': - logging.info("Trunk Success") - status = 'Trunk Sucess' - payload['cache'] = cache - tries=3 + payload['cache' + str(tries)] = cache + payload['xrdexit' + str(tries)] = xrd_exit + + if xrd_exit=='0': # Transfer worked + logging.debug("Transfer success using %s", cache) + status = "Cache Success" + break # Break out of the for loop, transfer worked! else: - logging.info("stashcp failed after 3 xrootd attempts") - status = 'Timeout' - tries = 3 + logging.debug("xrdcp from cache failed on %s, pulling from next nearest cache", cache) + status = "Cache Download Failure" payload['status']=status - payload['tries']=tries if xrd_exit == '0': From f6bcdcc5d16a76eece6d65d4ddf4d3126699b0fa Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 20 Aug 2020 10:49:57 -0500 Subject: [PATCH 4/5] Try 3 caches when using HTTP as well --- stashcp/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/stashcp/__init__.py b/stashcp/__init__.py index 17f0f6d..a29d66e 100755 --- a/stashcp/__init__.py +++ b/stashcp/__init__.py @@ -404,7 +404,10 @@ def download_http(source, destination, debug, payload): success = False start = end = 0 tried_cache = "" - for cache in nearest_cache_list[:2]: + tries = 0 + # Try the 3 nearest caches + for cache in nearest_cache_list[:min(3, len(nearest_cache_list)]: + tries = tries + 1 tried_cache = cache # Parse the nearest_cache url, make sure it uses http # Should really use urlparse, but python3 and python2 urlparse imports are @@ -447,7 +450,7 @@ def download_http(source, destination, debug, payload): payload['filesize'] = filesize payload['host']=tried_cache - payload['tries']=1 + payload['tries']=tries payload['cache']=tried_cache if success: return True From bab9613f0c95d286e08462628754b0a5aed2bc81 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 20 Aug 2020 14:09:43 -0500 Subject: [PATCH 5/5] Use 4 hosts as backup --- stashcp/__init__.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/stashcp/__init__.py b/stashcp/__init__.py index a29d66e..b4aa70d 100755 --- a/stashcp/__init__.py +++ b/stashcp/__init__.py @@ -324,7 +324,7 @@ def download_xrootd(sourceFile, destination, debug, payload): num_available_caches = len(nearest_cache_list) tries = 0 xrd_exit = "" - for cache_idx in range(min(3, num_available_caches)): # try 3 caches, or how ever many caches are in the list + for cache_idx in range(min(4, num_available_caches)): # try 4 caches, or how ever many caches are in the list tries = cache_idx+1 cache = nearest_cache_list[cache_idx] logging.debug("Using Cache %s", cache) @@ -400,13 +400,12 @@ def download_http(source, destination, debug, payload): download_output = "-O" final_destination = os.path.join(dest_dir, os.path.basename(source)) - # Try 2 nearest caches success = False start = end = 0 tried_cache = "" tries = 0 - # Try the 3 nearest caches - for cache in nearest_cache_list[:min(3, len(nearest_cache_list)]: + # Try the 4 nearest caches + for cache in nearest_cache_list[:min(4, len(nearest_cache_list))]: tries = tries + 1 tried_cache = cache # Parse the nearest_cache url, make sure it uses http