From 4a970a6c4074be9caf552b038c4b0f6dbd71af4e Mon Sep 17 00:00:00 2001 From: Laszlo Bodor Date: Mon, 6 May 2024 10:14:13 +0200 Subject: [PATCH] TEZ-4559: Fix Retry logic in case of Recovery --- .../org/apache/tez/dag/api/client/DAGClientImpl.java | 10 ++++++++-- .../apache/tez/dag/api/client/rpc/TestDAGClient.java | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java index 727719eccc..2913d08c41 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java @@ -410,8 +410,14 @@ private DAGStatus getDAGStatusViaAM(@Nullable Set statusOptions, LOG.info("DAG is no longer running - application not found by YARN", e); dagCompleted = true; } catch (NoCurrentDAGException e) { - LOG.info("Got NoCurrentDAGException from AM, returning a failed DAG", e); - return dagLost(); + if (conf.getBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, + TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT)) { + LOG.info("Got NoCurrentDAGException from AM, going on as recovery is enabled", e); + } else { + // if recovery is disabled, we're not expecting the DAG to be finished any time in the future + LOG.info("Got NoCurrentDAGException from AM, returning a failed DAG as recovery is disabled", e); + return dagLost(); + } } catch (TezException e) { // can be either due to a n/w issue or due to AM completed. LOG.info("Cannot retrieve DAG Status due to TezException: {}", e.getMessage()); diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java b/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java index 662de982fd..8d52aaf3b9 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java @@ -715,6 +715,7 @@ public void testGetDagStatusWithCachedStatusExpiration() throws Exception { @Test public void testDagClientReturnsFailedDAGOnNoCurrentDAGException() throws Exception { TezConfiguration tezConf = new TezConfiguration(); + tezConf.setBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, false); try (DAGClientImplForTest dagClientImpl = new DAGClientImplForTest(mockAppId, dagIdStr, tezConf, null)) {