From 7baabe236660593114b8cc96ad288a7d6739e5e5 Mon Sep 17 00:00:00 2001 From: XavierChanth Date: Thu, 19 Dec 2024 09:23:09 -0500 Subject: [PATCH 1/4] fix: stable monitor connections in c sshnpd --- packages/c/cmake/atsdk.cmake | 2 +- .../c/sshnpd/include/sshnpd/handler_commons.h | 2 +- .../c/sshnpd/include/sshnpd/run_srv_process.h | 2 +- packages/c/sshnpd/src/handle_npt_request.c | 2 +- packages/c/sshnpd/src/handle_ssh_request.c | 2 +- packages/c/sshnpd/src/handler_commons.c | 2 +- packages/c/sshnpd/src/main.c | 83 ++++++++++++------- packages/c/sshnpd/src/run_srv_process.c | 2 +- 8 files changed, 62 insertions(+), 35 deletions(-) diff --git a/packages/c/cmake/atsdk.cmake b/packages/c/cmake/atsdk.cmake index 24766f0b0..059b45372 100644 --- a/packages/c/cmake/atsdk.cmake +++ b/packages/c/cmake/atsdk.cmake @@ -3,7 +3,7 @@ if(NOT atsdk_FOUND) FetchContent_Declare( atsdk GIT_REPOSITORY https://github.com/atsign-foundation/at_c.git - GIT_TAG 9b9cb315a8af08a19e9679c9dad98572536f2ee4 + GIT_TAG c37093b8b6ab4bf97f277f9752ef1975e67ed762 ) FetchContent_MakeAvailable(atsdk) install(TARGETS atclient atchops atlogger) diff --git a/packages/c/sshnpd/include/sshnpd/handler_commons.h b/packages/c/sshnpd/include/sshnpd/handler_commons.h index d1c763e26..69d4f0cac 100644 --- a/packages/c/sshnpd/include/sshnpd/handler_commons.h +++ b/packages/c/sshnpd/include/sshnpd/handler_commons.h @@ -1,8 +1,8 @@ #ifndef HANDLER_COMMONS_H #define HANDLER_COMMONS_H #include "sshnpd/params.h" -#include #include +#include #include #define BYTES(x) (sizeof(unsigned char) * x) diff --git a/packages/c/sshnpd/include/sshnpd/run_srv_process.h b/packages/c/sshnpd/include/sshnpd/run_srv_process.h index f916fff37..7d6abda40 100644 --- a/packages/c/sshnpd/include/sshnpd/run_srv_process.h +++ b/packages/c/sshnpd/include/sshnpd/run_srv_process.h @@ -1,7 +1,7 @@ #ifndef RUN_SRV_H #define RUN_SRV_H -#include +#include #include #include diff --git a/packages/c/sshnpd/src/handle_npt_request.c b/packages/c/sshnpd/src/handle_npt_request.c index 9c46dc90d..d1df063c3 100644 --- a/packages/c/sshnpd/src/handle_npt_request.c +++ b/packages/c/sshnpd/src/handle_npt_request.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/packages/c/sshnpd/src/handle_ssh_request.c b/packages/c/sshnpd/src/handle_ssh_request.c index e5b161fa5..59e8386a2 100644 --- a/packages/c/sshnpd/src/handle_ssh_request.c +++ b/packages/c/sshnpd/src/handle_ssh_request.c @@ -3,10 +3,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include diff --git a/packages/c/sshnpd/src/handler_commons.c b/packages/c/sshnpd/src/handler_commons.c index 9038a4c64..49f45d7ee 100644 --- a/packages/c/sshnpd/src/handler_commons.c +++ b/packages/c/sshnpd/src/handler_commons.c @@ -8,7 +8,7 @@ #include "sshnpd/sshnpd.h" #include #include -#include +#include #include #include #include diff --git a/packages/c/sshnpd/src/main.c b/packages/c/sshnpd/src/main.c index ba68bc238..748c5de47 100644 --- a/packages/c/sshnpd/src/main.c +++ b/packages/c/sshnpd/src/main.c @@ -16,12 +16,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include #include @@ -38,6 +38,10 @@ #define FILENAME_BUFFER_SIZE 500 #define LOGGER_TAG "sshnpd" +#define MONITOR_READ_TIMEOUT_MS 5000 +// How often to try to reconnect if the connect appears stale +#define MONITOR_NOOP_TIMEOUT_MS 40000 + static struct { char *str; enum notification_key key; @@ -57,6 +61,7 @@ static int lock_atclient(void); static int unlock_atclient(int); static int reconnect_atclient(); +static int reconnect_monitor(); static int set_worker_hooks(); static void main_loop(); @@ -178,6 +183,7 @@ int main(int argc, char **argv) { // 7.a Initialize the monitor atclient atclient_init(&monitor_ctx); + atclient_set_read_timeout(&monitor_ctx, MONITOR_READ_TIMEOUT_MS); // 5 seconds for timeout res = atclient_monitor_pkam_authenticate(&monitor_ctx, params.atsign, &atkeys, NULL); if (res != 0 || !should_run) { exit_res = res; @@ -205,12 +211,12 @@ int main(int argc, char **argv) { // atclient_get_public_encryption_key(&atclient, params.manager_list[i], &public_encryption_key); // TODO: finish caching } + printf("\n"); if (params.policy == NULL) { atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_DEBUG, "Policy Manager: NULL"); } else { atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_DEBUG, "Policy Manager: %s", params.policy); } - printf("\n"); if (!should_run) { exit_res = res; @@ -408,49 +414,47 @@ void main_loop() { permitopen.permitopen_hosts = params.permitopen_hosts; permitopen.permitopen_ports = params.permitopen_ports; + size_t timeout_counter = 0; + while (should_run) { atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_DEBUG, "Waiting for next monitor thread message\n"); atclient_monitor_response_init(&message); + int ret; + if (timeout_counter * MONITOR_READ_TIMEOUT_MS > MONITOR_NOOP_TIMEOUT_MS) { + // Do noop & reconnect if needed + ret = reconnect_monitor(); + if (ret != 0) { + timeout_counter = MONITOR_NOOP_TIMEOUT_MS / MONITOR_READ_TIMEOUT_MS + 1; + atclient_monitor_response_free(&message); + break; + } else { + timeout_counter = 0; + } + } + // Read the next monitor message - int ret = atclient_monitor_read(&monitor_ctx, &worker, &message, &monitor_hooks); + ret = atclient_monitor_read(&monitor_ctx, &worker, &message, &monitor_hooks); if (ret != 0) { - atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Possible bad state: monitor read failed (ret: %d)\n", - ret); + atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, + "Possible bad state: monitor read failed, resetting connection (ret: %d)\n", ret); + timeout_counter = MONITOR_NOOP_TIMEOUT_MS / MONITOR_READ_TIMEOUT_MS + 1; } atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_DEBUG, "Received message of type: %d\n", message.type); - switch (message.type) { case ATCLIENT_MONITOR_MESSAGE_TYPE_EMPTY: - // We got a timeout, nothing to read, nothing to do + timeout_counter++; break; case ATCLIENT_MONITOR_ERROR_READ: - if (!atclient_monitor_is_connected(&monitor_ctx)) { - atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, - "Seems the monitor connection is down, trying to reconnect\n"); - - int ret = atclient_monitor_pkam_authenticate(&monitor_ctx, params.atsign, &atkeys, NULL); - if (ret != 0) { - atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, - "Monitor connection failed to reconnect, trying again in 1 second...\n"); - sleep(1); - break; - } - - ret = atclient_monitor_start(&monitor_ctx, regex); - if (ret != 0) { - atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Monitor verb failed to restart.\n"); - break; - } - - atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_INFO, "Reconnected the monitor connection.\n"); - } + timeout_counter = MONITOR_NOOP_TIMEOUT_MS / MONITOR_READ_TIMEOUT_MS + 1; break; case ATCLIENT_MONITOR_MESSAGE_TYPE_DATA_RESPONSE: + timeout_counter = 0; atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_DEBUG, "Received a data response: %s\n", message.data_response); break; case ATCLIENT_MONITOR_MESSAGE_TYPE_ERROR_RESPONSE: + timeout_counter = 0; atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Received an error response: %s\n", message.error_response); break; @@ -458,12 +462,15 @@ void main_loop() { atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Received a NONE notification type\n"); break; case ATCLIENT_MONITOR_ERROR_PARSE_NOTIFICATION: + timeout_counter = MONITOR_NOOP_TIMEOUT_MS + 1; atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Failed to parse the notification\n"); break; case ATCLIENT_MONITOR_ERROR_DECRYPT_NOTIFICATION: + timeout_counter = MONITOR_NOOP_TIMEOUT_MS + 1; atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Failed to decrypt the notification\n"); break; case ATCLIENT_MONITOR_MESSAGE_TYPE_NOTIFICATION: { + timeout_counter = 0; bool is_init = atclient_atnotification_is_decrypted_value_initialized(&message.notification); bool has_key = atclient_atnotification_is_key_initialized(&message.notification); if (is_init) { @@ -522,7 +529,6 @@ void main_loop() { // DO NOT USE permitopen, use npa_permitopen } - // TODO: maybe multithread these handlers switch (notification_key) { case NK_SSHPUBLICKEY: atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_DEBUG, "Executing handle_sshpublickey\n"); @@ -622,3 +628,24 @@ static int reconnect_atclient() { exit: return ret; } + +static int reconnect_monitor() { + atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Seems the monitor connection is down, trying to reconnect\n"); + + int ret = atclient_monitor_pkam_authenticate(&monitor_ctx, params.atsign, &atkeys, NULL); + if (ret != 0) { + atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, + "Monitor connection failed to reconnect, trying again in 1 second...\n"); + sleep(1); + return ret; + } + + ret = atclient_monitor_start(&monitor_ctx, regex); + if (ret != 0) { + atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_ERROR, "Monitor verb failed to restart.\n"); + return ret; + } + + atlogger_log(LOGGER_TAG, ATLOGGER_LOGGING_LEVEL_INFO, "Reconnected the monitor connection.\n"); + return 0; +} diff --git a/packages/c/sshnpd/src/run_srv_process.c b/packages/c/sshnpd/src/run_srv_process.c index f696a6773..48e075e60 100644 --- a/packages/c/sshnpd/src/run_srv_process.c +++ b/packages/c/sshnpd/src/run_srv_process.c @@ -1,6 +1,6 @@ #include "srv/params.h" #include "srv/srv.h" -#include +#include #include #include #include From 9ecd5aa71994a983261c5e629ada209c31f2a8c5 Mon Sep 17 00:00:00 2001 From: XavierChanth Date: Thu, 19 Dec 2024 16:34:25 -0500 Subject: [PATCH 2/4] chore: bump c sshnpd version --- packages/c/sshnpd/CHANGELOG.md | 5 +++++ packages/c/sshnpd/include/sshnpd/version.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/c/sshnpd/CHANGELOG.md b/packages/c/sshnpd/CHANGELOG.md index 6ef901fcf..ca1d922c7 100644 --- a/packages/c/sshnpd/CHANGELOG.md +++ b/packages/c/sshnpd/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.2.6 + +- fix: stabilize monitor connection + - automatic failover / reconnect after ~40 seconds of down time + ## 0.2.5 - fix: uptake some fixes in monitor diff --git a/packages/c/sshnpd/include/sshnpd/version.h b/packages/c/sshnpd/include/sshnpd/version.h index 58a83c2d6..11c277197 100644 --- a/packages/c/sshnpd/include/sshnpd/version.h +++ b/packages/c/sshnpd/include/sshnpd/version.h @@ -1,4 +1,4 @@ #ifndef SSHNPD_VERSION_H #define SSHNPD_VERSION_H -#define SSHNPD_VERSION "0.2.5" +#define SSHNPD_VERSION "0.2.6" #endif From f1a9132fecd9e8a194eec76d80876feec1be16bc Mon Sep 17 00:00:00 2001 From: XavierChanth Date: Thu, 19 Dec 2024 16:37:42 -0500 Subject: [PATCH 3/4] fix: break->continue since this expression was moved out of the loop --- packages/c/sshnpd/src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/c/sshnpd/src/main.c b/packages/c/sshnpd/src/main.c index 748c5de47..de89f6fe4 100644 --- a/packages/c/sshnpd/src/main.c +++ b/packages/c/sshnpd/src/main.c @@ -427,7 +427,7 @@ void main_loop() { if (ret != 0) { timeout_counter = MONITOR_NOOP_TIMEOUT_MS / MONITOR_READ_TIMEOUT_MS + 1; atclient_monitor_response_free(&message); - break; + continue; } else { timeout_counter = 0; } From edc2cbeb524767c68b119d2d391d42c0d212cd8b Mon Sep 17 00:00:00 2001 From: XavierChanth Date: Fri, 20 Dec 2024 15:26:47 -0500 Subject: [PATCH 4/4] chore: bump atsdk version - monitor bug fix & atdirectory returns null bug fix --- packages/c/cmake/atsdk.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/c/cmake/atsdk.cmake b/packages/c/cmake/atsdk.cmake index 059b45372..cbd7365e1 100644 --- a/packages/c/cmake/atsdk.cmake +++ b/packages/c/cmake/atsdk.cmake @@ -3,7 +3,7 @@ if(NOT atsdk_FOUND) FetchContent_Declare( atsdk GIT_REPOSITORY https://github.com/atsign-foundation/at_c.git - GIT_TAG c37093b8b6ab4bf97f277f9752ef1975e67ed762 + GIT_TAG ad8a9a0d154f06a0d7af52bfef27c1334871bfc0 ) FetchContent_MakeAvailable(atsdk) install(TARGETS atclient atchops atlogger)