Skip to content

Commit

Permalink
Merge pull request #77 from ARGOeu/devel
Browse files Browse the repository at this point in the history
Version 0.4.4
  • Loading branch information
themiszamani authored Jun 19, 2018
2 parents 4e578b2 + a9bd4a5 commit 0eb56ea
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 39 deletions.
6 changes: 5 additions & 1 deletion argo-ncg.spec
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

Summary: ARGO Nagios config generator
Name: argo-ncg
Version: 0.4.3
Version: 0.4.4
Release: 1%{?dist}
License: ASL 2.0
Group: Network/Monitoring
Expand All @@ -14,6 +14,10 @@ Obsoletes: grid-monitoring-config-gen-nagios grid-monitoring-config-gen ncg-metr
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
BuildArch: noarch
Requires: perl-libwww-perl > 5.833-2
Requires: psmisc
%if 0%{?el7:1}
Requires: perl(LWP::Protocol::https)
%endif

%description
(NULL)
Expand Down
39 changes: 39 additions & 0 deletions config/ncg-metric-config.d/argo.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
    "argo.AMSPublisher-Check" : {
        "parameter" : {
            "-s" : "/var/run/argo-nagios-ams-publisher/sock",
            "-q" : "'w:metrics+g:published360' -c 8000 -q 'w:alarms+g:published360' -c 1 -q 'w:metricsdevel+g:published360' -c 8000"
        },
        "config" : {
            "interval" : 360,
            "maxCheckAttempts" : 1,
            "path" : "/usr/libexec/argo-monitoring/probes/argo",
            "retryInterval" : 1,
            "timeout" : 120
        },
        "flags" : {
            "NOHOSTNAME" : 1,
            "NOTIMEOUT" : 1
        },
        "probe" : "ams-publisher-probe"
    },
    "org.nagios.AmsDirSize" : {
        "parameter" : {
            "-d" : "/var/spool/argo-nagios-ams-publisher",
            "-w" : 10000,
            "-c" : 100000,
            "-f" : 0
        },
        "config" : {
            "interval" : 60,
            "maxCheckAttempts" : 3,
            "path" : "/usr/libexec/argo-monitoring/probes/nagiosexchange",
            "retryInterval" : 5,
            "timeout" : 15
        },
        "flags" : {
            "NOHOSTNAME" : 1
        },
        "probe" : "check_dirsize.sh"
    }
}
24 changes: 24 additions & 0 deletions config/ncg-metric-config.d/midmon.conf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,30 @@
},
"probe" : "midmon/check_bdii_entries_num"
},
"eu.egi.sec.dCache-3.0" : {
"attribute" : {
"SITE_BDII" : "-H",
"BDII_PORT" : "-p"
},
"parameter" : {
"-w" : "0:0",
"-f" : "\"(&(GlueSEImplementationName=dCache)(GlueSEImplementationVersion=3.0.*)(GlueSEUniqueID=*$HOSTNAME$*))\"",
"-b" : "O=grid"
},
"flags" : {
"NOHOSTNAME" : 1,
"OBSESS" : 1
},
"docurl" : "https://wiki.egi.eu/wiki/MW_Nagios_tests",
"config" : {
"maxCheckAttempts" : 1,
"timeout" : 120,
"path" : "/usr/libexec/argo-monitoring/probes",
"retryInterval" : 15,
"interval" : 1440
},
"probe" : "midmon/check_bdii_entries_num"
},
"org.bdii.GLUE2-Validate" : {
"flags" : {
"NRPE" : 1,
Expand Down
2 changes: 1 addition & 1 deletion config/templates/commands.template
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ define command{

define command {
command_name handle_service_check
command_line /usr/libexec/argo-msg-nagios/handle_service_check --role=<NAGIOS_ROLE> --tenant=<TENANT> --send-to-msg=<SEND_TO_MSG> && /usr/bin/ams-metric-to-queue --queue /var/spool/argo-nagios-ams-publisher/metrics/ /var/spool/argo-nagios-ams-publisher/metricsdevel/ --service "$_SERVICESERVICE_FLAVOUR$" --hostname "$HOSTNAME$" --metric "$_SERVICEMETRIC_NAME$" --status "$SERVICESTATE$" --summary "$SERVICEOUTPUT$" --message "$LONGSERVICEOUTPUT$" --vofqan "$_SERVICEVO_FQAN$" --voname "$_SERVICEVO$" --roc "$_SERVICEROC$" --servicestatetype "$SERVICESTATETYPE$"
command_line /usr/libexec/argo-msg-nagios/handle_service_check --role=<NAGIOS_ROLE> --tenant=<TENANT> --send-to-msg=<SEND_TO_MSG> && /usr/bin/ams-metric-to-queue --queue /var/spool/argo-nagios-ams-publisher/metrics/ /var/spool/argo-nagios-ams-publisher/metricsdevel/ --service "$_SERVICESERVICE_FLAVOUR$" --hostname "$HOSTNAME$" --metric "$_SERVICEMETRIC_NAME$" --status "$SERVICESTATE$" --summary "$SERVICEOUTPUT$" --message "$LONGSERVICEOUTPUT$" --vofqan "$_SERVICEVO_FQAN$" --voname "$_SERVICEVO$" --roc "$_SERVICEROC$" --servicestatetype "$SERVICESTATETYPE$" --actual_data "$SERVICEPERFDATA$"
}

define command {
Expand Down
14 changes: 1 addition & 13 deletions src/modules/NCG/LocalMetrics/Hash.pm
Original file line number Diff line number Diff line change
Expand Up @@ -426,19 +426,6 @@ $WLCG_SERVICE->{'org.nagios.MsgDirSize'}->{parameter}->{'-w'} = '10000';
$WLCG_SERVICE->{'org.nagios.MsgDirSize'}->{parameter}->{'-c'} = '100000';
$WLCG_SERVICE->{'org.nagios.MsgDirSize'}->{parameter}->{'-f'} = '';

$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{probe} = "check_dirsize.sh";
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{config}->{timeout} = 15;
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{config}->{interval} = 60;
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{config}->{retryInterval} = 5;
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{config}->{maxCheckAttempts} = 3;
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{config}->{path} = '/usr/libexec/argo-monitoring/probes/nagiosexchange';
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{flags}->{NOHOSTNAME} = 1;
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{flags}->{PNP} = 1;
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{parameter}->{'-d'} = '/var/spool/argo-nagios-ams-publisher';
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{parameter}->{'-w'} = '10000';
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{parameter}->{'-c'} = '100000';
$WLCG_SERVICE->{'org.nagios.AmsDirSize'}->{parameter}->{'-f'} = '';

$WLCG_SERVICE->{'org.nagios.ProcessMsgToHandler'}->{probe} = 'check_procs';
$WLCG_SERVICE->{'org.nagios.ProcessMsgToHandler'}->{config}->{path} = $NCG::NCG_PROBES_PATH_NAGIOS;
$WLCG_SERVICE->{'org.nagios.ProcessMsgToHandler'}->{config}->{interval} = 15;
Expand Down Expand Up @@ -602,6 +589,7 @@ $WLCG_NODETYPE->{internal}->{"NAGIOS"} = [
'emi.wms.WMS-JobMonit',
'org.nordugrid.ARC-CE-monitor',
'org.nordugrid.ARC-CE-clean',
'argo.AMSPublisher-Check',
];

$WLCG_NODETYPE->{internal}->{"NRPE"} = [
Expand Down
9 changes: 8 additions & 1 deletion src/modules/NCG/LocalMetricsAttrs/Active.pm
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ sub _analyzeURLs {
}

if ($attr = $self->{SITEDB}->hostAttribute($hostname, "eu.egi.cloud.vm-management.occi_URL")) {
my $port;
if ($attr =~ /(\S+?:\/\/)?([-_.A-Za-z0-9]+):(\d+)/ ) {
$port = $3;
}
eval {my $occiHash = {};
my $occiurl = url($attr);
$self->{SITEDB}->hostAttribute($hostname, 'OCCI_PORT', $occiurl->port);
Expand All @@ -392,7 +396,10 @@ sub _analyzeURLs {
$occiHash->{$key} = $value;
}
$self->{SITEDB}->hostAttribute($hostname, 'OCCI_SCHEME', $occiurl->scheme);
$self->{SITEDB}->hostAttribute($hostname, 'OCCI_URL', $occiurl->scheme."://".$occiurl->host.":".$occiurl->port.$occiurl->epath);
my $occiurlAttr = $occiurl->scheme."://".$occiurl->host;
$occiurlAttr .= ":" . $port if ($port);
$occiurlAttr .= $occiurl->epath;
$self->{SITEDB}->hostAttribute($hostname, 'OCCI_URL', $occiurlAttr);
if (!exists $occiHash->{OCCI_RESOURCE}) {
if (!exists $occiHash->{OCCI_PLATFORM}) {
$self->{SITEDB}->hostAttribute($hostname, 'OCCI_RESOURCE', 'small');
Expand Down
15 changes: 13 additions & 2 deletions src/modules/NCG/SiteContacts/GOCDB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ sub getData {
}

my $req = HTTP::Request->new(GET => $url);

if ( $self->{USERNAME} && $self->{PASSWORD} ) {
$req->authorization_basic($self->{USERNAME}, $self->{PASSWORD});
}

my $res = $self->safeHTTPSCall($ua,$req);
if (!$res->is_success) {
$self->error("Could not get results from GOCDB: ".$res->status_line);
Expand Down Expand Up @@ -209,8 +214,14 @@ can contains following elements:
ROC - roc name must be set in case when CONTACT_TYPE is set to 'roc'.
TIMEOUT - HTTP timeout,
(default: DEFAULT_HTTP_TIMEOUT inherited from NCG)
TIMEOUT - HTTP timeout
- default: DEFAULT_HTTP_TIMEOUT inherited from NCG
USERNAME - username for basic authentication
- default: undefined
PASSWORD - password for basic authentication
- default: undefined
=item C<getData>
Expand Down
34 changes: 32 additions & 2 deletions src/modules/NCG/SiteInfo/GOCDB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ sub getData {
}
my $req = HTTP::Request->new(GET => $url);

if ( $self->{USERNAME} && $self->{PASSWORD} ) {
$req->authorization_basic($self->{USERNAME}, $self->{PASSWORD});
}

my $res = $self->safeHTTPSCall($ua,$req);
if (!$res->is_success) {
$self->error("Could not get results from GOCDB: ".$res->status_line);
Expand Down Expand Up @@ -208,6 +212,26 @@ sub getData {
}
}
}
foreach $elem ($site->getElementsByTagName("EXTENSIONS")) {
foreach my $ext ($elem->getElementsByTagName("EXTENSION")) {
my ($elemInt, $keyInt, $valueInt);
foreach $elemInt ($ext->getElementsByTagName("KEY")) {
my $value = $elemInt->getFirstChild->getNodeValue();
if ($value) {
$keyInt = $value;
}
}
foreach $elemInt ($ext->getElementsByTagName("VALUE")) {
my $value = $elemInt->getFirstChild->getNodeValue();
if ($value) {
$valueInt = $value;
}
}
if ($keyInt && $valueInt) {
$self->{SITEDB}->hostAttribute($hostname, $keyInt, $valueInt);
}
}
}
}

if (!$self->{SITEDB}->siteCountry()) {
Expand Down Expand Up @@ -267,8 +291,14 @@ can contains following elements:
SCOPE - scope of services (for possible values see GOCDB documentation)
- default: undefined
TIMEOUT - HTTP timeout,
(default: DEFAULT_HTTP_TIMEOUT inherited from NCG)
TIMEOUT - HTTP timeout
- default: DEFAULT_HTTP_TIMEOUT inherited from NCG
USERNAME - username for basic authentication
- default: undefined
PASSWORD - password for basic authentication
- default: undefined
=back
Expand Down
15 changes: 13 additions & 2 deletions src/modules/NCG/SiteSet/GOCDB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ sub getData
}

my $req = HTTP::Request->new(GET => $url);

if ( $self->{USERNAME} && $self->{PASSWORD} ) {
$req->authorization_basic($self->{USERNAME}, $self->{PASSWORD});
}

my $res = $self->safeHTTPSCall($ua,$req);
if (!$res->is_success) {
$self->error("Could not get results from GOCDB: ".$res->status_line);
Expand Down Expand Up @@ -208,8 +213,14 @@ can contains following elements:
SUBGRID_EXCLUDE - comma separated list of Subgrids to exclude.
TIMEOUT - HTTP timeout,
(default: DEFAULT_HTTP_TIMEOUT inherited from NCG)
TIMEOUT - HTTP timeout
- default: DEFAULT_HTTP_TIMEOUT inherited from NCG
USERNAME - username for basic authentication
- default: undefined
PASSWORD - password for basic authentication
- default: undefined
=back
Expand Down
3 changes: 1 addition & 2 deletions src/ncg.pl
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,7 @@ sub getConfig {
my %options = (-ConfigFile => $CONFIG_FILE, -AllowMultiOptions => 1,
-InterPolateVars => 1, -InterPolateEnv => 1,
-IncludeGlob => 1, -UseApacheInclude => 1,
-IncludeRelative => 1, -IncludeDirectories => 1,
-AutoTrue => 1);
-IncludeRelative => 1, -IncludeDirectories => 1);
my $config = new Config::General(%options);
if (!$config) {
alarm(0);
Expand Down
23 changes: 8 additions & 15 deletions src/ncg.reload.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/bin/sh

NAGIOS_RUNNING=1
NCG_TIMEOUT=1800
OUTPUT_DIR_TMP=/etc/nagios/argo-ncg.d.tmp.$$
CONFIG_FILE_TMP=/etc/nagios/nagios.cfg.tmp.$$
Expand All @@ -20,17 +19,15 @@ revert_config_and_exit () {
exit 1
}

# check if nagios is running at all
service nagios status 2>&1 > /dev/null
# status will return 1 if not running
if [ $? -ne 0 ]; then
NAGIOS_RUNNING=0
fi
# kill all running ncg.pl instances
killall -9 ncg.pl
# clean up any temporary output directories left behind by previous ncg.reload.sh runs
rm -rf /etc/nagios/argo-ncg.d.tmp.*

/usr/sbin/ncg.pl --timeout $NCG_TIMEOUT --output-dir=$OUTPUT_DIR_TMP --final-output-dir=/etc/nagios/argo-ncg.d $NCG_OPTIONS $NCG_BACKUP_OPTIONS || revert_config_and_exit

sed "s|/etc/nagios/argo-ncg.d|$OUTPUT_DIR_TMP|" /etc/nagios/nagios.cfg > $CONFIG_FILE_TMP
/usr/bin/nagios -v $CONFIG_FILE_TMP || revert_config_and_exit
nagios -v $CONFIG_FILE_TMP || revert_config_and_exit

# remove the temporary nagios config file
rm -rf $CONFIG_FILE_TMP
Expand All @@ -40,10 +37,6 @@ rm -rf /etc/nagios/argo-ncg.d.backup
mv /etc/nagios/argo-ncg.d /etc/nagios/argo-ncg.d.backup
mv $OUTPUT_DIR_TMP /etc/nagios/argo-ncg.d

if [ $NAGIOS_RUNNING -eq 1 ]; then
/sbin/service nagios reload
else
# here we try to start nagios, continue running even if it fails
echo "Nagios is not running, attempting to start it"
/sbin/service nagios start
fi
# The nagios init script doesn't report its status reliably,
# so the safer way is to always restart.
service nagios restart

0 comments on commit 0eb56ea

Please sign in to comment.