This configuration is not meant to be used as-is. You will need to uncomment and edit the entries as needed.
# Grid Proxy Certificate and VOMS Attributes
# ==========================================
[gridproxy]
# The default VOMS to use. You can override this for specific probes by
# setting "voms" under the corresponding section.
#default_voms = ops
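# For example, a hypothetical per-probe override (the VO name below is
# illustrative only) could be placed under a probe section such as [arcce]:
#
#   voms = atlas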
# Alternative 1: Use an externally generated proxy certificate. You can either
# export X509_USER_PROXY or point to it with
#user_proxy = /var/cache/nagios/gridproxy.pem
# Alternative 2: Let the probe generate a proxy certificate on demand from
# a robot certificate.
#user_cert = /etc/grid-security/robotcert.pem
#user_key = /etc/grid-security/robotkey.pem
# Checking Storage Elements
# =========================
[gridstorage]
# Base directory where to store temporary files and runtime state information.
#plugins_spooldir = /var/spool/nagios/plugins
# The ARC commands will store some files under $HOME/.arc/. Since the home
# directory may not be set to something usable, set an appropriate value here
# to instruct the Nagios plugins to override $HOME at startup.
#home_dir = /var/spool/nagios
# The log-level to use for this probe. Valid values, in order of
# decreasing verbosity, are DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL.
#loglevel = WARNING
# Checking Compute Elements: Information System
# =============================================
[arcinfosys]
# Same as for [gridstorage].
#plugins_spooldir = /var/spool/nagios/plugins
#home_dir = /var/spool/nagios
# The log-level for this probe as described under [gridstorage].
# It may be useful to set this to INFO.
#loglevel = WARNING
# The glue2 entry point
# ---------------------
#
# These are also provided as command-line options.
# Use this GLUE2 schema instead of querying the CE.
#glue2_schema =
# Warn if there are no objects of these classes.
#warn_if_missing = GLUE2AdminDomain,GLUE2Service,GLUE2Endpoint
# Report critical status if there are no objects of these classes.
#critical_if_missing =
# A comma-separated list of foreign key attribute types which should be
# reflected in the DIT.
#hierarchical_foreign_keys =
# Require that all foreign keys which represent aggregation or composition
# are reflected in the DIT.
#hierarchical_aggregates =
# The aris and egiis entry points
# -------------------------------
#
# Use the command-line options.
# Example tests for the aris entry point
# --------------------------------------
# Usage: --cluster-test=cache_free
[arcinfosys.aris.cache_free]
type = limit
value = float(cache_free)/cache_total
critical.min = 0.01
warning.min = 0.1
# Usage: --cluster-test=topphys
[arcinfosys.aris.topphys]
type = regex
variable = runtimeenvironment
critical.pattern = APPS/HEP/ATLAS-TOPPHYS
critical.message = Missing TOPPHYS.
# Usage: --queue-test=queue-active
[arcinfosys.aris.queue-active]
type = regex
variable = status
critical.pattern = ^active$
critical.message = Inactive queue
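# As a rough sketch, the tests above could be enabled on the command line
# along these lines. The --cluster-test and --queue-test options are taken
# from the usage notes above; the probe name check_arcinfosys and the -H
# option are assumptions and may differ in your installation:
#
#   check_arcinfosys -H ce.example.org aris \
#       --cluster-test=cache_free --cluster-test=topphys \
#       --queue-test=queue-active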
# Checking Compute Elements: Job Submission
# =========================================
[arcce]
# Same as for [gridstorage].
#plugins_spooldir = /var/spool/nagios/plugins
#home_dir = /var/spool/nagios
# The log-level for this probe as described under [gridstorage].
#loglevel = WARNING
[arcce.connection_urls]
# This section can be used to force specific flavours and connection URLs for
# individual CEs. Each line takes the form
#
# ce.example.org = FLAVOUR:URL
#
# where the right hand side corresponds to the -c argument of arcsub(1).
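# For example, a hypothetical entry forcing the ARC1 flavour and an A-REX
# URL (both the flavour and the URL are purely illustrative):
#
#ce.example.org = ARC1:https://ce.example.org:443/arex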
# Example Scripted Job Tests
# --------------------------
#
# These checks are enabled by passing "--test NAME" to the submit command,
# where NAME is the section name without the "arcce." prefix. They inject
# pieces of shell code into the remote job script and check the output using
# regular expression patterns.
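# For example, the python and perl tests below would be enabled by a submit
# command along the lines of the check_arcce_submit command defined in the
# accompanying Nagios configuration (the host name is illustrative):
#
#   check_arcce -H ce.example.org submit --test python --test perl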
[arcce.python]
jobplugin = scripted
required_programs = python
script_line = python -V >python.out 2>&1
output_file = python.out
output_pattern = Python\s+(?P<version>\S+)
status_ok = Found Python version %(version)s.
status_critical = Python version not found in output.
service_description = ARCCE Python version
[arcce.perl]
jobplugin = scripted
required_programs = perl
script_line = perl -v >perl.out 2>&1
output_file = perl.out
output_pattern = This is perl, v(?P<version>\S+)
status_ok = Found Perl version %(version)s.
status_critical = Perl version not found in output.
service_description = ARCCE Perl version
[arcce.gcc]
jobplugin = scripted
required_programs = gcc
script_line = gcc -v >gcc.out 2>&1
output_file = gcc.out
output_pattern = gcc version (?P<version>\S+)
status_ok = Found GCC version %(version)s.
status_critical = GCC version not found in output.
service_description = ARCCE GCC version
[arcce.csh]
jobplugin = scripted
required_programs = csh
script_line = echo >csh-test.csh '#! /bin/csh'; echo >>csh-test.csh 'env >csh.out'; chmod +x csh-test.csh; ./csh-test.csh
output_file = csh.out
output_pattern = ^PATH=
status_ok = Found working csh.
status_critical = Did not find $PATH in csh environment.
service_description = ARCCE csh usability
# Example Storage Job Checks
# --------------------------
#
# These checks are also enabled by passing the second component of the
# section name to the --test option. This adds the specified staging to
# the job description. Input files must exist in advance. Output
# files will be removed after checking that they exist.
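# For example, the stage_srm check below corresponds roughly to the commented
# check_arcce_submit_staging command in the accompanying Nagios configuration
# (the host name is illustrative):
#
#   check_arcce -H ce.example.org submit --job-tag srm \
#       --termination-service 'ARCCE SRM Job Termination' --test stage_srm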
[arcce.stage_srm]
jobplugin = staging
staged_inputs = srm://srm.example.org/somedir/testfile
staged_outputs = srm://srm.example.org/somedir/srm-%(hostname)s-%(epoch_time)s
service_description = ARCCE SRM Result
[arcce.stage_gridftp]
jobplugin = staging
staged_inputs = gsiftp://srm.example.org/somedir/testfile
staged_outputs = gsiftp://srm.example.org/somedir/gsiftp-%(hostname)s-%(epoch_time)s
service_description = ARCCE GridFTP Result
[arcce.stage_lfc]
jobplugin = staging
staged_inputs = lfc://lfc.example.org/lfcdir/testfile-lfc
staged_outputs = lfc://srm://srm.example.org/somedir/lfc-%(hostname)s-%(epoch_time)s@lfc.example.org/lfcdir/lfc-%(hostname)s-%(epoch_time)s
service_description = ARCCE LFC Result
This configuration is not meant to be used as-is. It is an example illustrating how to use the entry points of the check_arcce probe and how to define the associated passive services. Other probes are omitted here, since they are configured as independent services, similar to commonly available Nagios probes.
# --------------------------------------------------------------------------
# This is an example Nagios configuration for the ARC-CE probes meant for
# documentation purposes. It cannot be used as-is.
# --------------------------------------------------------------------------

# Contacts and Contact Groups
# ===========================

# You probably already have contacts defined in your Nagios configuration, so
# you can skip these and substitute your own below.
define contactgroup {
    contactgroup_name  nagios-operators
    members            jdoe
}
define contact {
    use                generic-contact
    contact_name       jdoe
    email              jdoe@example.org
}

# Command Definitions
# ===================

# This is a dummy command for passive services. You may already have something
# like it in your Nagios configuration.
define command {
    command_name       check_passive
    command_line       /bin/true
}

# This command monitors running jobs and collects those which have terminated,
# reporting passive results.
define command {
    command_name       check_arcce_monitor
    command_line       $USER1$/check_arcce -H $HOSTNAME$ monitor
}

# A job submission check including sub-tests which are defined in the plugin
# configuration in separate sections. The results of the sub-tests will be
# passively reported to the service names defined in the same configuration.
define command {
    command_name       check_arcce_submit
    command_line       $USER1$/check_arcce \
                           -H $HOSTNAME$ submit \
                           --test python --test perl --test csh --test gcc
}

# A job submission check with staging. The arguments to --stage-input options
# must exist. The arguments to --stage-output options will be overwritten, and
# deleted on termination. This command is not used below. To use it, add an
# active service and a passive service named "ARCCE SRM Job Termination".
define command {
    command_name       check_arcce_submit_staging
    # Passed explicitly:
    # command_line     $USER1$/check_arcce \
    #                      -H $HOSTNAME$ submit --job-tag srm \
    #                      --termination-service 'ARCCE SRM Job Termination' \
    #                      --stage-input srm.txt=srm://srm.example.org/nagios/readable.txt \
    #                      --stage-output srm://srm.example.org/nagios/srm-$HOSTNAME$-$TIMET$.txt
    # Using a predefined job-test:
    # command_line     $USER1$/check_arcce \
    #                      -H $HOSTNAME$ submit --job-tag srm \
    #                      --termination-service 'ARCCE SRM Job Termination' \
    #                      --test stage_srm
}

# Host Groups and Host Templates
# ==============================

# You need to have one host definition to which the monitoring service is
# assigned. This is typically the Nagios host itself, for which you probably
# already have a definition.
define host {
    name               nagios-host
    use                generic-host
    max_check_attempts 10
    contact_groups     nagios-operators
    register           0
}

# The following host group and template will be used for all CEs.
define hostgroup {
    hostgroup_name     arcce-hosts
    alias              ARCCE Hosts
}
define host {
    name               arcce-host
    use                generic-host
    max_check_attempts 10
    contact_groups     nagios-operators
    hostgroups         arcce-hosts
    register           0
}

# Service Groups and Service Templates
# ====================================

define servicegroup {
    servicegroup_name  arcce-services
    alias              ARCCE Services
}
define service {
    name                    arcce-service
    use                     generic-service
    servicegroups           arcce-services
    check_period            24x7
    max_check_attempts      3
    flap_detection_enabled  0
    contact_groups          nagios-operators
    notifications_enabled   0
    register                0
}
define service {
    name                    arcce-monitoring-service
    use                     arcce-service
    normal_check_interval   5
    retry_check_interval    5
    register                0
}
define service {
    name                    arcce-submission-service
    use                     arcce-service
    normal_check_interval   30
    retry_check_interval    30
    register                0
}
define service {
    name                    arcce-passive-service
    use                     arcce-service
    active_checks_enabled   0
    passive_checks_enabled  1
    check_command           check_passive
    register                0
}
define service {
    use                     arcce-monitoring-service
    host_name               localhost
    service_description     ARCCE Monitoring
    check_command           check_arcce_monitor
}

# For each ARC CE, we need one active service for submission and a number of
# passive services to collect the results. In the following we associate the
# per-CE services with the "arcce-hosts" group, which will add them to all
# members of the group.
define service {
    use                     arcce-submission-service
    service_description     ARCCE Job Submission
    hostgroup_name          arcce-hosts
    check_command           check_arcce_submit
}
define service {
    use                     arcce-passive-service
    service_description     ARCCE Job Termination
    hostgroup_name          arcce-hosts
}
define service {
    use                     arcce-passive-service
    service_description     ARCCE Python version
    hostgroup_name          arcce-hosts
}
define service {
    use                     arcce-passive-service
    service_description     ARCCE Perl version
    hostgroup_name          arcce-hosts
}
define service {
    use                     arcce-passive-service
    service_description     ARCCE GCC version
    hostgroup_name          arcce-hosts
}
define service {
    use                     arcce-passive-service
    service_description     ARCCE csh usability
    hostgroup_name          arcce-hosts
}

# Hosts
# =====

# This provides the monitoring service.
define host {
    use                nagios-host
    host_name          localhost
}

# Any host which uses the arcce-host template will get an active submission
# service and all the related passive services.
#define host {
#    use                arcce-host
#    host_name          ce-00.example.org
#}