#!/usr/bin/perl -w

=head1 NAME

ngconfigtest - checks the arc.conf for inconsistencies, known problems
or (in a future development) just general bad taste.

=head1 SYNOPSIS

ngconfigtest --printall

=head1 DESCRIPTION

The motivation behind this little script was to have a repository for
automated tests on issues that came up on the NorduGrid developers
mailing list. As such this script indicates directories that are not
present or indicates that the cache directory does not reside on a file
system that knows how to implement UNIX locking properly. The tool is
supposed to help both the newbie and the experts to improve overall
reliability of the grid.

BECAUSE EVERY INSTALLATION OF ARC IS DIFFERENT THIS UTILITY ONLY SUGGESTS
WHAT COULD BE WRONG. SOMETIMES IT IS OVERRESTRICTIVE. AND SOMETIMES IT
CAN MISS SOME MISCONFIGURATION. NEVER TREAT RESULTS PRODUCED BY IT AS
ULTIMATE TRUTH.

=head1 OPTIONS

=over 4

=item --config <string>

Specifies the location of the config file; the default is /etc/arc.conf.

=item --printall

Lists all variable names of the config file together with their values.

=item --timeserver <server>

Allows the specification of a server against which to test the local
system's time.

=item --skip-warnings

Do not show warnings.

=item --help

Quick summary of options.

=item --man

Detailed man page.

=back

=cut


#################### P R E A M B L E  and options parsing ####################

use strict;
use warnings;
use Getopt::Long;

# Defaults for the command line options; GetOptions() below overrides them.
my ($conffile,$printall,$skip_warnings,$help,$man)=("/etc/arc.conf",0,0,0,0);

# Please make sure this reference server is not one you administer yourself.
my $timeserver="europe.pool.ntp.org";

# Installation prefixes; the environment takes precedence over the defaults.
my $arcloc = "/opt/nordugrid/";
$arcloc = $ENV{"NORDUGRID_LOCATION"} if defined $ENV{"NORDUGRID_LOCATION"};

my $globusloc = "/opt/globus/";
$globusloc = $ENV{"GLOBUS_LOCATION"} if defined $ENV{"GLOBUS_LOCATION"};

# Kernel name as printed by uname(1); getfs() compares it with "Linux".
# Backticks yield undef when the command cannot be started.
my $OS = `uname`;
$OS = "" unless defined $OS;
chomp $OS;

# $hostcert and $CApath are set by confchecktripel() while the configuration
# is parsed; all three are read (and defaulted) by check_certificates().
my $usercert;
my $hostcert;
my $CApath;

GetOptions(
	"config:s" => \$conffile,
	"printall" => \$printall,
	"skip-warnings" => \$skip_warnings,
	"timeserver:s" => \$timeserver,
	"help" => \$help,
	"man" => \$man
	) or die "Could not parse options.\n";

if ( $man or $help ) {
	# Load Pod::Usage only if needed. The import is spelled as a class
	# method call rather than with the ambiguous indirect object syntax.
	require Pod::Usage;
	Pod::Usage->import;
	pod2usage(1) if $help;
	pod2usage(VERBOSE => 2) if $man;
}

# Number of non-critical issues found; incremented by the individual checks.
my $warnings=0;

####################### C H E C K S  #######################################

=head1 PERFORMED TESTS

=over 4

=item timecheck

The current time is compared with an external time server. A clock
shift higher than a maximally allowed maxtimediff results in an error.

=cut

# timecheck($timeserver, $maxtimediff)
#
# Queries $timeserver with "ntpdate -q" and compares the reported clock
# offset (in seconds) with $maxtimediff.
#
# Returns 1 (one failure) if the absolute offset is at least $maxtimediff
# and 0 otherwise. 0 is also returned whenever the check cannot be carried
# out: ntpdate is not installed, cannot be started, or reports no offset.
sub timecheck($$) {
	my ($timeserver, $maxtimediff) = @_;
	my $timeoffset = undef;

	my $ntpdate = "/usr/sbin/ntpdate";
	unless ( -x $ntpdate )  {
		print STDERR "W: Could not spot location of 'ntpdate'.\n";
		return 0;
	}

	# List-form pipe open: the server name comes from the command line and
	# must not be interpreted by a shell.
	my $ntp;
	unless (open($ntp, "-|", $ntpdate, "-q", $timeserver)) {
		print STDERR "W: Could not properly invoke 'ntpdate'.\n";
		return 0;
	}
	while (my $line = <$ntp>) {
		next unless $line =~ m/^server/;
		# The sign is kept so that the messages show the direction of the
		# deviation; the comparison below uses the absolute value.
		if ($line =~ m/offset *([-+]?[0-9]+(?:\.[0-9]+)?)/) {
			$timeoffset = $1;
		}
	}
	close $ntp;

	if (defined $timeoffset) {
		if (abs($timeoffset)>=$maxtimediff) {
			print STDERR "E: Timecheck: Your time diverts more than " .
				"$maxtimediff seconds ($timeoffset seconds) from the " .
				"public time server '$timeserver'\n";
			return 1;
		} else {
			print STDERR "W: Timecheck: Your time diverts slightly " .
				"($timeoffset seconds) from the public time " .
				"server '$timeserver'.\n"
				unless $skip_warnings;
		}
	} else {
		print STDERR "W: Timecheck: Can't check the time\n";
	}
	return 0;
}

=item check of permissions

The permissions of several different files and directories are checked.

=cut

# permcheck($filename, $p, $mask)
#
# Returns true if the mode of $filename, restricted to the bits in $mask,
# equals $p. Returns false if $filename cannot be stat()ed, so a missing
# file never satisfies a permission requirement.
sub permcheck($$$) {
	my ($filename, $p, $mask) = @_;

	# stat() returns the empty list on failure.
	my @st = stat($filename);
	return 0 unless @st;

	# Element 2 of the stat list is the file mode.
	return (($st[2] & $mask) == $p);
}

=item File system check

The cache dir should not be on NFS because of some problems with locking that appear irregularly but reliably.

=cut

# getfs($dir)
#
# Returns the file system type of $dir as printed in the "Type" column of
# "df -TP", or undef if it cannot be determined. The lookup is only
# attempted when uname reported "Linux". A warning is printed on failure
# unless --skip-warnings was given.
sub getfs($) {
	my ($dir) = @_;
	my $fs = undef;

	if ($OS eq "Linux") {
		# we are on linux, so we can assume we have a GNU df.
		# List-form pipe open: $dir comes from the configuration file and
		# must not be interpreted by a shell; "--" keeps a leading dash in
		# the path from being read as an option.
		if (open(my $df, "-|", "/bin/df", "-TP", "--", $dir)) {
			while (my $line = <$df>) {
				my @fields = split /\s+/, $line;
				next unless defined $fields[2];
				# The header line has text in the third column, data lines
				# have the block count there.
				$fs = $fields[1] if $fields[2] =~ m/^[0-9]+$/;
			}
			close $df;
		}
	}

	print STDERR "W: Couldn't determine filesystem type of \"$dir\"\n"
		unless (defined($fs) or $skip_warnings);

	return $fs;
}


# this does simple stateless checks of configuration entries
# confchecktripel($block, $name, $value)
#
# Performs the simple, stateless checks for one configuration entry
# "$name=$value" found in block [$block]:
#   common/x509_user_cert   exists, owned by the caller, mode 0600
#   common/x509_user_key    exists, owned by the caller, mode 0400
#   common/x509_cert_dir    is a directory holding CRLs (*.r0) that are
#                           neither dated in the future nor older than 2 days
#   grid-manager/cachedir   is a directory, preferably not on NFS
#   grid-manager/controldir is a directory with mode 0755
#   grid-manager/*dir       is a directory
#
# Side effects: stores an accepted certificate in $hostcert and an existing
# certificate directory in $CApath; increments $warnings for non-critical
# findings. All messages go to STDERR.
#
# Returns the number of failures found for this entry.
sub confchecktripel($$$) {
	my ($block, $name, $value) = @_;
	my $failure = 0;

	# Messages are emitted with print, not printf: $value comes from the
	# configuration file and must not be interpreted as a format string.

	# check the certificate
	if ($block eq "common" and $name eq "x509_user_cert") {
		if (! -e $value) {
			print STDERR "E: The host certificate '$value' is not existing or unreadable.\n";
			$failure++;
		} elsif (! -O $value) {
			print STDERR "E: The host certificate '$value' is not owned by this user.\n";
			$failure++;
		} elsif (!permcheck($value,0600,0777)) {
			print STDERR "E: Permission of '$value' must be 'rw-------'\n";
			$failure++;
		}
		else {
			$hostcert=$value;
		}
	}

	# check the key
	elsif ($block eq "common" and $name eq "x509_user_key") {
		if (! -e $value) {
			print STDERR "E: The host key '$value' is not existing or unreadable.\n";
			$failure++;
		} elsif (! -O $value) {
			print STDERR "E: The host key '$value' is not owned by this user.\n";
			$failure++;
		} elsif (!permcheck($value,0400,0777)) {
			print STDERR "E: Permission of '$value' must be 'r--------'\n";
			$failure++;
		}
	}

	# check the certificate directory
	elsif ($block eq "common" and $name eq "x509_cert_dir") {
		if (! -d $value) {
			print STDERR "E: $name: The certificate directory is not existing.\n";
			$failure++;
		} else {
			my @r0s=glob($value."/*.r0");
			if (!@r0s) {
				print STDERR "W: $name: There are no certificate revocation lists.\n"
							unless $skip_warnings;
				$warnings++;
			} else {
				my $t=time();
				my $maxdiffsecs=60*60*24*2; # two days
				foreach my $r0 (@r0s) {
					# Element 9 of the stat list is the modification time.
					my $mtime = (stat($r0))[9];
					unless (defined $mtime) {
						# e.g. a dangling symbolic link
						print STDERR "E: $r0: cannot be examined: $!\n";
						$failure++;
						next;
					}
					if ($t < $mtime ) {
						print STDERR "E: $r0: mtime in future\n";
						$failure++;
					} elsif ($t > $mtime + $maxdiffsecs) {
						print STDERR "E: $r0: Older than $maxdiffsecs seconds.\n";
						$failure++;
					}
				}
			}
			$CApath=$value;
		}
	}

	# check the cache directory
	elsif ($block eq "grid-manager" and $name eq "cachedir") {
		if (! -d $value)  {
			print STDERR "E: cachedir: not existing at '$value'\n";
			$failure++;
		} else {
			my $fs = getfs($value);
			# df reports NFS mounts as "nfs", "nfs4", ...
			if (defined $fs and $fs =~ m/^nfs/) {
				if (!$skip_warnings) {
					print STDERR "W: cachedir: This system is not stable with the cache " .
						"directory on an NFS-mounted system at $value.\n";
				}
				$warnings++;
			}
		}
	}

	# check the controldir
	elsif ($block eq "grid-manager" and $name eq "controldir") {
		if (! -d $value)  {
			printf STDERR "E: %s: directory (%s) does not exist\n", $value, $name;
			$failure++;
		} elsif (!permcheck($value,0755,0777)) {
			printf STDERR "E: %s: directory (%s) should be 755\n", $value, $name;
			$failure++;
		}
	}

	# check all remaining directory entries of the grid-manager block for existence
	elsif ($block eq "grid-manager" and $name =~ m/dir$/) {
		if (! -d $value)  {
			printf STDERR "E: %s: directory (%s) does not exist\n", $value, $name;
			$failure++;
		}
	}

	return $failure;
}

=item configuration check

General checks on the sensibility of the arc.conf

=cut

# _strip_config_token($text)
#
# Returns $text without surrounding white space and without one pair of
# enclosing double or single quotes.
sub _strip_config_token {
	my ($text) = @_;
	$text =~ s/^\s*//;
	$text =~ s/\s*$//;
	$text =~ s/^"(.*)"$/$1/;
	$text =~ s/^'(.*)'$/$1/;
	return $text;
}

# _unbalanced_quotes($text)
#
# Returns true if $text starts with a quote character (" or ') but does not
# end with the same one, or the other way round.
sub _unbalanced_quotes {
	my ($text) = @_;
	foreach my $quote ('"', "'") {
		my $opens  = ($text =~ m/^$quote/) ? 1 : 0;
		my $closes = ($text =~ m/$quote$/) ? 1 : 0;
		return 1 if $opens != $closes;
	}
	return 0;
}

# confcheck($arcconf)
#
# Parses the configuration file $arcconf line by line. Lines starting with
# "#" and blank lines are skipped, "[name]" starts a block and every other
# line must have the form name=value. Each entry is passed to
# confchecktripel(); the collected blocks are passed to check_completeness().
# With --printall the parsed configuration is printed to STDOUT in the order
# in which the blocks were found.
#
# Internally every block is a hash of its entries; repeated entries are
# joined with the marker ">]|sep|[<" and the line number of the block header
# is kept under the pseudo entry ">]|found|[<". [vo] blocks may occur
# several times and are therefore stored as "vo|<counter>".
#
# Returns the number of failures found.
sub confcheck($) {
	my ($arcconf) = @_;
	my $failures = 0;
	my $config = {};

	# Read the file that was passed in, not the global default.
	my $conf_fh;
	unless (open($conf_fh, "<", $arcconf)) {
		$failures++;
		print STDERR "E: Could not open '$arcconf' for reading.\n";
		return $failures;
	}
	my $blockname = undef;
	my $blockcontents = 0;	# number of entries seen in the current block
	my $c = 0;		# current line number
	my $vo_counter = 0;
	while (my $line = <$conf_fh>) {
		$c++;

		next if $line =~ m/^#/;
		next if $line =~ m/^\s*$/;

		# a new block?
		if ($line =~ m/^\s*\[(.+)\]\s*$/) {
			my $new_block = $1;
			if (defined $blockname and $blockcontents == 0) {
				printf STDERR "E: %s: Block \"%s\" is empty\n",
					$arcconf, $blockname;
			}

			$blockname = $new_block;
			$blockcontents = 0;

			# blocknames must be uniq
			# XXX but there is this special case of vo-blocks...
			if ($blockname eq "vo") {
				$blockname .= "|" . ++$vo_counter;
			}
			if (defined $config->{$blockname}) {
				if (!$skip_warnings) {
					print STDERR "W: $arcconf:$c: Block '"
					  . $blockname
					  ."' is defined multiple times\n";
				}
				$warnings++;
			}

			$config->{$blockname}{">]|found|[<"} = $c;

			next;
		}

		# look out for crap
		unless ($line =~ m/^([^=]*)=(.*)$/) {
			printf STDERR "E: %s:%d: Line is erroneous!\n", $arcconf, $c;
			$failures++;
			next;
		}

		my $name  = _strip_config_token($1);
		my $value = _strip_config_token($2);

		if (_unbalanced_quotes($name)) {
			printf STDERR "W: %s:%d: badly quoted attribute name?\n",
					$arcconf, $c	unless $skip_warnings;
			$warnings++;
		}
		if (_unbalanced_quotes($value)) {
			printf STDERR "W: %s:%d: badly quoted value?\n",
					$arcconf, $c	unless $skip_warnings;
			$warnings++;
		}

		# are we within a block?
		unless (defined $blockname) {
			printf STDERR "E: %s:%d: found name=value pair which is " .
				"not part of a block\n", $arcconf, $c;
			$failures++;
			next;
		}

		# check if we know more about this kind of tripel
		$failures += confchecktripel($blockname, $name, $value);

		#count
		$blockcontents++;

		# exists(), not truth: a first value of "0" or "" must not be
		# overwritten by a later one.
		if (exists $config->{$blockname}{$name}) {
			$config->{$blockname}{$name} .= ">]|sep|[<" . $value;
		} else {
			$config->{$blockname}{$name} = $value;
		}
	}

	close $conf_fh;

	# The last block has no following header that would trigger the
	# emptiness check inside the loop.
	if (defined $blockname and $blockcontents == 0) {
		printf STDERR "E: %s: Block \"%s\" is empty\n",
			$arcconf, $blockname;
	}

	$failures += check_completeness($config);

	if ($printall) {
		foreach my $key (sort { $config->{$a}{">]|found|[<"} <=> $config->{$b}{">]|found|[<"} }  keys %$config) {
			printf "\n# line: %d\n", $config->{$key}{">]|found|[<"};
			# hide the counter that was appended to [vo] blocks
			if ($key =~ m/^(.*)\|[0-9]+$/) {
				printf "[%s]\n", $1;
			} else {
				printf "[%s]\n", $key;
			}
			my $x = $config->{$key};
			foreach my $item (sort keys %$x) {
				next if $item eq ">]|found|[<";
				foreach my $val (split />\]\|sep\|\[</, $config->{$key}{$item}) {
					printf "%s=\"%s\"\n", $item, $val;
				}
			}
		}
	}

	return $failures;
}

=item check for completeness

Returns an error if the presence of one value implies the presence of another.

=cut

# check_completeness($config)
#
# $config is a reference to a hash keyed by block name whose values are
# hashes of the entries of that block, as built by confcheck().
#
# Warns about blocks that are not in the lists of known blocks, reports
# required blocks that are missing, and checks that [common] has an lrms
# entry with a matching [queue/<lrms>] block and a hostname entry equal to
# the output of `hostname -f`.
#
# Declared without a prototype: the sub takes one argument, and an empty
# prototype would contradict the call in confcheck().
#
# Returns the number of failures found; non-critical findings are counted
# in $warnings.
sub check_completeness {
	my ($config) = @_;
	my $failures=0;

	my @required=("common", "group", "grid-manager",
		"gridftpd", "gridftpd/filedir",
		"gridftpd/unixacl", "gridftpd/gacl", "gridftpd/gacldir", "gridftpd/jobs",
		"infosys", "cluster",
		 "queue/fork");

	my @optional=("httpsd","janitor","httpsd/se","vo");

	my %known = map { $_ => 1 } (@required, @optional);

	# testing for unknown
	foreach my $k (keys %$config) {
		# Compare literally instead of using the block name as a regular
		# expression. [vo] blocks carry a "|<counter>" suffix, which is
		# stripped before the lookup.
		(my $plain = $k) =~ s/\|[0-9]+$//;
		next if $known{$plain};

		print STDERR "W: Unknown group identifier '$k'\n"
			unless $skip_warnings;
		# Counted even when not shown, like every other warning.
		$warnings++;
	}

	# testing for the missing
	foreach my $k (@required) {
		unless (exists($config->{$k})) {
			print STDERR "E: Missing group identifier '$k'\n";
			$failures++;
		}
	}

	if (exists($config->{common})) {
		# Backticks yield undef when the command cannot be started.
		my $hn=`hostname -f`;
		$hn = "" unless defined $hn;
		chomp($hn);

		if (!exists($config->{common}{lrms})) {
			print STDERR "E: The entry lrms must not be missed.\n";
			$failures++;
		}
		elsif (!exists($config->{"queue/".$config->{common}{lrms}})) {
			print STDERR "E: There must be a [queue/".$config->{common}{lrms}."] block\n"
			            ."   because of the entry lrms in [common].\n";
			$failures++;
		}

		if (!exists($config->{common}{hostname})) {
			print STDERR "E: The entry hostname must not be missed.\n";
			$failures++;
		}
		elsif ($config->{common}{hostname} ne $hn) {
			print STDERR "E: The entry of the full hostname (".$config->{common}{hostname}
				                      . ") is better\n"
			            ."   equal to `hostname -f` ($hn).\n"
			            ."   Also test reverse lookup of the hostname.\n";
			$failures++;
		}
	}

	return $failures;
}


=item check of libraries

Uses ldd to check whether all required shared libraries are installed.

=cut

# check_libraries()
#
# Runs ldd on every regular file below bin/, sbin/ and lib/ of the Globus
# and ARC installations ($globusloc, $arcloc), skipping *.a and *.la files.
# Reports shared libraries that ldd cannot resolve (each library only once)
# and files for which the same library shows up with two versions.
#
# Returns the number of failures found; files that cannot be examined are
# counted in $warnings.
sub check_libraries() {
	my $failures = 0;

	unless (-e $globusloc) {
		printf STDERR "E: %s: cant find Globus: no such file or directory\n", $globusloc;
		$failures++;
	}

	unless (-e $arcloc) {
		printf STDERR "E: %s: cant find ARC: no such file or directory\n", $arcloc;
		$failures++;
	}

	return $failures if ($failures);

	my @to_check;
	foreach my $pattern ($globusloc . "/bin/*" , $globusloc . "/sbin/*" , $globusloc . "/lib/*" ,
			$arcloc . "/bin/*" , $arcloc . "/sbin/*" , $arcloc . "/lib/*") {
		push @to_check, glob $pattern;
	}

	# Wraps a string in single quotes for /bin/sh.
	my $shell_quote = sub {
		my ($text) = @_;
		$text =~ s/'/'\\''/g;
		return "'" . $text . "'";
	};

	my %missing;
	foreach my $file ( @to_check ) {
		next unless -f $file;
		next if $file =~ m/\.a$/;
		next if $file =~ m/\.la$/;

		# LC_ALL=C keeps the "not found" text of ldd untranslated. Every
		# part is appended with ".=" so that this prefix is not lost.
		my $command = "LC_ALL=C ";
		$command .= "LD_LIBRARY_PATH="
			. $shell_quote->("$globusloc/lib:$arcloc/lib")
			. ":\"\$LD_LIBRARY_PATH\" ";
		$command .= "ldd " . $shell_quote->($file) . " 2>/dev/null";

		my %libs;

		if (open(my $ldd, "-|", $command)) {
			while (my $lib = <$ldd>) {

				if ($lib =~ m/^\s*([^\s]+)\.so\.([^\s]+)\s*=>/) {
					my $libname = $1;
					my $version = $2;
					my $index = $libname;
					# NOTE(review): this looks intended to strip the Globus
					# flavour suffix, but $libname ends before ".so" and so
					# never contains the trailing dot the pattern requires.
					# Left unchanged - confirm the intent before fixing.
					$index =~ s/_(gcc)(16|32|64|128|256)(dbg)?(pthr)?\././;
					if (defined $libs{$index}) {
						printf STDERR "E: %s: uses multiple versions of lib " .
							"%s: %s and %s. This might not work\n",
							$file, $libname, $libs{$index}, $version;
						$failures++;
					} else {
						$libs{$index} = $version;
					}
				}

				next unless $lib =~ /not found/;

				# Skip lines that cannot be parsed instead of reusing a
				# stale capture from an earlier match.
				next unless $lib =~ m/^\s*([^\s]+)\s*=>/;
				my $unresolved = $1;

				unless (defined $missing{$unresolved}) {
					$missing{$unresolved} = 1;
					printf STDERR "E: %s: needs %s. Not found.\n", $file, $unresolved;
					$failures++;
				}

			}
			close $ldd;
		} else {
			if (!$skip_warnings) {
				printf STDERR "W: Can not check used libraries of %s\n", $file;
			}
			$warnings++;
		}
	}

	return $failures;
}

# _openssl_verify_ok($capath, $cert)
#
# Runs "openssl verify -CApath $capath $cert" without a shell and returns
# true if its output contains "OK". Matching lines are echoed to STDOUT.
sub _openssl_verify_ok {
	my ($capath, $cert) = @_;
	my $ok = 0;

	# List-form pipe open: both paths may come from the configuration file
	# and must not be interpreted by a shell.
	if (open(my $verify, "-|", "openssl", "verify", "-CApath", $capath, $cert)) {
		while (my $line = <$verify>) {
			next unless $line =~ m/OK/;
			print $line;
			$ok = 1;
		}
		close $verify;
	}

	return $ok;
}

# check_certificates()
#
# Verifies the host certificate and the user certificate against the CA
# directory with "openssl verify". Paths not set while the configuration
# was parsed fall back to /etc/grid-security/certificates,
# /etc/grid-security/hostcert.pem and $HOME/.globus/usercert.pem.
#
# Returns the number of failures found; certificates that are not present
# are counted in $warnings.
sub check_certificates() {

	my $failures=0;

	# check if CAdir is present

	if (!defined($CApath)) {
		print STDERR "E: The x509_cert_dir was not set.\n";
		$failures++;
		$CApath="/etc/grid-security/certificates";
		if ( ! -d $CApath) {
			# Return the count; a bare return would yield undef and
			# lose the failure just recorded.
			return $failures;
		}
	}

	# check of host certificate

	if (!defined($hostcert)) {
		$hostcert="/etc/grid-security/hostcert.pem";
	}
	if ( -f $hostcert) {
		if (!_openssl_verify_ok($CApath, $hostcert)) {
			printf STDERR "E: verification of host cert at %s failed.\n", $hostcert;
			$failures++;
		}
	}
	else {
		if (!$skip_warnings) {
			printf STDERR "W: Not verifying host cert which is not present at %s (should already be reported).\n", $hostcert;
		}
		$warnings++;
	}


	# check of user certificate

	if (!defined($usercert)) {
		my $home = defined $ENV{"HOME"} ? $ENV{"HOME"} : "";
		$usercert=$home."/.globus/usercert.pem";
	}
	if ( -f $usercert) {
		if (!_openssl_verify_ok($CApath, $usercert)) {
			# NOTE(review): labelled "W:" but counted as a failure -
			# confirm which of the two is intended.
			printf STDERR "W: verification of user cert at %s failed.\n", $usercert;
			$failures++;
		}
	}
	else {
		if (!$skip_warnings) {
			printf STDERR "W: Not verifying user cert which is not present at %s.\n", $usercert;
		}
		$warnings++;
	}

	return $failures;

}


# Run all checks and add up the failures they report.
my $failed = 0;

# A clock offset of 0.2 seconds or more counts as a failure.
$failed += timecheck($timeserver, 0.2);
$failed += confcheck($conffile);
$failed += check_libraries();
$failed += check_certificates();

if ($failed == 0) {
	print "Found no apparent failures.\n";
} else {
	printf "Found %d failure%s.\n", $failed, ($failed > 1) ? "s" : "";
}
if ($warnings) {
	printf "Found %d non-critical issue%s%s.\n",
		$warnings, ($warnings > 1) ? "s" : "",
		($skip_warnings?" (not shown)":"");
}

# The exit status is the number of failures. Only the low 8 bits of an exit
# status reach the caller, so the value is capped at 255; otherwise e.g.
# 256 failures would be reported as success.
exit($failed > 255 ? 255 : $failed);

=head1 SEE ALSO

http://www.nordugrid.org and our mailing lists.

=cut


# EOF
