#
# @(#) $Id: chkcfg.py,v 1.15 2001/11/06 19:29:56 ivm Exp $
#
# $Log: chkcfg.py,v $
# Revision 1.15  2001/11/06 19:29:56  ivm
# Removed persistent node holds
# More accurate priority update algorithm in case of
# single active queue
#
# Revision 1.14  2001/08/23 19:05:54  ivm
# Implemented search with feedback RM algorithm
# Added -v option to status.py
# Fixed handling of time limits in FBSSectionInfo
#
# Revision 1.13  2001/04/26 20:46:30  ivm
# Fixed some bugs
#
# Revision 1.12  2001/02/05 20:09:10  ivm
# Check more accurately whether queue name is specified
# Allow queues without default process type
#
# Revision 1.11  2000/10/23 15:25:50  ivm
# Added daemon host address validation
#
# Revision 1.10  2000/09/26 16:27:37  ivm
# Fixed some bugs
#
# Revision 1.9  2000/09/14 21:48:20  ivm
# Fixed usage information printing in farm_config
#
# Revision 1.7  2000/09/12 16:42:36  ivm
# Implemented local resource list in chkcfg and splitcfg
#
# Revision 1.6  2000/09/07 17:54:54  ivm
# Implemented dynamic modification of local scratch disk mapping
#
# Revision 1.5  2000/08/25 14:37:28  ivm
# Check for quota specification format
#
# Revision 1.3  2000/08/18 21:15:43  ivm
# Implemented dynamic re-configuration
#
# Revision 1.2  2000/08/07 16:02:44  ivm
# Updated for resource pools
#
# Revision 1.1  2000/05/23 21:51:33  ivm
# Added chkcfg
#
#

import os
from socket import *
import socket

def checkAddresses(cfg):
	"""Validate the daemon host names and port numbers found in cfg.

	Checks the 'bmgr' host and its api_port, launcher_if_port and
	event_mgr_port settings, then the 'logd' host and server_port.
	Problems are printed to stdout.

	Returns 'OK', 'Warning' or 'Error'.  A missing Logd host is only a
	warning; every other problem is an error.
	"""
	sts = 'OK'
	bmgr_host = cfg.getValue('bmgr','*','host')
	if not bmgr_host:
		print 'ERROR: unspecified host name for BMGR'
		sts = 'Error'
	else:
		try:	gethostbyname(bmgr_host)
		except socket.error, val:
			print 'ERROR: can not resolve BMGR host name <%s>: ' % bmgr_host, val
			sts = 'Error'

	# Ports that passed validation so far, as (setting name, number) pairs.
	# Only these take part in the collision check: comparing undefined or
	# malformed values (e.g. None == None) would report a bogus collision
	# on top of the "undefined or invalid" error.
	valid_ports = []
	for port_name in ('api_port', 'launcher_if_port', 'event_mgr_port'):
		port = cfg.getValue('bmgr','*',port_name)
		if type(port) != type(1):
			# The argument tuple keeps the formatting safe even if the
			# configured value itself happens to be a tuple.
			print 'ERROR: undefined or invalid %s number for bmgr: <%s>' % (port_name, port)
			sts = 'Error'
			continue
		for other_name, other_port in valid_ports:
			if port == other_port:
				print 'ERROR: %s and %s number for bmgr are the same' % (port_name, other_name)
				sts = 'Error'
		valid_ports.append((port_name, port))

	logd_host = cfg.getValue('logd','*','host')
	if not logd_host:
		print 'WARNING: unspecified host name for Logd'
		# Never downgrade an error that was already recorded.
		if sts == 'OK': sts = 'Warning'
	else:
		try:	gethostbyname(logd_host)
		except socket.error, val:
			print 'ERROR: can not resolve Logd host name <%s>: ' % logd_host, val
			sts = 'Error'
	logd_port = cfg.getValue('logd','*','server_port')
	if type(logd_port) != type(1):
		print 'ERROR: undefined or invalid server_port number for logd: <%s>' % (logd_port,)
		sts = 'Error'
	return sts
			
def checkResources(fcfg):
	"""Validate the resource and resource pool definitions in fcfg.

	Local resources must carry no value, global resources must carry
	one, and a name may not be reused between local resources, global
	resources and pools.  Every pool member must be a known resource and
	all members of a pool must be of the same kind (all global or all
	local).  Problems are printed to stdout.

	Returns 'OK', 'Warning' (empty pool) or 'Error'.
	"""
	sts = 'OK'

	lrdct = fcfg.getValueDict('resources','*','local',defValue = None)
	if not lrdct:	lrdct = {}

	for k, v in lrdct.items():
		if v is not None:
			print 'ERROR: Invalid specification for local resource <%s>' % k
			sts = 'Error'

	grdct = fcfg.getValueDict('resources','*','global',defValue = None)
	if not grdct:	grdct = {}

	for k, v in grdct.items():
		if v is None:
			print 'ERROR: Capacity unspecified for global resource <%s>' % k
			sts = 'Error'
		if lrdct.has_key(k):
			print 'ERROR: Name <%s> used for global and local resource' % k
			sts = 'Error'

	# check pools
	poollst = fcfg.names('resource_pools','*')
	for rp in poollst:
		if grdct.has_key(rp):
			print 'ERROR: Name <%s> used for pool and global resource' % rp
			sts = 'Error'
		if lrdct.has_key(rp):
			print 'ERROR: Name <%s> used for pool and local resource' % rp
			sts = 'Error'
		urlst = fcfg.getValueList('resource_pools','*',rp)
		if not urlst:
			print 'WARNING: Empty resource pool <%s>' % rp
			if sts != 'Error':
				sts = 'Warning'
			continue
		# The first member fixes the kind of the pool:
		# 'g' for global, 'l' for local.
		t = ''
		if grdct.has_key(urlst[0]): t = 'g'
		elif lrdct.has_key(urlst[0]): t = 'l'
		if not t:
			print 'ERROR: Unknown resource <%s> in pool <%s>' % (urlst[0], rp)
			sts = 'Error'
			continue
		for rn in urlst[1:]:
			rt = ''
			if grdct.has_key(rn): rt = 'g'
			elif lrdct.has_key(rn): rt = 'l'
			if not rt:
				print 'ERROR: Unknown resource <%s> in pool <%s>' % (rn, rp)
				sts = 'Error'
				# An unknown resource has no kind; comparing it with the
				# pool kind would only add a misleading "mixed types"
				# error for the same mistake.
				continue
			if rt != t:
				print 'ERROR: Resources of mixed types in pool <%s>' % rp
				sts = 'Error'
	return sts

def checkNodes(fcfg):
	"""Validate the node list and node class definitions in fcfg.

	Every node must name a defined node class.  For every node class the
	resource capacities must be integers (or unset) and refer to local
	resources only, and every local disk must have a location.  Problems
	are printed to stdout.

	Returns 'OK', 'Warning' or 'Error'.
	"""
	sts = 'OK'
	# Looked up once; used both for the membership tests and the class loop.
	classes = fcfg.ids('node_class')
	for nn in fcfg.names('node_list','*'):
		cn = fcfg.getValueList('node_list','*',nn)
		if not cn:
			print 'ERROR: Invalid node class specification for node <%s>:' % nn, \
				cn
			sts = 'Error'
			continue
		# Only the first item of the node entry is checked as the class name.
		cn = cn[0]
		if type(cn) != type(''):
			print 'ERROR: Invalid node class specification for node <%s>:' % nn, \
				cn
			sts = 'Error'
			continue
		if not cn in classes:
			print 'ERROR: Undefined node class <%s> for node <%s>' % (cn, nn)
			sts = 'Error'
			continue
	llst = fcfg.getValueList('resources','*','local')
	if not llst:	llst = []
	# check node classes
	for cn in classes:
		print 'Checking node class <%s>' % cn
		rsrc = fcfg.getValueDict('node_class',cn,'resources', defValue=None)
		if not rsrc:
			rsrc = {}
			print 'WARNING: Resource capacity is not defined for node class <%s>' % cn
			if sts != 'Error':	sts = 'Warning'
		for rn, rc in rsrc.items():
			if type(rc) != type(1) and rc is not None:
				print 'ERROR: Invalid resource capacity specification for node class <%s>, resource <%s>' %\
					(cn, rn)
				sts = 'Error'
				# The remaining resources of this class are not checked
				# once a malformed capacity has been found.
				break
			if not rn in llst:
				print 'ERROR: Non-local resource <%s> in definition of node class <%s>' % (rn, cn)
				sts = 'Error'
		disks = fcfg.getValueDict('node_class',cn,'local_disks',defValue=None)
		if not disks:
			print 'WARNING: Local disks not defined for node class <%s>' % cn
			disks = {}
			if sts != 'Error':
				sts = 'Warning'
		for dn, location in disks.items():
			if not location:
				print 'ERROR: Undefined location for local disk <%s> on nodes of class <%s>' % (dn, cn)
				sts = 'Error'
			if not rsrc.has_key(dn):
				print 'WARNING: Disk <%s> is not listed as local resource for node class <%s>' % (dn, cn)
				if sts == 'OK': sts = 'Warning'
			# Look the disk up only when it is present: an unlisted disk
			# has just been reported and must not raise KeyError here.
			elif rsrc[dn] is None:
				print 'ERROR: Attribute name <%s> is used as disk name for node class <%s>' % (dn, cn)
				sts = 'Error'

	return sts
		
def checkProcTypes(cfg):
	sts = 'OK'
	for pt in cfg.ids('proc_type'):
		dr = cfg.getValueDict('proc_type',pt,'proc_rsrc_defaults')
		if dr == None:
			print 'WARNING: Default process resource requirements are not defined for process type <%s>' % pt
			if sts != 'Error':	sts = 'Warning'
			continue
		for rn, rv in dr.items():
			if type(rv) != type(1) and rv != None:
				print 'ERROR: Invalid default process resource specification for process type <%s>, resource <%s>' %\
					(pt, rn)
				sts = 'Error'
				break
		sr = cfg.getValueDict('proc_type',pt,'sect_rsrc_defaults')
		if sr == None: continue
		for rn, rv in sr.items():
			if type(rv) != type(1) and rv != None:
				print 'ERROR: Invalid default section resource specification for process type <%s>, resource <%s>' %\
					(pt, rn)
				sts = 'Error'
				continue
		rq = cfg.getValueDict('proc_type',pt,'resource_quota', defValue=None)
		if rq == None:	continue
		for rn, rv in rq.items():
			if type(rv) != type(1):
				print 'ERROR: Invalid resource quota specification for process type <%s>, resource <%s>' %\
					(pt, rn)
				sts = 'Error'
				continue
	return sts

def checkQueues(cfg):
	sts = 'OK'
	for qn in cfg.ids('queue'):
		pt = cfg.getValue('queue',qn,'proc_type')
		if pt == None:
			print 'WARNING: Default process type is not defined for queue <%s>' % qn
			if sts == 'OK':
				sts = 'Warning'
			continue
		if type(pt) != type(''):
			print 'ERROR: Invalid default process type for queue <%s>:' % qn, pt
			sts = 'Error'
			continue
		if not pt in cfg.ids('proc_type'):
			print 'ERROR: Default process type for queue <%s> <%s> is not defined' % (qn, pt)
			sts = 'Error'
			continue
	return sts
	
def exists(path):
	"""Return 1 if os.stat() succeeds on path, 0 otherwise.

	A path that cannot be examined, or a value that is not usable as a
	path at all (e.g. None), counts as non-existent.
	"""
	# Catch only the failures os.stat() raises for bad or missing paths,
	# so that KeyboardInterrupt and SystemExit still propagate.
	try:	os.stat(path)
	except (OSError, TypeError, ValueError):
		return 0
	return 1

def checkKerberos(cfg):
	"""Validate the Kerberos settings in cfg.

	Nothing is checked unless 'client_auth_required' in the 'kerberos'
	section is 'yes' (it defaults to 'no').  When it is, the krb5 module
	must be importable, a service principal must be configured and the
	configured keytab file must exist.  Problems are printed to stdout.

	Returns 'OK' or 'Error'.
	"""
	sts = 'OK'
	if cfg.getValue('kerberos','*','client_auth_required','no') != 'yes':
		return sts

	# Any failure of the import is reported as "unavailable", but an
	# interrupt or interpreter exit request is not swallowed.
	try:	import krb5
	except Exception:
		print 'ERROR: krb5 module is unavailable'
		sts = 'Error'
	princ = cfg.getValue('kerberos','*','principal')
	if not princ:
		print 'ERROR: FBSNG service principal is not defined'
		sts = 'Error'
	ktf = cfg.getValue('kerberos','*','keytab')
	if not ktf:
		print 'ERROR: FBSNG service principal keytab file is not defined'
		sts = 'Error'
	elif not exists(ktf):
		print 'ERROR: FBSNG service principal keytab file is not found'
		sts = 'Error'
	return sts
	
def checkFiles(cfg):
	sts = 'OK'
	sld = cfg.getValue('bmgr', '*', 'section_log_dir')
	if type(sld) != type(''):
		print 'ERROR: Invalid specification for section log directory: ', sld
		sts = 'Error'
	else:
		if not exists(sld):
			print 'ERROR: Section log directory <%s> does not exist' % sld
			sts = 'Error'
	lrd = cfg.getValue('logd', '*', 'log_dir')
	if type(lrd) != type(''):
		print 'ERROR: Invalid specification for log root directory: ', lrd
		sts = 'Error'
	else:
		if not exists(lrd):
			print 'ERROR: Log root directory directory <%s> does not exist' % lrd
			sts = 'Error'
	jdb = cfg.getValue('jobdb', '*', 'root')
	if type(jdb) != type(''):
		print 'ERROR: Invalid specification for job DB directory: ', jdb
		sts = 'Error'
	else:
		if not exists(jdb):
			print 'ERROR: Job DB directory <%s> does not exist' % jdb
			sts = 'Error'

	hdb = cfg.getValue('history', '*', 'hist_dir')
	if type(hdb) != type(''):
		print 'ERROR: Invalid specification for history DB directory: ', hdb
		sts = 'Error'
	else:
		if not exists(hdb):
			print 'ERROR: History DB directory <%s> does not exist' % hdb
			sts = 'Error'
	return sts
	
if __name__ == '__main__':
	import sys
	from config import *
	severity = {'OK':0, 'Warning':1, 'Error':2}
	cfg = ConfigFile(sys.argv[1])
	fcfg = ConfigFile(sys.argv[2])

	print 'Checking resources ...'
	sts = checkResources(fcfg)

	print 'Checking nodes ...'
	s = checkNodes(fcfg)
	if severity[s] > severity[sts]:
		sts = s

	print 'Checking process types ...'
	s = checkProcTypes(fcfg)
	if severity[s] > severity[sts]:
		sts = s

	print 'Checking queues ...'
	s = checkQueues(fcfg)
	if severity[s] > severity[sts]:
		sts = s

	print 'Checking files and directories ...'
	s = checkFiles(cfg)
	if severity[s] > severity[sts]:
		sts = s

	print 'Checking daemon addresses ...'
	s = checkAddresses(cfg)
	if severity[s] > severity[sts]:
		sts = s

	print 'Checking Kerberos support ...'
	s = checkKerberos(cfg)
	if severity[s] > severity[sts]:
		sts = s

	print 'Configuration check result: ', sts
	sys.exit(sts == 'Error')
	
