#!/pkg/bin/perl -w

# $Id: grids,v 1.46 1998/02/17 19:22:27 rowe Exp $

# This script attempts to set up whatever hierarchy you have configured.

use Hierarchy_interface;
use Test_script;
use Clog;
use Mod_utility;
use Grdbm;

&init_startup_env();       # misc init stuff, some from ENV vars

#-------------------- Preliminary setup --------------------------------#

  # Credentials handed to Hierarchy_interface and, on `grids start', stored
  # in $grids_db as 'init_user' / 'init_pass'.
  # NOTE(review): these are hard-coded in the script -- confirm that is
  # acceptable for the deployment.
  $user = "root";
  $pass = "enter_now";

  local $grids_db = $Hierarchy_interface::grids_db; # Hierarchy configuration
  $nlogs = 5;  # Number of old log directories to keep around
  @default_rs_name = ( 'sweep',    # rulesets set on ROOT at the end of start
                       'worm',
                      );

  # Sub-command dispatch.  With no arguments $ARGV[0] is undef and each `eq'
  # below would warn under -w; use '' instead, which matches no sub-command
  # and falls through to the usage text.
  my $grids_action = defined $ARGV[0] ? $ARGV[0] : '';

  goto GRIDS_START   if $grids_action eq 'start';
  goto GRIDS_STOP    if $grids_action eq 'stop';
  goto GRIDS_DESTROY if $grids_action eq 'destroy';
  goto GRIDS_CHECK   if $grids_action eq 'check';
  goto GRIDS_EXTRA   if $grids_action eq 'extra';
  goto GRIDS_VIEW    if $grids_action eq 'view';

  # Missing or unknown sub-command: show usage and quit.
  print "Usage:\n\tgrids start\n";
  print "\t\t(builds the config specified in 'grids.db' and 'grids.db.mine')\n\n";
  print "\tgrids stop\n";
  print "\tgrids stop -module  (to leave module controllers running)\n";
  print "\tgrids destroy  (to wipe out all processes for sure)\n";
  print "\tgrids check  (to check hierarchy consistency)\n";
  print "\tgrids extra  (add any extra commands to execute here instead of with UI)\n";
  print "\tgrids view  (store a copy of hierarchy in file output_hierarchy)\n";

  exit;

# ------------------------ More setup on GrIDS start --------------------#
GRIDS_START:

  # Begin from a clean slate: leftover files first, then old logs.
  &clean_up_files();
  &clean_up_logs();

  # Test harness, then our own log object.
  &initialize_tests("grids.test");
  $log = Clog->new ('grids_test', 'N/A', "grids.test.log");
  $log->{'central_log'} = 0;     # No central logging - presently broken.
  $SIG{ALRM} = 'catch_alarm';    # used if ($interactive_auto_start)

#----------------------- Bootstrapping the hierarchy ---------------------#

  # Record the credentials the hierarchy is initialised with.
  @{$grids_db}{'init_user', 'init_pass'} = ($user, $pass);

# Holdover state from $grdb_location should be pruned/used here.
  ### RC BUG::  That is ignored for now if START.

  if ($rcdbg) {
    warn ("ABOUT TO CALL $grids_db->parse_config_file\n");
    }

  # Read the standard config, then the personal one.
  $grids_db->parse_config_file ($grdbm_standard_config,
                                $grdbm_personal_config);

  # Bail out if $grids_db is not a Grdbm object.
  if (ref ($grids_db) ne 'Grdbm') {
    &die_nice ($grids_db);
    }

######################################################################

# First try to &start_module_controllers() for all SPECIFIC HOSTS
# identified by &parse_config_file():

  # Vet every SPECIFIC host from the config with ping_ok and fork_swap_ok
  # before any module controller is started.  Interactive runs let the user
  # replace or drop a failing host; non-interactive runs abort on the first
  # failure.
  if ($interactive_startup) {
    # caution: splicing confuses foreach $item and foreach $index
    for ($i = 0; $i <= $#{$grids_db->{'module_hosts'}}; $i++) {
      my $host = ${$grids_db->{'module_hosts'}}[$i];
      my $dept = ${$grids_db->{'host_to_dept'}}{$host};
      my $dept_index = $grids_db->{'dept_to_index'}{$dept};
      my $ok = $grids_db->ping_ok ($host)  &&  $grids_db->fork_swap_ok ($host);
      ${$grids_db->{'claimed_hosts'}}{$host} = $ok;   # mark it as ok or faulty

      # Index of this slot in the dept's SPECIFIC_HOSTS, learned the first
      # time a replacement is written there.  Declared once per host,
      # *outside* the retry loop, so it survives repeated failures on the
      # same slot (inside the loop it was reset to undef on every pass and
      # the "defined $specific_index" branch below could never run).
      my $specific_index;

      while (!$ok) {
        my $old_host = $host;
        # Forget the failed host before asking for a substitute.
        delete ${$grids_db->{'host_to_dept'}}{$host};
        delete ${$grids_db->{'module_ports'}}{$host};
        $host = &choose_another_host ($host, $dept, 'Will decrement NUM_HOSTS');
        die "ABORTING:  SPECIFIC HOST " . $old_host . $errmsg
          if $host eq 'DIE';
        &change_host_test_ok() if $host;
        if ($host =~ /^ANY$|^RANDOM$|^0$|^$/i) {
          # No named replacement: drop this slot from module_hosts.
          splice (@{$grids_db->{'module_hosts'}}, $i, 1);
          $i--;   # must alter index counter to inspect *every* host in list
          $ok = 1;
          # remove from that dept's SPECIFIC_HOSTS:
          foreach $j (0 ..
                  $#{$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}}) {
            if ($old_host eq
               ${$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}}[$j]) {
              splice (@{$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}},
                                                                       $j, 1);
              last;
              }  # This splice works because we don't have to iterate past it.
            }
          if ($host eq 0) {  #change config on the fly to use 1 less host:
            $Grdbm::dept_geometry[$dept_index]->{NUM_HOSTS}--
              if exists $Grdbm::dept_geometry[$dept_index]->{NUM_HOSTS};
            warn ("Warning: Reducing NUM_HOSTS in dept $dept\n");
            $log->warn ("Warning: Reducing NUM_HOSTS in dept $dept\n");
            # RC BUG:  Should check/update *all* depts' SM_HOST and AGG_HOST 
            }
          ## else if /ANY|RANDOM/, we'll choose random host vs. specified one
          }
        else {    ## User has entered a specific replacement host:
          ${$grids_db->{'module_hosts'}}[$i] = $host;
          ${$grids_db->{'host_to_dept'}}{$host} = $dept;
          $ok = $grids_db->ping_ok ($host)  &&  $grids_db->fork_swap_ok ($host);
          ${$grids_db->{'claimed_hosts'}}{$host} = $ok;   # mark as ok | faulty
          # RC BUG:  Should check/update *all* depts', eg, SM_HOST and AGG_HOST 
          if (defined $specific_index) {  ## Even if !not! $ok,
            # substitute it for that dept's FAILED SPECIFIC_HOST:
            ${$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}}
                                   [$specific_index] = $host;
            }
          else {
            # First replacement for this slot: locate it by the old name and
            # remember where it was.
            foreach $j (0 ..
                    $#{$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}}) {
              if ($old_host eq
                ${$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}}[$j]) {
                ${$Grdbm::dept_geometry[$dept_index]->{SPECIFIC_HOSTS}}[$j]
                  = $host;
                $specific_index = $j;
                last;
                }
              }
            } # end if (defined $specific_index)
          }   # end -- User entered a specific replacement host.
        }     # end while (!$ok)
      }       # end for ($i = 0; $i <= $#{$grids_db->{'module_hosts'}}; $i++)
    }
  else {    # not interactive; be user-hostile:
    foreach $host (@{$grids_db->{'module_hosts'}}) {
      unless ( $grids_db->ping_ok ($host) ) {
        die "ABORTING:  SPECIFIC HOST $host failed to \&ping_ok.\n";
        }
      unless ( $grids_db->fork_swap_ok ($host) ) {
        die "ABORTING:  SPECIFIC HOST $host failed to \&fork_swap_ok.\n";
        }
      }
    }

  # Start a module controller (MC) on every SPECIFIC host; whatever
  # &start_module_controllers returns becomes the 'module_ports' hash.
  %{$grids_db->{'module_ports'}} =
      &start_module_controllers ($log, @{$grids_db->{'module_hosts'}});

  # A host with no entry in 'module_ports' did not get its MC.
  if (keys (%{$grids_db->{'module_ports'}}) != @{$grids_db->{'module_hosts'}}) {
    my @failed_hosts = grep { !exists $grids_db->{'module_ports'}{$_} }
                            @{$grids_db->{'module_hosts'}};

    $errmsg = 'Could not get module controllers on SPECIFIC hosts: <'
            . join ('> <', @failed_hosts) . ">\n";
    $log->warn ($errmsg);
    print $errmsg;

    foreach $host (@failed_hosts) {
      my $dept = $grids_db->{'host_to_dept'}{$host};

      # Drop the host's dept and port entries and mark it as failed.
      delete $grids_db->{'host_to_dept'}{$host};
      delete $grids_db->{'module_ports'}{$host};
      $grids_db->{'claimed_hosts'}{$host} = 0;

      # The MC may only have been slow to start, but killing it now with
      #   &slay ($host, 'module_controller', 1);
      # is too slow, so the host is just tagged in host_to_dept instead.
      $grids_db->{'host_to_dept'}{$host} = 'DESTROY_ME_LATER';

      # Let the user shrink the dept or have a replacement picked at random.
      print "For $host in dept $dept, " .
            "we can Decrement ${dept}->{NUM_HOSTS} (D)\n" .
            "\tor we can choose a Random replacement host (R).  Which? (D/R) ";
      my $input = &user_input (5, 'Will choose random replacement.', 'R');

      if ($input =~ /^D/i) {
        my $dept_index = $grids_db->{'dept_to_index'}{$dept};
        if ($Grdbm::dept_geometry[$dept_index]->{NUM_HOSTS}) {
          $Grdbm::dept_geometry[$dept_index]->{NUM_HOSTS}--;
          }
        }
      elsif ($input !~ /^R/i) {
        print "Bogus input.  We will choose a Random replacement host (R).\n";
        }
      } # end foreach @failed_hosts
    }

if ($rcdbg) { # RC BUG debugging
    my $note = "Got all SPECIFIC MCs started ok\n";
    warn ($note);
    $log->warn ($note);
    }

######################################################################

# Now &start_module_controllers() for randomly selected hosts.
# If indiv failures, repeat with other hosts until success or out of hosts:
# Top each dept up to its NUM_HOSTS with hosts picked by select_hosts, then
# start module controllers (MCs) on the picks.  A dept is queued for another
# pass when its HOST_UNIVERSE ran short or one of its picks got no MC; the
# loop ends when no dept is queued.
my @unfinished_depts = @Grdbm::dept_geometry;
while (@unfinished_depts) {
  my (@selected_hosts, @hosts_and_ports, $num_to_pick,
      %provisional_hosts, %failed_depts);

  foreach $dept (@unfinished_depts) {
    if ($rcdbg) { # RC BUG debugging
      warn ("Selecting MCs for dept <$dept->{DEPT_NAME}> ...\n");
      $log->warn ("Selecting MCs for dept <$dept->{DEPT_NAME}> ...\n");
      }
    # Seed SELECTED_HOSTS from SPECIFIC_HOSTS while it is still empty.
    @{$dept->{SELECTED_HOSTS}} = @{$dept->{SPECIFIC_HOSTS}}
                                 unless @{$dept->{SELECTED_HOSTS}};
    next unless $dept->{NUM_HOSTS};
    # Number of hosts this dept still lacks.
    $num_to_pick =  (exists $dept->{SELECTED_HOSTS})  ?
        $dept->{NUM_HOSTS} - $#{$dept->{SELECTED_HOSTS}} - 1  :
        $dept->{NUM_HOSTS};
    next unless $num_to_pick;

    # Pick from the dept's own HOST_UNIVERSE if it has one, else from the
    # global unclaimed_hosts list.
    my ($host_universe) =   ((exists $dept->{HOST_UNIVERSE})  ?
        \@{$dept->{HOST_UNIVERSE}}  :  \@{$grids_db->{unclaimed_hosts}});

    ### RC 6/17/97: rewrote &select_hosts() for efficiency.
    @selected_hosts = $grids_db->select_hosts ($num_to_pick, $host_universe,
                                               $dept->{DEPT_NAME});
    # Both sides are counts, so compare numerically.
    unless ($num_to_pick == @selected_hosts) {
      $errmsg = "Dept <$dept->{DEPT_NAME}>: remaining \$num_to_pick ("
              . ($num_to_pick - @selected_hosts)
              . ") hosts exceeds its remaining \$#HOST_UNIVERSE ("
              . ($#{$host_universe} + 1) . ")!\n";
      &die_nice ($errmsg) unless $interactive_startup;
      # else $interactive_startup ...
      print $errmsg;
      my $res;
      if ($host_universe ne $grids_db->{unclaimed_hosts}) {
        print "Ok for this dept to use any unclaimed_hosts? (Y/N): ";
        if ($interactive_auto_start) {
          print "Y\n";
          }
        else {
          $res = <STDIN>;
          &die_nice ($errmsg) if $res =~ /^N/i;
          }
        delete $dept->{HOST_UNIVERSE}; # next loop will use unclaimed_hosts
        $failed_depts{$dept->{DEPT_NAME}} = $dept;
        }
      else {   ## near-panic; we ran out of unclaimed_hosts!
        print "Ok to recycle hosts that failed stress tests? (Y/N): ";
        $res = <STDIN>;
        unless ($res =~ /^N/i) {
          foreach $host (keys %{$grids_db->{claimed_hosts}}) {
            next if ${$grids_db->{claimed_hosts}}{$host};
            #else it previously failed its stresstest ...
            push (@{$grids_db->{unclaimed_hosts}}, $host);
            delete ${$grids_db->{claimed_hosts}}{$host};
            }
          if (@{$grids_db->{unclaimed_hosts}}) {
            # join needs its own parentheses: as a list operator it would
            # otherwise take "@hosts . ')...'" as its list and print the
            # host *count* instead of the host names.
            print "NOTE: These hosts ("
                . join (' ', @{$grids_db->{unclaimed_hosts}})
                . ") may fail their next stress test too!\n";
            # Opportunity for user to alter stresstest parms:
            &change_host_test_ok();
            }
          else {
            print "No stress-failed hosts to recycle!\n";
            }
          }
        if ( ($res =~ /^N/i)  ||  !@{$grids_db->{unclaimed_hosts}} ) {
          my $shrunken_num_hosts = (exists $dept->{SELECTED_HOSTS})  ?
                       @{$dept->{SELECTED_HOSTS}} + @selected_hosts  :
                       @selected_hosts;
          print "Ok to SHRINK/RECONFIG this dept from <$dept->{NUM_HOSTS}> to "
              . "<$shrunken_num_hosts> hosts? (Y/N): ";
          $res = <STDIN>;
          if ($res =~ /^N/i) {
            &die_nice ("NOT ENOUGH HOSTS; cannot shrink to fit\n");
            }
          else {
            $dept->{NUM_HOSTS} = $shrunken_num_hosts;
            }
          } # some among newly @selected_hosts may fail, so this may replay.
        }      ## near-panic; we ran out of unclaimed_hosts!
      }

    @provisional_hosts{@selected_hosts} = ($dept) x @selected_hosts;
    # Use hash slice assignment to associate provisional host with its dept.

    }  ## end --  foreach $dept 


  # Start MCs on all provisional picks at once.  The result is consumed
  # below as a flat (host, port, host, port, ...) list.
  @hosts_and_ports = &start_module_controllers
                     ($log, keys %provisional_hosts) if keys %provisional_hosts;
  for ($i = 0; $i < $#hosts_and_ports; $i++) {     # load tuples into hash:
    push (@{${$provisional_hosts{$hosts_and_ports[$i]}}{SELECTED_HOSTS}},
          # Lvalue data structure is a $dept in (@Grdbm::dept_geometry)
          $hosts_and_ports[$i]);
    push (@{$grids_db->{'module_hosts'}}, $hosts_and_ports[$i]);
    $grids_db->{'host_to_dept'}{$hosts_and_ports[$i]} = 
                 ${$provisional_hosts{$hosts_and_ports[$i]}}{DEPT_NAME};
    delete $provisional_hosts{$hosts_and_ports[$i]};
    $grids_db->{'module_ports'}{$hosts_and_ports[$i]} = 
                                $hosts_and_ports[$i + 1];
    $i++;    # *NOT* safe to do inside assignment stmt above!  (order of eval)
    }

  ## Remove any *redundancy* from remaining (values %provisional_hosts):
  foreach $host (keys %provisional_hosts) {
    # Remaining selected hosts were rejected; identify corresponding depts:
    #      $provisional_hosts{$host} is a $dept in (@Grdbm::dept_geometry)
    $failed_depts{$provisional_hosts{$host}->{DEPT_NAME}} = 
           $provisional_hosts{$host};
    # i.e, $failed_depts{$dept->{DEPT_NAME}} = $dept;
    # since cannot store a ref as key, only as a value.

    # Record the failure here, unconditionally.  This bookkeeping used to
    # sit inside the $rcdbg-only block below, so whether a failed host was
    # flagged depended on debug output being switched on.
    ${$grids_db->{'claimed_hosts'}}{$host} = 0;     # mark it as failed
    # Kill it, in case it was just a slow startup:
    # &slay ($host, 'module_controller', 1);     #  slay is too slow, so ...
    ${$grids_db->{'host_to_dept'}}{$host} = 'DESTROY_ME_LATER';
    }
  @unfinished_depts = (values %failed_depts);

  if ($rcdbg && @unfinished_depts) {  # RC BUG debugging
    # Reporting only -- no state is changed in here.
    $errmsg = "Need another MC-select pass for depts:\n";
    $errmsg .= join ' ', keys %failed_depts;
    $errmsg .= "\n";
    if (keys %provisional_hosts) {
      $errmsg .= "Failed MC for provisionally-selected hosts/depts:\n";
      foreach $hostname (keys %provisional_hosts) {
        $errmsg .= "\t $hostname for dept ";
        $errmsg .= "$provisional_hosts{$hostname}{DEPT_NAME}\n";
        }
      }
    warn ($errmsg);
    $log->warn ($errmsg);
    }

  }  ## end -- while (@unfinished_depts)

  # Persist the host selection made so far.
  $grids_db->save();
  ## RC NOTE:  &Grdbm::select_hosts() does *not* walk the whole of
  ##    @{$self->{unclaimed_hosts}} to delete each newly chosen host.
  ##    It only does that ON DEMAND (driven by a later dept's selection).

if ($rcdbg) { # RC BUG debugging
  my $done_msg = "FINISHED starting MCs.\n";
  warn ($done_msg);
  $log->warn ($done_msg);
  }

######################################################################

# Now identify proper hosts for each dept's SM and AGG:
  # Give every dept an SM_HOST and an AGG_HOST.  A dept whose SM_HOST is
  # written "DEPT::<name>" shares the SM of dept <name>; those are collected
  # first and resolved in a second pass once every plain SM_HOST is known.
  my %sms_to_assign;    # Lists of dept SMs to be multiplexed when available.
  my %name_to_dept;     # Needed to decode *indirect* multiplex-spec of SMs.
  my $root_dept;
  foreach $dept (@Grdbm::dept_geometry) {
    # Use the lexical hash declared above (the code used to go through an
    # undeclared global hashref $name_to_dept, leaving %name_to_dept unused).
    $name_to_dept{$dept->{DEPT_NAME}} = $dept;

    $root_dept = $dept if $dept->{DEPT_NAME} eq 'ROOT';   # for use later.

    if ($rcdbg) { # RC BUG debugging
      warn ("Selecting SM and AGG for dept <$dept->{DEPT_NAME}> ...\n");
      $log->warn ("Selecting SM and AGG for dept <$dept->{DEPT_NAME}> ...\n");
      }
    if ($dept->{SM_HOST} =~ /^DEPT::(\S+)$/) {
      # then later, *multiplex* its SM with that for dept $1:
      push (@{$sms_to_assign{$1}}, $dept);
      }
    elsif (!$dept->{SM_HOST}) {   # if == 0 or undefined
      # random choice:
      if (@{$dept->{SELECTED_HOSTS}}) {
        $dept->{SM_HOST} = $dept->{SELECTED_HOSTS} [
                                  int(rand(@{$dept->{SELECTED_HOSTS}})) ];
        }
      else {
        $dept->{SM_HOST} = $grids_db->{'module_hosts'} [
                                  int(rand(@{$grids_db->{'module_hosts'}})) ];
        }
      }
    elsif ($dept->{DEPT_NAME} ne 'ROOT') {
      # check to vrfy it was selected as a host by some dept, hence has a MC:
      # ( root_sm_host does *not* need a MC )
      unless (defined $grids_db->{'host_to_dept'}{$dept->{SM_HOST}}) {
        $errmsg = "Desired SM for dept <$dept->{DEPT_NAME}> " .
                  "on UNCHOSEN host: $dept->{SM_HOST}\n";
        #&die_nice ($errmsg);
        &replace_sm_agg_host ('SM', $dept);
        }
      }

    if (!$dept->{AGG_HOST}) {   # if == 0 or undefined
      # random choice (might select same host as for SM_HOST):
      if (@{$dept->{SELECTED_HOSTS}}) {
        $dept->{AGG_HOST} = $dept->{SELECTED_HOSTS} [
                                  int(rand(@{$dept->{SELECTED_HOSTS}})) ];
        }
      else {
        $dept->{AGG_HOST} = $grids_db->{'module_hosts'} [
                                  int(rand(@{$grids_db->{'module_hosts'}})) ];
        }
      }
    else {
      # check to vrfy it was selected as a host by some dept, hence has a MC:
      unless (defined $grids_db->{'host_to_dept'}{$dept->{AGG_HOST}}) {
        $errmsg = "Desired AGG for dept <$dept->{DEPT_NAME}> " .
                  "on UNCHOSEN host: $dept->{AGG_HOST}\n";
        #&die_nice ($errmsg);
        &replace_sm_agg_host ('AGG', $dept);
        }
      }

    }

  # Now handle *multiplexed* SM choices:
  my $loop_detect = keys %sms_to_assign;   # ie, the *number* of keys
  foreach $base_dept (keys %sms_to_assign) {
    # SM spec in $sharing_dept says use same SM as $base_dept.
    # Note that $sharing_dept is a hashrecord, vs. $base_dept is a DEPT_NAME.
    my @shared_intermediate_depts;
    my ($loop_dept) = $name_to_dept{$base_dept};
    # A "DEPT::<name>" that names no configured dept used to fall through
    # silently and hand every sharing dept an undefined SM_HOST.
    unless (defined $loop_dept) {
      $errmsg = "FATAL CONFIG:  SM multiplexed onto UNKNOWN dept " .
                "<$base_dept>!\n";
      &die_nice ($errmsg);
      }
    # Follow the DEPT:: chain until a dept with a real SM_HOST is reached.
    for ($i = 0; $loop_dept->{SM_HOST} =~ /^DEPT::(\S+)$/; $i++) {
      my $next_name = $1;
      push (@shared_intermediate_depts, $loop_dept);  # assign this SM too.
      $loop_dept = $name_to_dept{$next_name};
      unless (defined $loop_dept) {
        $errmsg = "FATAL CONFIG:  SM multiplexed onto UNKNOWN dept " .
                  "<$next_name> (reached from dept <$base_dept>)!\n";
        &die_nice ($errmsg);
        }
      # More hops than there are multiplex targets can only mean a cycle.
      if ($i >= $loop_detect) {
        $errmsg = "FATAL CONFIG:  LOOP in *indirect* multiplexing of SM " .
                  "for dept <$base_dept>!\n";
        &die_nice ($errmsg);
        }
      } # We found end of indirect SM multiplex chain.

    # First assign SM for all intermediate nodes encountered in traversal:
    foreach $sharing_dept (@shared_intermediate_depts) {
      $sharing_dept->{SM_HOST} = $loop_dept->{SM_HOST};
      if ($rcdbg) { # RC BUG debugging
        $errmsg = "Interm. <$sharing_dept->{DEPT_NAME}> SM = "
                . "<$loop_dept->{SM_HOST}>\n";
        warn ($errmsg);
        $log->warn ($errmsg);
        }
      }
    # Note these eventually would be assigned a SM anyway as leaf nodes,
    # but depending on config and order visited, we might waste time
    # traversing them several times.  By doing intermediate assignments now,
    # we ensure the entire algorithm remains Order N.

  # $ { $name_to_dept{$base_dept} } {SM_HOST} = $loop_dept->{SM_HOST};

    # Now assign SM for all leaf nodes originally stored:
    foreach $sharing_dept (@{$sms_to_assign{$base_dept}}) {
      $sharing_dept->{SM_HOST} = $loop_dept->{SM_HOST};
      if ($rcdbg) { # RC BUG debugging
        $errmsg = "Leaf <$sharing_dept->{DEPT_NAME}> SM = "
                . "<$loop_dept->{SM_HOST}>\n";
        warn ($errmsg);
        $log->warn ($errmsg);
        }
      }
    }

######################################################################

# Pick hosts for the following *after* satisfying selection criteria
# for individual depts' HOST_UNIVERSEs above:

  # Choose a host for the OHS unless one is already recorded.
  unless (defined $grids_db->{'ohs_host'}) {
    warn ("Selecting host for OHS ...\n");
    $log->warn ("Selecting host for OHS ...\n");
    if (@{$grids_db->{unclaimed_hosts}}) {
      ( $grids_db->{'ohs_host'} ) =
        $grids_db->select_hosts (1, \@{$grids_db->{unclaimed_hosts}}, 'OHS');
      # select_hosts can come back empty (all candidates failed).  Only
      # record a dept for a host we actually got; otherwise an undef key
      # (stored as '') would be planted in host_to_dept and could then be
      # returned by the fallback below as if it were a host name.
      if (defined $grids_db->{'ohs_host'}) {
        $grids_db->{'host_to_dept'}{$grids_db->{'ohs_host'}} = 'OHS'
           unless exists $grids_db->{'host_to_dept'}{$grids_db->{'ohs_host'}};
        }
      }
    # If no unclaimed_hosts or if they all fail, grab the first host.
    # keys() is used rather than each() so the result does not depend on,
    # or disturb, any iterator position left on this hash.
    # NOTE(review): this may pick a host tagged 'DESTROY_ME_LATER' -- confirm
    # whether such hosts should be skipped here.
    $grids_db->{'ohs_host'} = (keys %{$grids_db->{'host_to_dept'}})[0]
      unless defined $grids_db->{'ohs_host'};
    }
  $grids_db->{'ohs_port'} = $grids_db->new_port()
    unless $grids_db->{'ohs_port'};

    # The root SM runs on whatever host the ROOT dept was given for its SM.
    $grids_db->{'root_sm_host'} = $root_dept->{SM_HOST};
    ###  Do below to site it on an unused host (do *not* need a MC there):
    ###  $grids_db->{'root_sm_host'} = 
    ###    splice (@{$grids_db->{unclaimed_hosts}},
    ###            int(rand(@{$grids_db->{unclaimed_hosts}})), 1);
    ###  $grids_db->{'host_to_dept'}{$grids_db->{'root_sm_host'}} = 'ROOT_SM'
    ###    unless exists $grids_db->{'host_to_dept'}
    ###                 {$grids_db->{'root_sm_host'}};

  $grids_db->{'root_sm_port'} = $grids_db->new_port()
    unless $grids_db->{'root_sm_port'};


## RC BUG?  Even if defined, AGG may be saved legacy from previous incarnation
##          of grids whose MC we killed long ago, before reconfig !!!

    $grids_db->{'root_aggregator_host'} = $root_dept->{AGG_HOST};

    # Same guard as for the OHS: never file an undefined host in host_to_dept.
    if (defined $grids_db->{'root_aggregator_host'}) {
      $grids_db->{'host_to_dept'}{$grids_db->{'root_aggregator_host'}}
               = 'ROOT_AGG' unless exists $grids_db->{'host_to_dept'}
                                         {$grids_db->{'root_aggregator_host'}};
      }

  $grids_db->save();

  # Bring up (or attach to) the OHS, start the root SM, wait for both, and
  # then open the Hierarchy_interface used for all transactions below.
  if ($grids_db->{'ohs_attach'}) {   # attach to previously-started OHS
    # Nothing is started in this branch; only debug output is produced.
    print "Will attach to previously-started OHS! " if $rcdbg;
    print "$grids_db->{'ohs_host'}:$grids_db->{'ohs_port'}!\n" if $rcdbg;
    # But OHS will squawk Security error if you do normal new_root call!
    }
  else {
### RC BUG KLUDGE ::  Really need to put this in Hierarchy_interface.pm - &new()
  # Publish "host:port" of the OHS before starting it.
  $Hierarchy_interface::ohs_location =
         "$grids_db->{ohs_host}:$grids_db->{ohs_port}";

    &die_nice ("Couldn't start OHS!") unless &start_ohs($grids_db);
    }
  &die_nice ("Couldn't start Root sm!") unless &start_root_sm($grids_db);
## RC:  in Mod_utility;  both do RSH; hence need no MC on those hosts.

  wait_for_ohs_and_root_sm($grids_db,$log);

#  $user = "GrIDS_user";
#  $pass = "password";
#  $user =  $grids_db->{'init_user'};
#  $pass =  $grids_db->{'init_pass'};


### RC BUG KLUDGE ::  Really need to put this in Hierarchy_interface.pm - &new()
  # Set again here so it is also in place on the 'ohs_attach' path, which
  # skips the assignment above.
  $Hierarchy_interface::ohs_location =
         "$grids_db->{ohs_host}:$grids_db->{ohs_port}";

  # NOTE(review): the other call sites in this file assign the constructor's
  # result to ($ok, $h); here it is taken in scalar context with 'null' as
  # the third argument -- confirm that this yields the interface object.
  $h = new Hierarchy_interface($user,$pass,'null',$log,
						'comm','main::FILE');

#---------------------- Hierarchy transactions proper -----------------------#
    
  # Create the ROOT department from the root SM host/port and the root
  # aggregator host plus that host's module_ports entry; failure is fatal.
  ($status, $message) = $h->new_root (
      'ROOT',
      $grids_db->{'root_sm_host'},
      $grids_db->{'root_sm_port'},
      $grids_db->{'root_aggregator_host'},
      $grids_db->{'module_ports'}{ $grids_db->{'root_aggregator_host'} },
  );
  if (!$status) {
    &die_nice ($message);
    }


  ### Invalid to add_dept before adding HOST of its SM: sometimes a catch22
  ### RC BUG KLUDGE:  catch22 in general case because cannot add_host
  ### to a future dept, and cannot add_dept unless its SM_HOST was
  ### already object of add_host!  Slow kludge is to add_host for all to ROOT;
  ### then move_host for most!!!
  #   
  #   foreach $host (@{$grids_db->{'module_hosts'}}) {
  #     $h->add_host ('ROOT', $host, $grids_db->{'module_ports'}{$host});
  #     }
  ### #######  later, after doing every add_dept  ...
  #   foreach $host (@{$grids_db->{'module_hosts'}}) {
  #     $h->move_host ($grids_db->{'host_to_dept'}{$host}, $host)
  #              unless 'ROOT' eq $grids_db->{'host_to_dept'}{$host};
  #     }
  #   
  ### But more efficient to do only for SM_HOST and AGG_HOST as below ...

  # ROOT is a special case: add its selected hosts directly, skipping any
  # host the hierarchy already knows about.
  foreach $host (@{$root_dept->{SELECTED_HOSTS}}) {
    next if exists $h->{hierarchy}{$host};
    $h->add_host ($root_dept->{DEPT_NAME}, $host,
                  $grids_db->{'module_ports'}{$host});
    }

### RC DBG::  need to have done SEMI TOPO SORT on departmental parentage,
  # because invalid to add_dept before its parent exists.
  # So Grdbm:88 now pushes kids onto new per-parent hashlist:
  #         # create a semi topological sort by departmental parentage:
  #         push @{$child_depts{$dept->{PARENT_DEPT}}}, $dept;
  #
  # Add every non-ROOT dept, parents before children: the queue starts with
  # ROOT's children and each dept's own children are appended once that dept
  # has been added.
  my $dept;
  my @depts_to_add = @{$Grdbm::child_depts{ROOT}};

  while (@depts_to_add) {
    $dept = shift (@depts_to_add);
    # RC BUG: catch22!    $h->add_host ($dept->{DEPT_NAME}, $dept->{SM_HOST},
    # RC BUG: catch22!          $grids_db->{'module_ports'}{$dept->{SM_HOST}});

    if ($rcdbg) {
      $errmsg = "Doing add_dept HIERARCHY transaction for dept "
              . "<$dept->{DEPT_NAME}> ...\n";
      warn ($errmsg);
      $log->warn ($errmsg);
      }

    # The dept's SM host must already be in the hierarchy as a 'Host' before
    # add_dept; if it is not, park it under the (already existing) parent.
    unless ( (exists $h->{hierarchy}{$dept->{SM_HOST}})  &&
             (ref $h->{hierarchy}{$dept->{SM_HOST}} eq 'Host') ) {
        $h->add_host ($dept->{PARENT_DEPT}, $dept->{SM_HOST},
                      $grids_db->{'module_ports'}{$dept->{SM_HOST}});
      } # RC BUG KLUDGE:  catch22 must add_host to pre-existing PARENT;
        #                         then add_dept; then move_host to reclaim it.

    # Same treatment for the dept's aggregator host.
    unless ( (exists $h->{hierarchy}{$dept->{AGG_HOST}})  &&
             (ref $h->{hierarchy}{$dept->{AGG_HOST}} eq 'Host') ) {
        $h->add_host ($dept->{PARENT_DEPT}, $dept->{AGG_HOST},
                      $grids_db->{'module_ports'}{$dept->{AGG_HOST}});
      }
    # NOTE(review): the return values of the two add_host calls above are
    # not checked -- confirm that a failure there surfaces via add_dept.

    # add it;
    ($status,$message) = $h->add_dept ($dept->{PARENT_DEPT}, $dept->{DEPT_NAME},
	                               $dept->{SM_HOST}, $dept->{AGG_HOST});
    &die_nice ($message) unless $status;
    # now safe to put its kids in the queue to add:
    push @depts_to_add, @{$Grdbm::child_depts{$dept->{DEPT_NAME}}};

    # Attach the dept's own hosts: plain hosts are added (unless already
    # present); hosts that are this dept's SM or AGG host are moved instead.
    foreach $host (@{$dept->{SELECTED_HOSTS}}) {
      if ($host ne $dept->{SM_HOST}  &&  $host ne $dept->{AGG_HOST}) {
        $h->add_host ($dept->{DEPT_NAME}, $host,
                      $grids_db->{'module_ports'}{$host})
          unless exists $h->{hierarchy}{$host};
          # May have been added already, if it hosts SM for an ancestor dept.
        }
      else { # RC BUG KLUDGE:  catch22 followup (see catch22 note above):
             # re-assert ownership of our host, after catch22 critical section.
        $h->move_host ($dept->{DEPT_NAME}, $host);
        # NOTE:  If our SM_HOST is *not* in our dept, but we had to add_host
        # on it above, then another dept will move_host to reclaim it later.  
        }
      }
    # In debug mode, dump the hierarchy after every dept.
    $h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/output_hierarchy",
                            'prettyprint')
      if $rcdbg;
    }
  $grids_db->save();


#----------------------------- Rulesets --------------------------------------#

# Install each default ruleset on ROOT from $GRIDSPATH/rulesets/<name>.rs.
foreach $rs_name ( @default_rs_name ) {

    # Read the file directly instead of through `cat`, so the path never
    # goes through a shell and a read failure is reported by us.
    my $rs_file = "$ENV{'GRIDSPATH'}/rulesets/$rs_name.rs";
    $default_rs = '';
    if (open (RULESET, "< $rs_file")) {
        local $/ = undef;                 # slurp the whole file at once
        $default_rs = <RULESET>;
        close (RULESET);
        $default_rs = '' unless defined $default_rs;    # empty file
        }
    else {
        # As before, an unreadable file yields an empty ruleset; the
        # difference is that the reason is now also written to the log.
        my $why = $!;
        warn ("Cannot read ruleset file $rs_file: $why\n");
        $log->warn ("Cannot read ruleset file $rs_file: $why\n");
        }

    ($status,$message) = 
	$h->set('ROOT',0,0,"rulesets\{$rs_name\}",$default_rs);
    &die_nice ($message) unless $status;
}

#--------------------- Finish Up for GrIDS start -------------------------#

# Dump the finished hierarchy in human-readable form.
$h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/output_hierarchy",'prettyprint');

# save config in log dir                                       # RC 5/24
# system() in list form replaces the backticks: no shell is involved and the
# exit status of cp is checked.
if (system ('cp', '-p', "$ENV{'GRIDSPATH'}/user_interface/.grdbm",
                        "$ENV{'GRIDSPATH'}/log/grdbm") != 0) {
    warn ("Could not copy .grdbm into the log dir (cp status $?)\n");
    }

# Startup succeeded.  The message still goes to STDERR, but the script now
# exits with status 0; ending a successful run with die() reported failure
# to the caller and left the following exit unreachable.
warn "GRIDS STARTUP exiting normally.\n\n";
exit 0;

GRIDS_CHECK:

  # Run the hierarchy consistency checks and die on the first one that fails.
  print "\nBeginning tests\n";

  if(defined $ARGV[0] && $ARGV[0] eq 'check')
   {
    $log = new Clog('check_grids_test','N/A',"check_grids.test.log"); # Set up our log
    $log->{'central_log'} = 0; # No central logging - presently broken.
 
    ($ok, $h) = new Hierarchy_interface($user,$pass,'ROOT',$log,'comm','main::FILE');
    # Report what the constructor handed back in $h; the message used to
    # interpolate $hierarchy, a variable nothing in this script assigns.
    $log->die("done!\nok=$ok\nhi=$h\n") unless $ok;
   }
  die "Interface hierarchy internally inconsistent - see log for details\n"
	unless($h->is_consistent());
  die "OHS and interface inconsistent - see log for details\n"
	unless($h->matches_ohs());
  die "Interface hierarchy inconsistent with managers - see log for details\n"
	unless($h->matches_managers());
  die "Aggregators internally inconsistent - see log for details\n"
        unless($h->matches_aggregators());
  die "Rulesets internally inconsistent - see log for details\n"
        unless($h->rulesets_consistent('ROOT'));

  # All checks passed: dump the hierarchy and announce success.
  $h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/output_hierarchy",
                                                      'prettyprint');
 
  my $completion = "++++++++++++++++SUCCESS - grids is ok!++++++++++++++++";
  print $completion,"\n"; $log->warn($completion);
  exit; 

#---------------------  Extra commands desired  -------------------------#
GRIDS_EXTRA:

  my ($ohs_host,$ohs_port) = split(/:/,$Hierarchy_interface::ohs_location);
 
  $log = new Clog('grids_test','N/A',"grids.test.log"); # Set up our log
  $log->{'central_log'} = 0; # No central logging - presently broken.

# $grids_db->parse_config_file ($grdbm_standard_config, $grdbm_personal_config);
# could do this here, even if no grids is running.  But must exit before new():
 
  # The last argument is 'main::FILE', as at every other call site; the
  # literal used to be broken across two lines and so carried a newline.
  ($ok, $h) = new Hierarchy_interface($user,$pass,'ROOT',$log,'comm','main::FILE');
  # Log what the constructor handed back in $h (was the unset $hierarchy).
  $log->warn("done!\nok=$ok\nhi=$h\n") unless $ok;


### Test misc move_dept and move_host configs::
  # Interactive loop: move one dept or one host per round, until the user
  # enters an empty line, Q.. or E.., or STDIN reaches end-of-file.
  print "\nDo you want to move a DEPT (D/d) or move a HOST (H/h)? ";
  $res = <STDIN>;
  while (1) {
    last unless defined $res;          # EOF on STDIN
    if ($res =~ /^D/i) {
      print "\nEnter name of DEPT to move: ";
      $dept = <STDIN>;
      last unless defined $dept;       # EOF: stop rather than re-prompt forever
      chomp $dept;
      next unless $dept;
      print "\nEnter name of its NEW PARENT dept: ";
      $new_parent = <STDIN>;
      last unless defined $new_parent;
      chomp $new_parent;
      next unless $new_parent;
      # Keep the result text in $message.  Assigning it to $h destroyed the
      # interface object that the save() below and the next round need.
      ($status, $message) = $h->move_dept ($new_parent, $dept);
      }
    elsif ($res =~ /^H/i) {
      print "\nEnter name (*not* qualified) of HOST to move: ";
      $host = <STDIN>;
      last unless defined $host;
      chomp $host;
      next unless $host;
      $host .= '.cs.ucdavis.edu';      # site domain is hard-wired here
      print "\nEnter name of its NEW PARENT dept: ";
      $new_parent = <STDIN>;
      last unless defined $new_parent;
      chomp $new_parent;
      next unless $new_parent;
      ($status, $message) = $h->move_host ($new_parent, $host);
      }
    elsif ($res =~ /^$|^Q|^E/i) {   # RETURN or QUIT or EXIT
      last;
      }
    else {
      # Nothing was attempted, so there is no result to report.
      print "\nUnrecognized choice.\n";
      $status = 1;
      }
    unless ($status) {
      print "\nERROR!  ==> $message\n\n";
      }
    $h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/output_hierarchy",
                                                        'prettyprint');
    print "\nNow check the output_hierarchy ...\n\n";
    print "\nDo you want to move a DEPT (D/d) or move a HOST (H/h)? ";
    $res = <STDIN>;
  }

# add_dept ($dept->{PARENT_DEPT}, $dept->{DEPT_NAME}
#           $dept->{SM_HOST}, $dept->{AGG_HOST});
# $h->move_host ($dept->{DEPT_NAME}, $host);

  exit;

#---------------------  View Hierarchy -------------------------#

GRIDS_VIEW:
  my ($ohs_host,$ohs_port) = split(/:/,$Hierarchy_interface::ohs_location);

  $log = new Clog('grids_test','N/A',"grids.test.log"); # Set up our log
  $log->{'central_log'} = 0; # No central logging - presently broken.

  ($ok, $h) = new Hierarchy_interface($user,$pass,'ROOT',$log,'comm','main::FILE');
  # Log what the constructor handed back in $h (was the unset $hierarchy).
  $log->warn("done!\nok=$ok\nhi=$h\n") unless $ok;

  # Control falls straight through to OUTPUT; the label is kept as a target.

OUTPUT:

  $h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/output_hierarchy",
                                                      'prettyprint');
  $h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/config_hierarchy",
                                                'parse_config_file');
  ## The above will generate a file in format that can be read-in
  ## as a startup config file, to ensure you exactly repeat a
  ## particular configuration that may have been randomly selected.
  ## (Although you may not get same *ports*, you will get same hosts.)

  exit;

# ------------------------------ Cleanup --------------------------------#

GRIDS_STOP:

  # cleanup after ourselves
  &slay($grids_db->{'ohs_host'},'ohs\/ohs',1);
  &slay($grids_db->{'root_sm_host'},'sm\/sm',1);

  # A plain `grids stop' has no second argument; test for it before
  # comparing so that -w does not warn about an undefined value.
  if(defined $ARGV[1] && $ARGV[1] eq '-module') 
   {
    # We plan to leave the module controllers running.
    quick_slay('kill_sm',$grids_db,@{$grids_db->{'module_hosts'}});
    $grids_db->reduce_to_modules();   
   }
  else
   {  
    quick_slay('kill_sm suicide',$grids_db,@{$grids_db->{'module_hosts'}});
    $grids_db->empty();
   }
  &clean_up_files();
  exit;

# ------------------- Alternate speedy cleanup ------------------#

GRIDS_DESTROY:
  # "grids destroy" (also reached from &die_nice): slay every GrIDS-related
  # process on every known host, one forked child per host, then empty the
  # stored configuration and remove leftover files.

  # cleanup after ourselves

  $fork_cnt = 0;     # children started and not yet reaped
  $fork_err = 0;     # set after a failed fork; a second failure in a row is fatal
  do $Grdbm::grdbm_location;                   ## RC 5/5/97
  $cmd = "cp -p $ENV{'GRIDSPATH'}/user_interface/.grdbm" .
              " $ENV{'GRIDSPATH'}/log/grdbm";            # RC 5/24
  `$cmd`;   # save config in log dir

  # Every host that belongs to a department, plus the root SM host and the
  # OHS host when they are not already listed under a department.
  @kill_list = keys %{$Grdbm::self->{'host_to_dept'}};
  push (@kill_list, $Grdbm::self->{'root_sm_host'}) unless
      ${$Grdbm::self->{'host_to_dept'}}{$Grdbm::self->{'root_sm_host'}};
  push (@kill_list, $Grdbm::self->{'ohs_host'}) unless
      ${$Grdbm::self->{'host_to_dept'}}{$Grdbm::self->{'ohs_host'}};

  # Drop empty entries (this may be called to kill a semi-born grids) and
  # repeats (root SM and OHS may share one host), keeping the original order,
  # so that no host is slain twice.
  {
    my %seen;
    @kill_list = grep { $_ && !$seen{$_}++ } @kill_list;
  }

  # Reap every child that has been started so far.
  my $reap_children = sub {
    until ($fork_cnt == 0)
     {
      print "Waiting for $fork_cnt slays to complete.\n";
      wait;
      $fork_cnt--;
     }
  };

  foreach $host (@kill_list)
   {
    FORK:
     {
      if ($pid = fork) {
        # Parent: note the new child and clear any earlier fork failure.
        print "Cleaning up on $host\n";
        $fork_cnt++;
        $fork_err = 0;
      } elsif (defined $pid) {
        # Child: do the slaying for this one host, then finish.
        &slay($host,'ohs|sniffer|module_controller|'.
              'tcpdump|sm\/sm|engine.pl|exemon',1);
        exit;
      } else {
        # fork failed.  Let the children already running finish, then try
        # this host once more; give up if the retry fails as well.
        die ("Unexpected ERROR on fork.\n") if $fork_err > 0;
        print ("ERROR on fork for $host.\n");
        print ("grids DESTROY will complete those jobs already started before continuing.\n");
        $fork_err++;
        &$reap_children();
        redo FORK;
      }
     }
   }

  &$reap_children();

  $grids_db->empty();
  &clean_up_files();
  exit;

# ------------------------------ Subroutines --------------------------------#

sub catch_alarm {
  # SIGALRM handler.
  #   - &user_input() relies on it when $interactive_auto_start is on.
  #   - &Grdbm::ultrix_ping() relies on it on CSIF decs.
  # Records that the alarm went off; when a ping child pid has been
  # registered in $Grdbm::ping_child_pid, that child is killed as well.
  $alarm_sounded = 1;
  kill ('SIGKILL', $Grdbm::ping_child_pid)
    if defined $Grdbm::ping_child_pid;   # we were called by &ultrix_ping();
  }



sub user_input {
  # Read one line from STDIN, optionally giving up waiting after a timeout.
  #
  #   $timer          - seconds to wait when $interactive_auto_start is on
  #                     (5 when false or omitted)
  #   $msg            - text appended to the "no input" notice after a timeout
  #   $default_result - returned when the line read is empty, "0" or missing
  #
  # Dies unless $interactive_startup is set.  Returns the chomped line, or
  # $default_result.
  my ($timer, $msg, $default_result) = @_;
  $timer = 5 unless $timer;
  die unless $interactive_startup;

  # Always start from a clean flag: &catch_alarm is shared with the ping
  # code, so a value left over from there must not look like our timeout.
  $alarm_sounded = 0;
  if ($interactive_auto_start) {
    alarm $timer;    # give user $timer seconds to enter a manual choice.
    }

  my $res = <STDIN>;
  alarm 0;    # cancel alarm (harmless when none is pending)

  if ($alarm_sounded) {
    print "\nNo input entered before timeout.  $msg\n";
    }

  # Strip the newline in every case, so that a line which arrives after the
  # timeout is not handed back with "\n" attached; $res is undef at EOF.
  chomp $res if defined $res;
  return $res || $default_result;
  }



sub choose_another_host {
  # Ask the user for a replacement for a host that failed its start-up test.
  #
  #   $old_host - the host that failed
  #   $dept     - department shown in the prompt
  #   $msg      - passed on to &user_input for its timeout notice
  #               (defaults to 'Will choose random replacement.')
  #
  # Keeps asking while the answer is a host that $grids_db->{'host_to_dept'}
  # already lists.  Returns the answer as given by &user_input (which is
  # called with a default of 0), or 'DIE' when the answer is quit/exit/die
  # in any letter case.
  my ($old_host, $dept, $msg) = @_;
  $msg ||= 'Will choose random replacement.';
  my ($host, $owner);

  do {
    print "\nIn dept $dept, SPECIFIC HOST $old_host FAILED START-UP TEST.\n"
        . "Enter an alternative (fully qualified) hostname: ";
    $host  = &user_input (9, $msg, 0);
    $owner = $grids_db->{'host_to_dept'}{$host};
    print "That alternative host is already claimed by dept "
           . $owner . "\n" if $owner;
    } while ($owner);

  return ($host =~ /^quit$|^exit$|^die$/i) ? 'DIE' : $host;
  }



sub change_host_test_ok {   # Opportunity for user to alter stresstest parms.

  # Walks through the four host-test settings kept in package Grdbm.  Each
  # one is shown with its current value; a reply made only of digits
  # replaces it, any other reply leaves it alone.

  # Show a prompt, read one line into the global $res, and hand back the
  # digits typed (undef when the line is not purely digits).
  my $ask = sub {
    print $_[0];
    $res = <STDIN>;
    return ($res =~ /^(\d+)$/) ? $1 : undef;
    };
  my $new;

  $new = &$ask ("\nPING test parm == <$Grdbm::max_ping_delay> seconds as max"
              . " ping delay.\nEnter NEW VALUE to change it? ");
  if (defined $new) {
    $Grdbm::max_ping_delay = $new;
    print "PING test NEW max ping delay == <$new> seconds.\n";
    }

  $new = &$ask ("\nSWAP test parm == <$Grdbm::min_avail_swap> kbytes as min"
              . " available threshold.\nEnter NEW VALUE to change it? ");
  if (defined $new) {
    $Grdbm::min_avail_swap = $new;
    print "SWAP test NEW min. available threshold == <$new> kbytes.\n";
    }

  $new = &$ask ("\nFORK test parm #1 == <$Grdbm::num_forks_tried> as number"
              . " of forks to try.\nEnter NEW VALUE to change it? ");
  if (defined $new) {
    $Grdbm::num_forks_tried = $new;
    print "FORK test NEW number of forks to try == <$new>.\n";
    }

  $new = &$ask ("\nFORK test parm #2 == <$Grdbm::max_fork_time> seconds as max"
              . " time for forks to succeed.\nEnter NEW VALUE to change it? ");
  if (defined $new) {
    $Grdbm::max_fork_time = $new;
    print "FORK test NEW max time for forks to succeed == <$new>.\n";
    }

  print "\n";

}



sub die_nice {
  # Fatal-error exit that tries to leave things tidy.
  #
  #   $_[0] - the message; it is sent to STDERR and to $log
  #
  # Saves $grids_db and the hierarchy held by $h when those objects exist,
  # reports the message, then jumps to GRIDS_DESTROY (which ends in exit)
  # when there is a Grdbm object, or simply exits.  Never returns.
  my ($why) = @_;

  $grids_db->save() if ref($grids_db) eq 'Grdbm';
  $h->{'hierarchy'}->save("$ENV{'GRIDSPATH'}/log/output_hierarchy")
    if ref($h) eq 'Hierarchy_interface';
  warn ($why);
  # This can run before the log object exists; a method call on an unset
  # $log would abort here and skip the cleanup below.
  $log->warn ($why) if ref($log);
# $log->die ($why);
  goto GRIDS_DESTROY if ref($grids_db) eq 'Grdbm';
  exit;
  }


sub clean_up_files
{
  # Miscellaneous cleanup of files left behind by earlier runs:
  #   $HOME/ohs_*  and  $HOME/sm_*
  #   /tmp/mod_output.temp.*
  #   $GRIDSPATH/module_controller/sm_state.*  and  *.status
  #   $GRIDSPATH/log/[0-9]*
  # Takes no arguments; the return value is not meaningful.
  #
  # When HOME or GRIDSPATH is unset or empty, the commands that depend on
  # it are skipped with a warning: built from an empty prefix they would
  # expand to "rm -rf /ohs_*", "rm -rf /log/[0-9]*" and so on.
  my @commands;

  my $home = $ENV{'HOME'};
  if (defined $home && $home ne '') {
    push (@commands, "rm -rf $home/ohs_*",
                     "rm -rf $home/sm_*");
    }
  else {
    warn ("clean_up_files: HOME is not set; leaving ohs_* and sm_* alone\n");
    }

  push (@commands, "rm -rf /tmp/mod_output.temp.*");

  my $grids = $ENV{'GRIDSPATH'};
  if (defined $grids && $grids ne '') {
    push (@commands, "rm -rf $grids/module_controller/sm_state.*",
                     "rm -f $grids/module_controller/*.status",
                     "rm -rf $grids/log/[0-9]*");
# Deliberately NOT removed:
#   $GRIDSPATH/log/*.cmd
#        all cmd files are in log, not module_controller,
#        but if any linger, they are bug evidence that should be preserved.
#   $GRIDSPATH/log/*.status
#        status files are small, and may offer clues
    }
  else {
    warn ("clean_up_files: GRIDSPATH is not set; leaving GrIDS state files alone\n");
    }

  # The patterns are shell globs, so each command goes through the shell.
  foreach my $command (@commands) {
    `$command`;
    }
}


sub clean_up_logs
{
  # Rotate the log directories under $GRIDSPATH, keeping $nlogs old ones:
  #   log.$nlogs is deleted, log.N becomes log.N+1, the contents of log
  #   are moved into log.1, and a log directory is left in place.
  # Takes no arguments; reads the global $nlogs.
  my $base = $ENV{'GRIDSPATH'};
  unless (defined $base && $base ne '')
   {
    # Built from an empty prefix the commands below would act on /log.N.
    warn("clean_up_logs: GRIDSPATH is not set; log rotation skipped\n");
    return;
   }

  # Drop the oldest generation.
  if(-d "$base/log.$nlogs")
   {
    my $command = "rm -rf $base/log.$nlogs";
    #print STDERR "About to: $command\n";
    `$command`;
   }

  # Shift the others up by one, highest number first so nothing is overwritten.
  foreach my $i (reverse (1..$nlogs-1))
   {
    next unless -d "$base/log.$i";
    my $command = "mv -f $base/log.$i $base/log.".($i+1);
    #print STDERR "About to: $command\n";
    `$command`;
   }

  if(-d "$base/log")
   {
    unless (-d "$base/log.1")
     {
      mkdir("$base/log.1",0777)
        or warn("clean_up_logs: cannot create $base/log.1: $!\n");
     }

    # Only run mv when the glob log/* will match something; on an empty
    # directory the shell would pass the pattern through and mv would
    # complain.  Dot files are ignored because the glob skips them too.
    local *LOGDIR;
    my @entries;
    if (opendir(LOGDIR, "$base/log"))
     {
      @entries = grep { !/^\./ } readdir(LOGDIR);
      closedir(LOGDIR);
     }
    else
     {
      warn("clean_up_logs: cannot read $base/log: $!\n");
     }
    if (@entries)
     {
      my $command = "mv -f $base/log/* $base/log.1";
      `$command`;
     }
   }
  else
   {
    warn("No log directory found");
   }

  # Make sure a log directory exists for the coming run.
  unless (-d "$base/log")
   {
    mkdir("$base/log",0755)
      or warn("clean_up_logs: cannot create $base/log: $!\n");
   }
}



sub init_startup_env {     # startup defaults, several overridable from %ENV

  # Environment variables consulted:
  #   GRIDS_interactive_startup     value copied to $interactive_startup
  #   GRIDS_interactive_auto_start  value copied to $interactive_auto_start
  #   GRIDS_max_ping_delay          value copied to $Grdbm::max_ping_delay
  #   GRIDS_do_swap_test            Y... / N... / digits
  #   GRIDS_do_fork_test            Y... / N... / '<digits> forks in <digits> sec'
  #   GRIDSPATH                     prefix of the two config file names
  # Dies when GRIDS_do_swap_test or GRIDS_do_fork_test has any other form.

  $| = 1;       # flush output after every write (STDOUT, unless reselected)

  $rcdbg = 1;   ## RC BUG -- verbose; for debugging startup only.

  # Allows user to choose another host if a SPECIFIC HOST fails at startup.
  $interactive_startup = exists $ENV{'GRIDS_interactive_startup'}
                       ? $ENV{'GRIDS_interactive_startup'}
                       : 1;

  # Mimics an interactive user typing in helpful responses.
  $interactive_auto_start = exists $ENV{'GRIDS_interactive_auto_start'}
                          ? $ENV{'GRIDS_interactive_auto_start'}
                          : 1;

  $Grdbm::max_ping_delay = $ENV{'GRIDS_max_ping_delay'}
    if exists $ENV{'GRIDS_max_ping_delay'};

  # Swap test: on, off, or on with a minimum-available figure.
  if (exists $ENV{'GRIDS_do_swap_test'}) {
    my $swap = $ENV{'GRIDS_do_swap_test'};
    if ($swap =~ /^Y/i) {
      $Grdbm::do_swap_test = 1;
      }
    elsif ($swap =~ /^N/i) {
      $Grdbm::do_swap_test = 0;
      }
    elsif ($swap =~ /^\d+$/) {
      ($Grdbm::do_swap_test, $Grdbm::min_avail_swap) = (1, $swap);
      }
    else {
      die ("Value of ENV{'GRIDS_do_swap_test'} is bogus, should be Y/N/digits");
      }
    }

  # Fork test: on, off, or on with a fork count and a time limit.
  if (exists $ENV{'GRIDS_do_fork_test'}) {
    my $fork = $ENV{'GRIDS_do_fork_test'};
    if ($fork =~ /^Y/i) {
      $Grdbm::do_fork_test = 1;
      }
    elsif ($fork =~ /^N/i) {
      $Grdbm::do_fork_test = 0;
      }
    elsif ($fork =~ /(\d+) forks in (\d+) sec/) {
      ($Grdbm::do_fork_test, $Grdbm::num_forks_tried, $Grdbm::max_fork_time)
        = (1, $1, $2);
      }
    else {
      die ("Value of ENV{'GRIDS_do_fork_test'} is bogus, should be Y, N, "
         . "or '[digits] forks in [digits] sec'\n");
      }
    }

  my $ui_dir = "$ENV{'GRIDSPATH'}/user_interface";

  # Standard startup config stuff from CVS:
  $grdbm_standard_config = "$ui_dir/grids.db";

  # Personal startup config stuff; any elements here override data from CVS:
  # (Elements should include   @Grdbm::dept_geometry)
  $grdbm_personal_config = "$ui_dir/grids.db.mine";
}
 

sub replace_sm_agg_host {

# Ask the user for a different host for a department's SM or AGG.
#
#   $sm_or_agg - prefix of the field to replace; '_HOST' is appended to it
#   $dept      - hash reference for the department; DEPT_NAME is read from
#                it and the "<prefix>_HOST" entry is overwritten
#
# Keeps prompting until the answer is a host that has an entry in
# $grids_db->{'host_to_dept'}.  Each answer, accepted or not, is stored in
# $dept->{"<prefix>_HOST"}.  If STDIN runs dry the script is ended through
# &die_nice instead of prompting forever.

my ($sm_or_agg, $dept) = @_;
my $field_name = $sm_or_agg . '_HOST';
my ($ok);

# Find this department's entry in @Grdbm::dept_geometry.  The lookup used
# to be made with the $dept reference itself as the hash key; that is still
# tried first, and the department name is used when it finds nothing.
# NOTE(review): presumably dept_to_index is keyed by department name -
# confirm against Grdbm.
my $index_of   = $grids_db->{'dept_to_index'};
my $dept_index = $index_of->{$dept};
$dept_index = $index_of->{$dept->{DEPT_NAME}}
  if !defined $dept_index && defined $dept->{DEPT_NAME};
my $config_obj = $Grdbm::dept_geometry[$dept_index];

while (!$ok) {
  print "Desired $sm_or_agg for dept <$dept->{DEPT_NAME}> " .
        "on UNCHOSEN host: $dept->{$field_name}\n";
  print "Currently SELECTED_HOSTS for this dept are:\n" .
         (join ' ', @{$config_obj->{SELECTED_HOSTS}}) . "\n";
  print "Enter an alternative (fully qualified) hostname in *ANY* dept: ";

  my $answer = <STDIN>;
  unless (defined $answer) {
    # End of input: no answer can ever arrive, so stop here.
    &die_nice ("No more input while choosing a replacement $sm_or_agg host "
             . "for dept <$dept->{DEPT_NAME}>\n");
    }
  chomp $answer;
  $dept->{$field_name} = $answer;

  ### RC todo:  timer logic to choose a random SELECTED_HOSTS if no input.
  if (defined $grids_db->{'host_to_dept'}{$answer}) {
    $ok = 1;
    }
# else {
#   print "Do you want to ADD host $dept->{$field_name} to this dept? (Y/N) ";
#   $ok = <STDIN>;  print "\n";
#   $ok = 0 unless $ok =~ /Y/i;
#   ### RC BUG:  awkward to ADD at this late stage.
#   }
  }

}












