#!/usr/bin/perl -w
#
# legion_run_multi -- run one legion_run job per matching input file while
# keeping a bounded number of jobs in flight.
#
# The specification file (-f) holds one directive per line.  Blank lines and
# lines starting with '#' are skipped.
#
#   IN name [pattern]   per-job input.  The optional pattern selects the files
#                       in the current directory that define the jobs ('.' is
#                       literal, '*' matches any run of characters, anything
#                       else is used as regex syntax).  Whatever remains of a
#                       matching file name after removing "name" from its
#                       front is that job's suffix.
#   CONSTANT name       shared input, symlinked into every work directory
#   constant name       passed to legion_run as "-in name"
#   OUT name            per-job output, moved to "name<suffix>" afterwards
#   out name            passed to legion_run as "-out name<suffix>"
#   console name        passed to legion_run as "-o name"
#
# The schedule file (-s) holds "host count" lines; the counts are summed to
# give the number of simultaneous jobs and each job is pinned to its host.
#
# NOTE(review): -t is parsed, but nothing in this script consumes its value;
# only its conflict with an IN pattern is checked.  Confirm the intended
# semantics before relying on it.

use strict;
use warnings;

use POSIX qw(WNOHANG);
use Getopt::Std;
use File::Copy qw(move);
use File::Path qw(rmtree);

# Children are forked while text may still sit in the output buffer; running
# unbuffered keeps them from printing the parent's pending output again.
$| = 1;

my %opt;
getopts("vf:n:t:s:", \%opt) or usage();
usage() if !defined $opt{f};
usage() if !defined $opt{n} && !defined $opt{s};

my $program = shift @ARGV;
usage() if !defined $program;
my @program_args = @ARGV;

my $verbose  = $opt{v};
my $max_jobs = $opt{n};
my $mommie   = $$;          # parent pid, used to name the work directories

# Without a schedule the job limit comes straight from -n; a limit of zero
# would leave the final wait loop spinning forever.
if (!defined $opt{s} && $max_jobs !~ /^[1-9]\d*$/) {
    die "-n needs a positive number of nodes, got <$max_jobs>";
}

# State shared with start_one_job, run_job_and_exit and check_jobs.
my (@ins, @constants, @g_constants, @outs, @g_outs);
my ($console, $in_pat, $in_single_word);
my @name_list;              # suffixes of the jobs not yet started
my $count = 0;              # total number of jobs to run
my %hosts;                  # host => workers still to be started there
my %pids;                   # child pid => host it runs on (-1 for "any")
my ($jobs_running, $jobs_done, $jobs_started) = (0, 0, 0);

# ---------------------------------------------------------------------------
# Read the specification file.
# ---------------------------------------------------------------------------
my %list_for = (
    CONSTANT => \@constants,
    constant => \@g_constants,
    OUT      => \@outs,
    out      => \@g_outs,
);

open(my $spec_fh, '<', $opt{f})
    or die "can't open specification file $opt{f}: $!";
while (my $line = <$spec_fh>) {
    chomp $line;
    next if $line =~ /^\s*$/;
    next if $line =~ /^#/;

    my ($keyword, $name, $pattern) = split ' ', $line;
    die "unknown line in specifications file $opt{f}: <$line>"
        if !defined $name;

    if ($keyword eq "IN") {
        push @ins, $name;
        if (defined $pattern) {
            $in_pat         = $pattern;
            $in_single_word = $name;
        }
    }
    elsif (exists $list_for{$keyword}) {
        push @{ $list_for{$keyword} }, $name;
    }
    elsif ($keyword eq "console") {
        $console = $name;
    }
    else {
        die "unknown line in specifications file $opt{f}: <$line>";
    }
}
close($spec_fh) or die "can't close specifications file $opt{f}";

if (defined $in_pat && defined $opt{t}) {
    die "Can't specify an `in' pattern and a `times'";
}
# Everything below enumerates files through the pattern, so it is required.
if (!defined $in_pat) {
    die "No pattern specified in any `in' lin in specifictions file $opt{f}";
}

# ---------------------------------------------------------------------------
# Work out which jobs still have to run.
# ---------------------------------------------------------------------------
opendir(my $dir_h, ".") or die "can't open directory .!";
my @entries = readdir($dir_h) or die "can't get entries!";
closedir($dir_h);

(my $in_regex = $in_pat) =~ s/\./\\./g;
$in_regex =~ s/\*/.*/g;
my @work = grep { /^$in_regex/ } @entries;

# The first OUT file doubles as the "already done" marker for a job.
my $out = $outs[0];
for my $entry (@work) {
    (my $suffix = $entry) =~ s/^\Q$in_single_word\E//;
    if (defined $out && -e "$out$suffix") {
        print "$out$suffix exists, skipping that one.\n";
    }
    else {
        push @name_list, $suffix;
        $count++;
    }
}
die "No work to be done. Exiting." if $count == 0;

# ---------------------------------------------------------------------------
# Read the schedule, if any; it overrides -n with the sum of its workers.
# ---------------------------------------------------------------------------
if (defined $opt{s}) {
    $max_jobs = 0;
    open(my $sched_fh, '<', $opt{s})
        or die "can't open schedule unix_file: $opt{s}: $!";
    while (my $line = <$sched_fh>) {
        chomp $line;
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;

        my ($host, $number) = split ' ', $line;
        if (!defined $host || !defined $number || $number !~ /^\d+$/) {
            die "unknown line <$line> in schedule file $opt{s}";
        }
        if (exists $hosts{$host}) {
            die "duplicate host $host in schedule file";
        }
        $hosts{$host} = $number;
        $max_jobs += $number;
    }
    close($sched_fh) or die "can't close schedule unix_file: $opt{s}: $!";
    print "Sum of $max_jobs workers available.\n" if $verbose;

    # With no workers nothing could ever start and the wait loop would hang.
    die "No workers available in schedule file $opt{s}" if $max_jobs == 0;
}

print "Running up to $max_jobs simultaneous jobs...\n" if $verbose;

# ---------------------------------------------------------------------------
# Start the initial batch of jobs.
# ---------------------------------------------------------------------------
if (!defined $opt{s}) {
    # Stop once every job is started, even if that is fewer than $max_jobs.
    while ($jobs_running < $max_jobs && $jobs_started < $count) {
        # Let's slow things down a bit to prevent pounding too hard
        sleep(5);
        start_one_job(shift @name_list, undef);
        print scalar(@name_list), " jobs remain to be started.\n" if $verbose;
    }
}
else {
    # Hand out jobs round-robin, one per host per pass, until every host has
    # used up its workers or there is nothing left to start.
  LOOP:
    while (1) {
        my $started_any = 0;
        for my $h (keys %hosts) {
            last LOOP if $jobs_started >= $count;
            if ($hosts{$h} <= 0) {
                delete $hosts{$h};
                next;
            }
            print "Starting job on $h\n" if $verbose;
            $started_any = 1;
            start_one_job(shift @name_list, $h);
            $hosts{$h}--;
        }
        last LOOP if !$started_any;
    }
}

# ---------------------------------------------------------------------------
# Wait for everything to finish; check_jobs starts the remaining jobs as
# running ones complete.
# ---------------------------------------------------------------------------
while ($jobs_done < $count) {
    sleep(1);
    check_jobs();
}

exit(0);

# Print the command-line synopsis to STDERR and exit with status 1.
sub usage {
    print STDERR "usage: legion_run_multi [-v] [-n num_nodes] [-t times] [-s schedule]\n";
    print STDERR " -f specification_file program [args...]\n";
    exit 1;
}

# start_one_job($suffix, $host)
#
# Creates the work directory for the job named by $suffix, symlinks its
# inputs into it, and forks a child that runs the job.  $host may be undef or
# "-1" to let legion_run choose; otherwise it is passed on with -h.  The
# parent records the child in %pids and returns after a one second pause.
# Dies if the work directory, a symlink, or the fork cannot be made.
sub start_one_job {
    my ($n, $host) = @_;
    my $d       = "legion_flogger_$mommie$n";
    my $on_host = defined $host && $host ne "-1";

    $jobs_running++;
    $jobs_started++;

    if ($verbose) {
        print "Starting job: $n";
        print " on host $host" if $on_host;
        print "\n";
        print "Making work directory for $n\n";
    }

    mkdir($d, 0777) or die "can't make work directory $d: $!";
    for my $i (@ins) {
        # Per-job inputs live one level up under "<name><suffix>".
        symlink("../$i$n", "$d/$i") or die "can't symlink $i$n";
    }
    for my $c (@constants) {
        symlink("../$c", "$d/$c") or die "can't symlink $c";
    }

    my $pid = fork;
    # A failed fork returns undef; without this check the parent itself
    # would fall into the child branch and exit.
    die "can't fork for job $n: $!" if !defined $pid;

    if ($pid == 0) {
        run_job_and_exit($n, $d, $on_host ? $host : undef);
    }

    $pids{$pid} = defined $host ? $host : -1;
    sleep 1;
    return;
}

# run_job_and_exit($suffix, $work_dir, $host)
#
# Child side of start_one_job: runs legion_run for one job, moves the outputs
# found in the work directory next to the inputs as "<name><suffix>", removes
# the work directory and exits.  Never returns.
sub run_job_and_exit {
    my ($n, $d, $host) = @_;

    # Built as a list so file names and program arguments reach legion_run
    # exactly as given, without a second round of shell parsing.
    my @cmd = ('legion_run');
    push @cmd, '-h', $host if defined $host;
    push @cmd, '-v' if $verbose;
    push @cmd, map { ('-IN',  "$d/$_") } @ins;
    push @cmd, map { ('-IN',  "$d/$_") } @constants;
    push @cmd, map { ('-in',  $_)      } @g_constants;
    push @cmd, map { ('-OUT', "$d/$_") } @outs;
    push @cmd, map { ('-out', "$_$n")  } @g_outs;
    push @cmd, '-o', $console if defined $console;
    push @cmd, $program, @program_args;

    print "@cmd\n" if $verbose;
    if (system(@cmd) != 0) {
        warn "legion_run for job $n exited abnormally (status $?)\n";
    }
    print "job $n finishes.\n" if $verbose;

    # NOTE(review): the console file is requested as "-o <name>" but looked
    # for inside the work directory, as the OUT files are -- confirm that
    # legion_run really writes it there.
    my @expected = @outs;
    push @expected, $console if defined $console;

    my $incomplete = 0;
    for my $o (@expected) {
        if (!-e "$d/$o") {
            $incomplete = 1;
        }
        else {
            move("$d/$o", "$o$n")
                or warn "can't move $d/$o to $o$n: $!\n";
        }
    }
    print "Output for job $n seems to be incomplete.\n" if $incomplete;

    rmtree($d);
    exit(0);
}

# check_jobs()
#
# Polls every recorded child without blocking.  For each one that has
# finished it updates the counters and, if jobs remain, starts the next one
# on the same host.
sub check_jobs {
    for my $p (keys %pids) {
        # waitpid yields the pid when it reaps the child, -1 when the child
        # is already gone, and 0 while it is still running.
        my $reaped = waitpid($p, WNOHANG);
        next if $reaped != $p && $reaped != -1;

        print "Process $p seems to have finished.\n" if $verbose;
        $jobs_done++;
        $jobs_running--;

        # Forget the pid before forking again: the new child may reuse it.
        my $host = delete $pids{$p};
        if ($jobs_started < $count) {
            start_one_job(shift @name_list, $host);
        }
    }
    return;
}