#!/usr/bin/perl

# Master table shared by every section of this script:
#   $hash{year}{race}{gender}{name} = count
%hash = ();

###### parse the years that don't have race data

# Every data row from these files is stored under the pseudo-race "ALL".
my @noraceyears = ('1920','1930','1940','1950','1960','1970');
foreach my $year (@noraceyears) {
  my $path = "data/$year.txt";

  # Three-argument open on a lexical handle.  An unreadable file is
  # reported on STDERR and the year is skipped, rather than silently
  # contributing no rows.
  my $fh;
  unless (open $fh, '<', $path) {
    warn "cannot open $path: $!\n";
    next;
  }

  while (my $line = <$fh>) {
    chomp $line;
    # Data row: leading blanks, one gender digit, three blanks, the name,
    # then a five-digit count.  Anything else is ignored.
    next unless $line =~ /^ +(\d)   (\S+) +(\d\d\d\d\d)\s*$/;
    my ($gen, $name, $count) = ($1, $2, $3);
    $count =~ s/^0+//;    # drop leading zeros ("00000" becomes "")
    my $gender = ($gen == 1) ? "MALE" : "FEMALE";
    $hash{$year}{"ALL"}{$gender}{$name} = $count;
  }
  close $fh;
}

###### parse the years that have some race data

# These files are divided into sections introduced by a " RACE ..." or
# " ETHNICITY ..." header line.  Each data row is stored under the race of
# the section it appears in, and a per-name total across all sections is
# stored under the pseudo-race "ALL".
my @partialraceyears = ('1965','1975','1980','1985','1990','1995','2000');

# Section labels that are folded into a broader category.
my %race_alias = (
  "OTHER BLACK"          => "BLACK",
  "OTHER WHITE"          => "WHITE",
  "OTHER ASIAN"          => "ASIAN",
  "ASIAN & PACIFIC ISL." => "ASIAN",
);

foreach my $year (@partialraceyears) {
  my $path = "data/$year.txt";

  # Report an unreadable file and skip the year instead of silently
  # reading nothing from an unopened handle.
  my $fh;
  unless (open $fh, '<', $path) {
    warn "cannot open $path: $!\n";
    next;
  }

  my %total;       # $total{gender}{name} = count summed over every section
  my $race = "";   # reset per file so rows ahead of the first header never
                   # inherit the last section of the previous year's file

  while (my $line = <$fh>) {
    chomp $line;

    # Section header.  ETHNICITY is tested last, so it wins if a line
    # happens to match both patterns.
    my $label;
    $label = $1 if $line =~ / RACE +\S+ +(.*?) +\d/;
    $label = $1 if $line =~ / ETHNICITY +\S+ +(.*?) +\d/;
    if (defined $label) {
      $race = exists $race_alias{$label} ? $race_alias{$label} : $label;
    }

    # Data row: gender digit, name, five-digit count.
    if ($line =~ /^ +(\d)   (\S+) +(\d\d\d\d\d)\s*$/) {
      my ($gen, $name, $count) = ($1, $2, $3);
      $count =~ s/^0+//;    # drop leading zeros ("00000" becomes "")
      my $gender = ($gen == 1) ? "MALE" : "FEMALE";
      $hash{$year}{$race}{$gender}{$name} = $count;
      $total{$gender}{$name} += $count;
    }
  }
  close $fh;

  # Publish the cross-section totals as race "ALL".
  foreach my $gender ("MALE", "FEMALE") {
    my $by_name = $total{$gender} or next;   # gender absent from this file
    foreach my $name (keys %$by_name) {
      $hash{$year}{"ALL"}{$gender}{$name} = $by_name->{$name};
    }
  }
}

###### parse the recent years

# In these files the gender and race of a section come from header lines
# containing the words Male/Female and All/Hispanic/Asian/White/Black.
# Data rows have the form "<digits> <name> <count>".  Besides storing each
# row, a derived race "OTHER" is computed per name as the "ALL" count minus
# the counts of every named race.
my @fullraceyears = ('2002','2003','2004');

# Header keywords in the order they are tested: when one line contains
# several keywords, the last matching entry decides the race.
my @race_headers = (
  [ qr/All/,      "ALL"      ],
  [ qr/Hispanic/, "HISPANIC" ],
  [ qr/Asian/,    "ASIAN"    ],
  [ qr/White/,    "WHITE"    ],
  [ qr/Black/,    "BLACK"    ],
);

foreach my $year (@fullraceyears) {
  my $path = "data/$year.txt";

  # Report an unreadable file and skip the year instead of silently
  # reading nothing from an unopened handle.
  my $fh;
  unless (open $fh, '<', $path) {
    warn "cannot open $path: $!\n";
    next;
  }

  my %residual;           # $residual{gender}{name} = ALL count minus race counts
  my ($gender, $race);    # current section; reset per file so nothing leaks
                          # in from the previous file
  my $orphans = 0;        # data rows seen before the section was known

  while (my $line = <$fh>) {
    chomp $line;

    # Recognise data rows first and keep them away from the header tests:
    # a name that merely contains a header keyword (e.g. "Allison" holds
    # "All", "Malena" holds "Male") must not switch the current section.
    if ($line =~ /^\d+ (\S+) (\d+)\s*$/) {
      my ($name, $count) = ($1, $2);
      unless (defined $gender && defined $race) {
        $orphans++;
        next;
      }
      $count =~ s/^0+//;    # drop leading zeros
      $hash{$year}{$race}{$gender}{$name} = $count;
      if ($race eq "ALL") {
        $residual{$gender}{$name} += $count;
      } else {
        $residual{$gender}{$name} -= $count;
      }
      next;
    }

    # Header (or other non-data) line: update the current section.
    $gender = "MALE"   if $line =~ /Male/;
    $gender = "FEMALE" if $line =~ /Female/;
    foreach my $header (@race_headers) {
      $race = $header->[1] if $line =~ $header->[0];
    }
  }
  close $fh;

  warn "$path: skipped $orphans data row(s) before the first section header\n"
    if $orphans;

  # Whatever is left of the ALL count after subtracting the named races.
  foreach my $sex ("MALE", "FEMALE") {
    my $by_name = $residual{$sex} or next;   # gender absent from this file
    foreach my $name (keys %$by_name) {
      $hash{$year}{"OTHER"}{$sex}{$name} = $by_name->{$name};
    }
  }
}

# Column header line of the tab-separated report.
print join("\t", qw(Year Race Sex Name Count Rank)), "\n";

# Sort comparator for names: highest count first, looked up in the
# package-level %h4 (name => count), which the caller must fill before
# sorting.  Names with equal counts are ordered alphabetically so the
# result does not depend on hash iteration order.
# Note: the tie-break uses || rather than "or", because
# "return X or Y" would parse as "(return X) or Y".
sub byh4 {
  return $h4{$b} <=> $h4{$a} || $a cmp $b;
}

# Report body: one line per (year, race, gender, name) with a non-zero
# count.  Years and races are visited in string-sorted order, genders as
# MALE then FEMALE, names in the order produced by byh4.  Names with the
# same count share the rank of the first of them, and the next distinct
# count takes its 1-based position in the list.
for my $yr (sort keys %hash) {
  my $races = $hash{$yr};
  for my $grp (sort keys %$races) {
    for my $sex (qw(MALE FEMALE)) {
      # byh4 looks counts up in the package-level %h4, so the current
      # name => count table is copied there (empty if the gender is absent).
      %h4 = %{ $races->{$grp}{$sex} || {} };

      my $position  = 0;          # 1-based index in the sorted list
      my $shown     = 0;          # rank printed for the current run of ties
      my $previous  = 99999999;   # sentinel: no count seen yet

      for my $who (sort byh4 keys %h4) {
        my $n = $h4{$who};
        $position++;
        unless ($n == $previous) {
          $previous = $n;
          $shown    = $position;
        }
        # Zero counts still take up a position but are not printed.
        print join("\t", $yr, $grp, $sex, $who, $n, $shown), "\n"
          if $n != 0;
      }
    }
  }
}


