#!/usr/bin/perl

# This is a letter-frequency analyzer by Chris Staecker
# Modified 4/28/08


use strict;
use CGI;

# The single CGI request object shared by the rest of the script.
# Direct method-call syntax is used instead of the indirect "new CGI" form,
# which Perl can mis-parse depending on what names are in scope.
my $query = CGI->new;

# Print the top of the page: the HTML head, the introductory paragraph and
# the input form (text area, offset selector and submit button).
#
# Takes no arguments and returns nothing useful.  It reads the submitted
# "text" and "offset" values from the file-level $query object so the form
# is shown again with the user's previous input filled in.
sub maketop {
	# Force scalar context: in list context param() returns every value
	# submitted under the name, which would all be handed to print.
	my $text   = scalar $query->param("text");
	my $offset = scalar $query->param("offset");

	# Anything that is not a plain non-negative integer selects no option,
	# so the browser falls back to the first entry of the list.
	$offset = 0 unless defined $offset and $offset =~ /\A\d+\z/;

	print "<html><head><title>Chris Staecker: Frequency counter</title>";
	print "<link rel=stylesheet type='text/css' href='../pcs.css'></head>";
	# The closing </body> is printed at the end of the script, so the
	# opening tag belongs here.
	print "<body>";
	print "<h1><a href='http://www.messiah.edu/~cstaecker'>Chris Staecker</a>'s frequency counter!</h1>";
	print "<p>This tool can help you crack a substitution or Vigenere cipher. Type your text below, and hit the button to compute the letter frequencies. You can also count frequencies in groups of letters offset by a particular number. (Choosing 5 offsets will count the frequencies in the groups of letters obtained by taking every 5th letter from the text.)</p>";
	print "<form name='freq' action='' method='POST'>";
	print "<p><textarea name='text' rows='20' cols='60'>";
	if (defined $text and length $text) {
		# The text comes straight from the request; escape it so that
		# markup such as "</textarea><script>" cannot break out of the
		# text area.
		print $query->escapeHTML($text);
	} else {
		print "Type the cyphertext here";
	}
	print "</textarea><br>";

	print "Count frequency using this many offsets: <select name='offset'>";
	foreach my $n (1..10) {
		if ($offset == $n) {
			print "<option selected>$n</option>";
		} else {
			print "<option>$n</option>";
		}
	}
	print "</select><br>";
	print "<input type='submit' value=\"Count'em up!\"><br>";
	# Close the paragraph and the form that were opened above.
	print "</p></form>";
}

# Tell whether the first character of the argument is an upper-case ASCII
# letter, i.e. its code point lies in 65..90 ('A'..'Z').
# Returns a true value if so and a false value otherwise.
sub isletter {
	my $code = ord(shift);

	return ($code >= 65 && $code <= 90);
}

# Print the relative letter frequencies of one "offset group" of a text.
#
# Arguments:
#   $t    - the text to analyse; it is upper-cased and everything that is
#           not A-Z is discarded before counting
#   $skip - step between counted letters (every $skip-th letter is counted);
#           values of 1 or less count every letter
#   $o    - index, within the letters-only text, of the first counted letter
#
# Prints one "X: nn.nnn%<br>" entry per letter that occurs and one
# "X: 0<br>" entry per letter that does not, for all 26 letters, highest
# count first.  Letters with equal counts are listed alphabetically so the
# output is the same on every run.  Returns nothing useful.
sub makeoffsetfreqs {
	my ($t, $skip, $o) = @_;

	# all upper-case, then strip out all non alphas in one pass
	my $letters = uc(defined $t ? $t : '');
	$letters =~ tr/A-Z//cd;

	# start at the offset; substr gives undef when the offset lies beyond
	# the end of the string, which simply means there is nothing to count
	my $tail = substr($letters, $o || 0);
	$tail = '' unless defined $tail;

	# a step below 1 (or a missing one) means "count every letter"
	my $step = ($skip && $skip > 1) ? int($skip) : 1;

	# every letter starts at 0 so that missed letters are reported too
	my %freq = map { $_ => 0 } 'A'..'Z';
	my $total = 0;

	for (my $i = 0; $i < length $tail; $i += $step) {
		$freq{ substr($tail, $i, 1) }++;
		$total++;
	}

	# print the frequencies, highest first, ties in alphabetical order
	foreach my $letter (sort { $freq{$b} <=> $freq{$a} or $a cmp $b } keys %freq) {
		if ($freq{$letter}) {
			# a non-zero count implies $total > 0, so the division is safe
			printf("%s: %.3f%%<br>", $letter, 100 * $freq{$letter} / $total);
		} else {
			print "$letter: 0<br>";
		}
	}
}


# Main script: send the HTTP header and the form, then, when text was
# submitted, a results table with one column of letter frequencies per
# offset, and finally the page footer.
print $query->header();

maketop();

# Force scalar context: in list context param() returns every value
# submitted under the name.
my $text = scalar $query->param('text');

if ($text) {
	my $o = scalar $query->param('offset');

	# The form only offers 1..10.  The value comes from the request and is
	# used as a loop bound below, so anything else (missing, non-numeric,
	# or a hand-crafted huge number) falls back to a single offset.
	$o = 1 unless defined $o and $o =~ /\A(?:[1-9]|10)\z/;

	print "<h2><a name='results'>Results</a></h2>";
	print "<p><table><tr>";
	for my $col (0 .. $o - 1) {
		print "<td>Offset $col</td>";
	}
	print "</tr><tr>";
	for my $col (0 .. $o - 1) {
		print "<td width='150'><tt>";
		# column $col counts every $o-th letter, starting at letter $col
		makeoffsetfreqs($text, $o, $col);
		print "</tt></td>";
	}
	print "</tr></table></p>";

}

print "<hr><p><span class='small'>You can see the <a href='frequencycgi.txt'>source code</a> for this CGI script</span></p>";
print "</body></html>";
