#!/usr/bin/perl
# wordcount.pl - tally case-insensitive word frequencies.
#
# Usage: wordcount.pl FILE THRESHOLD
#
# FILE is read line by line; each whole line (minus its trailing newline)
# is treated as one word. Words that differ only in case are counted
# together and reported using the spelling that was seen first.
# Every word whose frequency is >= THRESHOLD is printed, sorted
# alphabetically (ignoring case), one "COUNT WORD" pair per line.
#
# Exit status: 1 if THRESHOLD is missing or not a string of digits;
# dies with a message if FILE cannot be opened.
use strict;
use warnings;

my ($filename, $threshold) = @ARGV;

# Threshold must be an integer. The defined() guard keeps a missing
# argument on the same error path without an "uninitialized" warning.
if (!defined $threshold || $threshold !~ /^\d+$/) {
    print "The threshold must be a number.\n";
    exit 1;
}

# Fail loudly instead of silently reporting nothing for an unreadable file.
open my $fh, "<", $filename
    or die "Cannot open '$filename' for reading: $!\n";

# Read file and tally word frequencies.
# Original case of word does not matter: "the" = "The" = "THE".
# Keyed by the lowercased word; each value is [first-seen spelling, count],
# so a lookup is a single hash access rather than a scan of every word
# seen so far.
my %entry_for;
while (my $line = <$fh>) {
    chomp $line;
    my $key = lc $line;
    if (exists $entry_for{$key}) {
        $entry_for{$key}[1]++;
    }
    else {
        # initialize a new word with a frequency of 1
        $entry_for{$key} = [$line, 1];
    }
}
close $fh
    or die "Error while closing '$filename': $!\n";

# Print words and their frequencies, sorted alphabetically by word. Only
# print a word if its frequency is greater than or equal to the threshold.
# The hash keys are already lowercased, so a plain string sort is the
# case-insensitive ordering.
for my $key (sort keys %entry_for) {
    my ($word, $count) = @{ $entry_for{$key} };
    next if $count < $threshold;
    printf "%4d %s\n", $count, $word;
}
I don't have a lot to say, but this is my little bit.
Sunday, October 28, 2012
wordcount.pl - Count Unique Words in Input
This code counts the occurrences of each unique word in the input file (one word per line, compared case-insensitively) and prints the words whose count is greater than or equal to a given threshold.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment