#!/usr/bin/perl
#
# wordcount.pl - tally how often each word occurs in a file.
#
# Usage: wordcount.pl FILE THRESHOLD
#
#   FILE       Input file; every line is treated as one word (the line is
#              chomped but not split, so blank lines count as the empty word).
#   THRESHOLD  Non-negative integer. Only words whose frequency is greater
#              than or equal to this value are printed.
#
# Words are compared case-insensitively ("the" = "The" = "THE"); the spelling
# printed is the one that appeared first in the input. Output is sorted
# alphabetically (case-insensitively), one "COUNT WORD" pair per line.
#
# Exit status: 1 if THRESHOLD is missing or not an integer; the script dies
# with a diagnostic if FILE cannot be opened or closed.

use strict;
use warnings;

my ($filename, $threshold) = @ARGV;

# Threshold must be an integer. The defined() guard covers the case where
# fewer than two arguments were supplied; \A...\z (rather than ^...$) also
# rejects a value with a trailing newline.
if (!defined $threshold || $threshold !~ /\A[0-9]+\z/) {
    print "The threshold must be a number.\n";
    exit 1;
}

# Fail loudly if the file cannot be read instead of silently printing nothing.
open my $fh, '<', $filename
    or die "Cannot open '$filename' for reading: $!\n";

# Read file and tally word frequencies.
# Original case of word does not matter: "the" = "The" = "THE"
my %count_for;       # lowercased word => number of occurrences
my %spelling_for;    # lowercased word => spelling as first seen in the input

while (my $line = <$fh>) {
    chomp $line;
    my $key = lc $line;

    # Remember the first spelling we encounter; later variants only add
    # to the count.
    $spelling_for{$key} = $line unless exists $spelling_for{$key};
    $count_for{$key}++;
}

close $fh
    or die "Cannot close '$filename': $!\n";

# Print words and their frequencies, sorted alphabetically by word. Only
# print a word if its frequency is greater than or equal to the threshold.
# The hash keys are already lowercased, so a plain string sort gives the
# case-insensitive ordering.
for my $key (sort keys %count_for) {
    next if $count_for{$key} < $threshold;
    printf "%4d %s\n", $count_for{$key}, $spelling_for{$key};
}
I don't have a lot to say, but this is my little bit.
Sunday, October 28, 2012
wordcount.pl - Count Unique Words in Input
This code counts the occurrences of each unique word in the input (one word per line, compared case-insensitively) and prints the words whose frequency is greater than or equal to a given threshold.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment