Nicholas Rinard Keene's Little Bit

Monday, November 26, 2012

Hangman Clever Guessing Strategy

This is a clever guessing strategy for hangman, implemented against a game engine which I cannot show you because I did not write it. This was my answer to an interview code test. This code uses the utility class SmartDictionary as an optimization.

```import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
* A strategy for generating guesses given the current state of a Hangman game.
*
* The strategy is to build a list of all possible words which match the current state of the game (the "matching set", then guess the most common letter
* among those words. This strategy is chosen specifically for the overwhelming benefit of guessing correctly: the guess does not count against you.
*  Moreover, if it is wrong the list words is reduced substantially because it contains no words with that popular letter.
*
* This is not the only way to make a clever guess. Another way would be to look at the way a guessed letter would divide the matching set into somewhat
* evenly sized pieces. That is, it is superior to guess a letter which will divide the list into ten evenly sized lists, rather than into two evenly sized lists,
* or into one large list and many small ones.
*/
public class CleverGuessingStrategy implements GuessingStrategy {
private static final String WORDS_FILE = "/workspace/Hangman/src/words.txt";
private static final int MAX_GUESSES = 5;

/**
* Create a HangmanGame and run a CleverGuessingStrategy against it.
* @param args
*/
public static void main(String[] args) {
try {
// read in the dictionary
ArrayList<String> words = new ArrayList<String>();
String line = null;
}
SmartDictionary dict = new SmartDictionary(words);

// create a list of words to play against
ArrayList<String> secretWords = words;

// play a game for each word in the dictionary
for(String word: secretWords) {
HangmanGame game = new HangmanGame(word, MAX_GUESSES);
GuessingStrategy strategy = new CleverGuessingStrategy(dict);

while(game.gameStatus() == HangmanGame.Status.KEEP_GUESSING) {
strategy.nextGuess(game);
}
System.out.println(String.format("Score: %2d - \"%s\": ", game.currentScore(), word));
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}

private SmartDictionary dict;

// we use these two members as an optimization
// we save a lot of time by not recalculating the previous matching set
private HangmanGame previousGame = null;
private List<String> previousMatchingSet = null;

/**
* Constructor; takes a dictionary of words
*
* @param dict
*/
public CleverGuessingStrategy(SmartDictionary dict) {
this.dict = dict;
}

/**
* Makes one guess, either a letter or a word, for the given game. A clever guess is usually the most common letter among the possible words but there are
* some exceptions and optimizations
*/
public void nextGuess(HangmanGame game) {
// get all the words which could possibly match what we know
if(game != this.previousGame) {
this.previousMatchingSet = null;
this.previousGame = game;
}
List<String> matchingSet = getMatchingSet(game, previousMatchingSet);
this.previousMatchingSet = matchingSet;

// if we are on our last guess, and no guessed letter could narrow down our choices to one, then we may as well just guess a word
if(game.numWrongGuessesRemaining() == 0) {
// each word is equally likely to be the one, so just pick one
String word = matchingSet.get(0);
game.guessWord(word);
return;
}

// if the matching set is three or fewer then it's just as likely to simply start guessing
if(matchingSet.size() <= 3) {
String word = matchingSet.get(0);
game.guessWord(word);
return;
}

// The most important thing to consider is that correct guesses don't count against us, so we pick the most common letter
Character letter = getBestLetter(game, matchingSet);
game.guessLetter(letter);
}

/**
* This method is the keystone of the algorithm. It basically selects the most common letter among the matching set, but it modifies
* it a little bit for letters which should be obvious, or common patterns in English.
*
* @param game
* @param matchingSet
* @return
*/
private Character getBestLetter(HangmanGame game, List<String> matchingSet) {
Set<Character> unguessedLetters = getAllLetters();
unguessedLetters.removeAll(game.getAllGuessedLetters());

Map<Character, Integer> frequentLetters = getLettersByFrequency(matchingSet);
Map<Character, Integer> unguessedFrequentLetters = intersection(frequentLetters, unguessedLetters);

// for one thing, we never need to guess any letter which doesn't increase our information across the word
for(Character letter: unguessedFrequentLetters.keySet()) {
if(allCharPatternsMatch(matchingSet, letter)) {
unguessedFrequentLetters.remove(letter);
}
}

// now put the letters in order
List<Character> sortedLetters = new ArrayList<Character>();
List<Character> mostFrequent = new ArrayList<Character>();
int mostFrequentVal = 0;
int frequencyMax = 999999999;
while(sortedLetters.size() < unguessedFrequentLetters.size()) {
mostFrequent.clear();
for(Character letter: unguessedFrequentLetters.keySet()) {
if(unguessedFrequentLetters.get(letter) >= frequencyMax) {
continue;
}
if(unguessedFrequentLetters.get(letter) > mostFrequentVal) {
mostFrequent.clear();
mostFrequentVal = unguessedFrequentLetters.get(letter);
}
if(unguessedFrequentLetters.get(letter) >= mostFrequentVal) {
}
}
frequencyMax = mostFrequentVal;
mostFrequentVal= 0;
}

// next, if the second-to-last letter of a long word is 'E', then don't pick D, R, or S
// because that is likely to give low information
String guessedSoFar = game.getGuessedSoFar();
if(guessedSoFar.length() > 4) {
if(guessedSoFar.charAt(guessedSoFar.length() - 2) == 'E') {
if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('D');
if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('R');
if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('S');
// furthermore, try to find a vowel in the root word if it is near the top of the list
for(int i = 0; i < 5 && i < unguessedFrequentLetters.size(); i++) {
if(sortedLetters.get(i) == 'A') return 'A';
if(sortedLetters.get(i) == 'I') return 'I';
if(sortedLetters.get(i) == 'O') return 'O';
if(sortedLetters.get(i) == 'U') return 'U';
}
}
}

// if one letter is most common, we use it
if(sortedLetters.size() == 1) {
Integer zerothCharVal = unguessedFrequentLetters.get(sortedLetters.get(0));
Integer firstCharVal = unguessedFrequentLetters.get(sortedLetters.get(1));
if(zerothCharVal > firstCharVal) {
Character letter = sortedLetters.get(0);
return letter;
}
}

// prefer these letters, just because
for(int i = 0; i < 5 && i < unguessedFrequentLetters.size(); i++) {
if(sortedLetters.get(i) == 'M') return 'M';
if(sortedLetters.get(i) == 'F') return 'F';
if(sortedLetters.get(i) == 'U') return 'U';
}

// take whatever letter we have to
Iterator<Character> iter = sortedLetters.iterator();
Character letter = iter.next();
while(!unguessedFrequentLetters.containsKey(letter)) letter = iter.next();

return letter;
}

/**
* Returns a list of words which match the pattern given by the HangmanGame.
*
* @param guessedSoFar
* @param incorrectGuessedLetters
* @param guessedWords
* @param previousMatchingSet
* @return
*/
public List<String> getMatchingSet(HangmanGame game, List<String> previousMatchingSet) {
String guessedSoFar = game.getGuessedSoFar();
Set<Character> incorrectGuessedLetters = game.getIncorrectlyGuessedLetters();
Set<String> guessedWords = game.getIncorrectlyGuessedWords();

ArrayList<String> matchingSet = new ArrayList<String>();

// filter all words of known length according to the supplied pattern
String pattern = guessedSoFar.replace("-", ".");
List<String> startingList = (previousMatchingSet != null) ? previousMatchingSet : getWordsOfLength(pattern.length());
for(String word: startingList) {
if(Pattern.matches(pattern, word)) {
// don't include words which don't contain guessed letters
boolean containsGuessedLetter = false;
for(Character guessedLetter: incorrectGuessedLetters) {
int letterIndex = word.indexOf(guessedLetter);
if(letterIndex >= 0) {
containsGuessedLetter = true;
continue;
}
}
if(containsGuessedLetter == true) {
continue;
}
if(guessedWords.contains(word)) {
continue;
}
}
}

return matchingSet;
}

/**
* Returns true if all the given words contain the given letter at the same places.
*
* @param words
* @param letter
* @return
*/
private boolean allCharPatternsMatch(List<String> words, Character letter) {
String charPatternZero = buildCharPattern(words.get(0), letter);
for(String word: words) {
String charPattern = buildCharPattern(word, letter);
if(charPattern != charPatternZero) {
return false;
}
}
return true;
}

/**
* Build a string which shows the given letter in the given word, and blanks other letters.
* This is used to compare words to see if they have the same pattern for a given letter.
*
* @param word
* @param letter
* @return
*/
private String buildCharPattern(String word, Character letter) {
StringBuilder charPattern = new StringBuilder();
for(int i = 0; i < word.length(); i++) {
charPattern.append(
(word.charAt(i) == letter)
? letter
: "_"
);
}
return charPattern.toString();
}

/**
* Returns a new list containing all letters from validLetters in the same order as the letters
* in letterList.
*
* @param letterList
* @param validLetters
* @return
*/
private Map<Character, Integer> intersection(Map<Character, Integer> letterList, Set<Character> validLetters) {
Map<Character, Integer> intersectionList = new HashMap<Character, Integer>();

for(Character letter: validLetters) {
if(letterList.containsKey(letter)) {
intersectionList.put(letter, letterList.get(letter));
}
}

return intersectionList;
}

/**
* Generates an ordered list of letters in the given list of words. The most frequent letters are at the front of the array.
*
* @param matchingSet
* @return
*/
public Map<Character, Integer> getLettersByFrequency(List<String> matchingSet) {
// count up all the letter frequencies
Map<Character, Integer> letterFrequencies = new HashMap<Character, Integer>();
for(String word: matchingSet) {
for(int i = 0; i < matchingSet.get(0).length(); i++) {
Character wordChar = word.charAt(i);
wordChar = Character.toUpperCase(wordChar);
Integer frequency = letterFrequencies.get(wordChar);
if(frequency == null) frequency = new Integer(0);
frequency++;
letterFrequencies.put(wordChar, frequency);
}
}

return letterFrequencies;
}

/**
* Returns a list of words of the given length. Caches the result.
*
* @param wordLength
* @return
*/
public List<String> getWordsOfLength(int wordLength) {
return dict.wordsByLength(wordLength);
}

/**
* Returns a set of characters containing all upper-case English-language characters.
* @return
*/
public static Set<Character> getAllLetters() {
HashSet<Character> allLetters = new HashSet<Character>();
return allLetters;
}
}
```