I don't have a lot to say, but this is my little bit.

Monday, November 26, 2012

Hangman Clever Guessing Strategy

This is a clever guessing strategy for hangman, implemented against a game engine which I cannot show you because I did not write it. This was my answer to an interview code test. This code uses the utility class SmartDictionary as an optimization.

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * A strategy for generating guesses given the current state of a Hangman game.
 * 
 * The strategy is to build a list of all possible words which match the current state of the game (the "matching set", then guess the most common letter
 * among those words. This strategy is chosen specifically for the overwhelming benefit of guessing correctly: the guess does not count against you.
 *  Moreover, if it is wrong the list words is reduced substantially because it contains no words with that popular letter.
 * 
 * This is not the only way to make a clever guess. Another way would be to look at the way a guessed letter would divide the matching set into somewhat
 * evenly sized pieces. That is, it is superior to guess a letter which will divide the list into ten evenly sized lists, rather than into two evenly sized lists,
 * or into one large list and many small ones.
 */
public class CleverGuessingStrategy implements GuessingStrategy {
    private static final String WORDS_FILE = "/workspace/Hangman/src/words.txt";
    private static final int MAX_GUESSES = 5;

    /**
     * Create a HangmanGame and run a CleverGuessingStrategy against it.
     * @param args
     */
    public static void main(String[] args) {
        try {
            // read in the dictionary
            BufferedReader reader = new BufferedReader(new FileReader(WORDS_FILE));
            ArrayList<String> words = new ArrayList<String>();
            String line = null;
            while((line=reader.readLine())!=null) {
               words.add(line.toUpperCase());
            }
            reader.close();
            SmartDictionary dict = new SmartDictionary(words);
            
            // create a list of words to play against
            ArrayList<String> secretWords = words;
            
            // play a game for each word in the dictionary
            for(String word: secretWords) {
                HangmanGame game = new HangmanGame(word, MAX_GUESSES);
                GuessingStrategy strategy = new CleverGuessingStrategy(dict);
                
                while(game.gameStatus() == HangmanGame.Status.KEEP_GUESSING) {
                    strategy.nextGuess(game);
                }
                System.out.println(String.format("Score: %2d - \"%s\": ", game.currentScore(), word));
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }        
    }
    
    private SmartDictionary dict;
    
    // we use these two members as an optimization
    // we save a lot of time by not recalculating the previous matching set
    private HangmanGame previousGame = null;
    private List<String> previousMatchingSet = null;
    
    /**
     * Constructor; takes a dictionary of words
     * 
     * @param dict
     */
    public CleverGuessingStrategy(SmartDictionary dict) {
        this.dict = dict;
    }
    
    /**
     * Makes one guess, either a letter or a word, for the given game. A clever guess is usually the most common letter among the possible words but there are
     * some exceptions and optimizations
     */
    public void nextGuess(HangmanGame game) {
        // get all the words which could possibly match what we know
        if(game != this.previousGame) {
            this.previousMatchingSet = null;
            this.previousGame = game;
        }
        List<String> matchingSet = getMatchingSet(game, previousMatchingSet);
        this.previousMatchingSet = matchingSet;
        
        // if we are on our last guess, and no guessed letter could narrow down our choices to one, then we may as well just guess a word
        if(game.numWrongGuessesRemaining() == 0) {
            // each word is equally likely to be the one, so just pick one
            String word = matchingSet.get(0);
            game.guessWord(word);
            return;
        }
        
        // if the matching set is three or fewer then it's just as likely to simply start guessing
        if(matchingSet.size() <= 3) {
            String word = matchingSet.get(0);
            game.guessWord(word);
            return;
        }
        
        // The most important thing to consider is that correct guesses don't count against us, so we pick the most common letter
        Character letter = getBestLetter(game, matchingSet);
        game.guessLetter(letter);
    }

    /**
     * This method is the keystone of the algorithm. It basically selects the most common letter among the matching set, but it modifies
     * it a little bit for letters which should be obvious, or common patterns in English.
     * 
     * @param game
     * @param matchingSet
     * @return
     */
    private Character getBestLetter(HangmanGame game, List<String> matchingSet) {
        Set<Character> unguessedLetters = getAllLetters();
        unguessedLetters.removeAll(game.getAllGuessedLetters());
        
        Map<Character, Integer> frequentLetters = getLettersByFrequency(matchingSet);
        Map<Character, Integer> unguessedFrequentLetters = intersection(frequentLetters, unguessedLetters);
        
        // for one thing, we never need to guess any letter which doesn't increase our information across the word
        for(Character letter: unguessedFrequentLetters.keySet()) {
            if(allCharPatternsMatch(matchingSet, letter)) {
                unguessedFrequentLetters.remove(letter);
            }
        }
        
        // now put the letters in order
        List<Character> sortedLetters = new ArrayList<Character>();
        List<Character> mostFrequent = new ArrayList<Character>();
        int mostFrequentVal = 0;
        int frequencyMax = 999999999;
        while(sortedLetters.size() < unguessedFrequentLetters.size()) {
            mostFrequent.clear();
            for(Character letter: unguessedFrequentLetters.keySet()) {
                if(unguessedFrequentLetters.get(letter) >= frequencyMax) {
                    continue;
                }
                if(unguessedFrequentLetters.get(letter) > mostFrequentVal) {
                    mostFrequent.clear();
                    mostFrequentVal = unguessedFrequentLetters.get(letter);
                }
                if(unguessedFrequentLetters.get(letter) >= mostFrequentVal) {
                    mostFrequent.add(letter);
                }
            }
            sortedLetters.addAll(mostFrequent);
            frequencyMax = mostFrequentVal;
            mostFrequentVal= 0;
        }
        
        // next, if the second-to-last letter of a long word is 'E', then don't pick D, R, or S
        // because that is likely to give low information
        String guessedSoFar = game.getGuessedSoFar();
        if(guessedSoFar.length() > 4) {
            if(guessedSoFar.charAt(guessedSoFar.length() - 2) == 'E') {
                if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('D');
                if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('R');
                if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('S');
                // furthermore, try to find a vowel in the root word if it is near the top of the list
                for(int i = 0; i < 5 && i < unguessedFrequentLetters.size(); i++) {
                    if(sortedLetters.get(i) == 'A') return 'A';
                    if(sortedLetters.get(i) == 'I') return 'I';
                    if(sortedLetters.get(i) == 'O') return 'O';
                    if(sortedLetters.get(i) == 'U') return 'U';
                }
            }
        }
        
        // if one letter is most common, we use it
        if(sortedLetters.size() == 1) {
            Integer zerothCharVal = unguessedFrequentLetters.get(sortedLetters.get(0));
            Integer firstCharVal = unguessedFrequentLetters.get(sortedLetters.get(1));
            if(zerothCharVal > firstCharVal) {
                Character letter = sortedLetters.get(0);
                return letter;
            }
        }

        // prefer these letters, just because
        for(int i = 0; i < 5 && i < unguessedFrequentLetters.size(); i++) {
            if(sortedLetters.get(i) == 'M') return 'M';
            if(sortedLetters.get(i) == 'F') return 'F';
            if(sortedLetters.get(i) == 'U') return 'U';
        }
        
        // take whatever letter we have to
        Iterator<Character> iter = sortedLetters.iterator();
        Character letter = iter.next();
        while(!unguessedFrequentLetters.containsKey(letter)) letter = iter.next();
        
        return letter;
    }
    
    /**
     * Returns a list of words which match the pattern given by the HangmanGame.
     * 
     * @param guessedSoFar
     * @param incorrectGuessedLetters
     * @param guessedWords 
     * @param previousMatchingSet 
     * @return
     */
    public List<String> getMatchingSet(HangmanGame game, List<String> previousMatchingSet) {
        String guessedSoFar = game.getGuessedSoFar();
        Set<Character> incorrectGuessedLetters = game.getIncorrectlyGuessedLetters();
        Set<String> guessedWords = game.getIncorrectlyGuessedWords();
        
        ArrayList<String> matchingSet = new ArrayList<String>();
        
        // filter all words of known length according to the supplied pattern
        String pattern = guessedSoFar.replace("-", ".");
        List<String> startingList = (previousMatchingSet != null) ? previousMatchingSet : getWordsOfLength(pattern.length());
        for(String word: startingList) {
            if(Pattern.matches(pattern, word)) {
                // don't include words which don't contain guessed letters
                boolean containsGuessedLetter = false;
                for(Character guessedLetter: incorrectGuessedLetters) {
                    int letterIndex = word.indexOf(guessedLetter);
                    if(letterIndex >= 0) {
                        containsGuessedLetter = true;
                        continue;
                    }
                }
                if(containsGuessedLetter == true) {
                    continue;
                }
                if(guessedWords.contains(word)) {
                    continue;
                }
                matchingSet.add(word);
            }
        }
        
        return matchingSet;
    }

    /**
     * Returns true if all the given words contain the given letter at the same places.
     * 
     * @param words
     * @param letter
     * @return
     */
    private boolean allCharPatternsMatch(List<String> words, Character letter) {
        String charPatternZero = buildCharPattern(words.get(0), letter);
        for(String word: words) {
            String charPattern = buildCharPattern(word, letter);
            if(charPattern != charPatternZero) {
                return false;
            }
        }
        return true;
    }
    
    /**
     * Build a string which shows the given letter in the given word, and blanks other letters.
     * This is used to compare words to see if they have the same pattern for a given letter.
     * 
     * @param word
     * @param letter
     * @return
     */
    private String buildCharPattern(String word, Character letter) {
        StringBuilder charPattern = new StringBuilder();
        for(int i = 0; i < word.length(); i++) {
            charPattern.append(
                    (word.charAt(i) == letter)
                    ? letter
                    : "_"
                );
        }
        return charPattern.toString();
    }
    
    /**
     * Returns a new list containing all letters from validLetters in the same order as the letters
     * in letterList.
     * 
     * @param letterList
     * @param validLetters
     * @return
     */
    private Map<Character, Integer> intersection(Map<Character, Integer> letterList, Set<Character> validLetters) {
        Map<Character, Integer> intersectionList = new HashMap<Character, Integer>();
        
        for(Character letter: validLetters) {
            if(letterList.containsKey(letter)) {
                intersectionList.put(letter, letterList.get(letter));
            }
        }
        
        return intersectionList;
    }

    /**
     * Generates an ordered list of letters in the given list of words. The most frequent letters are at the front of the array.
     * 
     * @param matchingSet
     * @return
     */
    public Map<Character, Integer> getLettersByFrequency(List<String> matchingSet) {
        // count up all the letter frequencies
        Map<Character, Integer> letterFrequencies = new HashMap<Character, Integer>();
        for(String word: matchingSet) {
            for(int i = 0; i < matchingSet.get(0).length(); i++) {
                Character wordChar = word.charAt(i);
                wordChar = Character.toUpperCase(wordChar);
                Integer frequency = letterFrequencies.get(wordChar);
                if(frequency == null) frequency = new Integer(0);
                frequency++;
                letterFrequencies.put(wordChar, frequency);
            }
        }
        
        return letterFrequencies;
    }

    /**
     * Returns a list of words of the given length. Caches the result.
     * 
     * @param wordLength
     * @return
     */
    public List<String> getWordsOfLength(int wordLength) {
        return dict.wordsByLength(wordLength);
    }

    /**
     * Returns a set of characters containing all upper-case English-language characters.
     * @return
     */
    public static Set<Character> getAllLetters() {
        HashSet<Character> allLetters = new HashSet<Character>();
        allLetters.add('A');
        allLetters.add('B');
        allLetters.add('C');
        allLetters.add('D');
        allLetters.add('E');
        allLetters.add('F');
        allLetters.add('G');
        allLetters.add('H');
        allLetters.add('I');
        allLetters.add('J');
        allLetters.add('K');
        allLetters.add('L');
        allLetters.add('M');
        allLetters.add('N');
        allLetters.add('O');
        allLetters.add('P');
        allLetters.add('Q');
        allLetters.add('R');
        allLetters.add('S');
        allLetters.add('T');
        allLetters.add('U');
        allLetters.add('V');
        allLetters.add('W');
        allLetters.add('X');
        allLetters.add('Y');
        allLetters.add('Z');
        return allLetters;
    }
}

No comments:

Post a Comment