This is a clever guessing strategy for hangman, implemented against a game engine which I cannot show you because I did not write it. This was my answer to an interview code test. This code uses the utility class SmartDictionary as an optimization.
import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; /** * A strategy for generating guesses given the current state of a Hangman game. * * The strategy is to build a list of all possible words which match the current state of the game (the "matching set", then guess the most common letter * among those words. This strategy is chosen specifically for the overwhelming benefit of guessing correctly: the guess does not count against you. * Moreover, if it is wrong the list words is reduced substantially because it contains no words with that popular letter. * * This is not the only way to make a clever guess. Another way would be to look at the way a guessed letter would divide the matching set into somewhat * evenly sized pieces. That is, it is superior to guess a letter which will divide the list into ten evenly sized lists, rather than into two evenly sized lists, * or into one large list and many small ones. */ public class CleverGuessingStrategy implements GuessingStrategy { private static final String WORDS_FILE = "/workspace/Hangman/src/words.txt"; private static final int MAX_GUESSES = 5; /** * Create a HangmanGame and run a CleverGuessingStrategy against it. * @param args */ public static void main(String[] args) { try { // read in the dictionary BufferedReader reader = new BufferedReader(new FileReader(WORDS_FILE)); ArrayList<String> words = new ArrayList<String>(); String line = null; while((line=reader.readLine())!=null) { words.add(line.toUpperCase()); } reader.close(); SmartDictionary dict = new SmartDictionary(words); // create a list of words to play against ArrayList<String> secretWords = words; // play a game for each word in the dictionary for(String word: secretWords) { HangmanGame game = new HangmanGame(word, MAX_GUESSES); GuessingStrategy strategy = new CleverGuessingStrategy(dict); while(game.gameStatus() == HangmanGame.Status.KEEP_GUESSING) { strategy.nextGuess(game); } System.out.println(String.format("Score: %2d - \"%s\": ", game.currentScore(), word)); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } private SmartDictionary dict; // we use these two members as an optimization // we save a lot of time by not recalculating the previous matching set private HangmanGame previousGame = null; private List<String> previousMatchingSet = null; /** * Constructor; takes a dictionary of words * * @param dict */ public CleverGuessingStrategy(SmartDictionary dict) { this.dict = dict; } /** * Makes one guess, either a letter or a word, for the given game. A clever guess is usually the most common letter among the possible words but there are * some exceptions and optimizations */ public void nextGuess(HangmanGame game) { // get all the words which could possibly match what we know if(game != this.previousGame) { this.previousMatchingSet = null; this.previousGame = game; } List<String> matchingSet = getMatchingSet(game, previousMatchingSet); this.previousMatchingSet = matchingSet; // if we are on our last guess, and no guessed letter could narrow down our choices to one, then we may as well just guess a word if(game.numWrongGuessesRemaining() == 0) { // each word is equally likely to be the one, so just pick one String word = matchingSet.get(0); game.guessWord(word); return; } // if the matching set is three or fewer then it's just as likely to simply start guessing if(matchingSet.size() <= 3) { String word = matchingSet.get(0); game.guessWord(word); return; } // The most important thing to consider is that correct guesses don't count against us, so we pick the most common letter Character letter = getBestLetter(game, matchingSet); game.guessLetter(letter); } /** * This method is the keystone of the algorithm. It basically selects the most common letter among the matching set, but it modifies * it a little bit for letters which should be obvious, or common patterns in English. * * @param game * @param matchingSet * @return */ private Character getBestLetter(HangmanGame game, List<String> matchingSet) { Set<Character> unguessedLetters = getAllLetters(); unguessedLetters.removeAll(game.getAllGuessedLetters()); Map<Character, Integer> frequentLetters = getLettersByFrequency(matchingSet); Map<Character, Integer> unguessedFrequentLetters = intersection(frequentLetters, unguessedLetters); // for one thing, we never need to guess any letter which doesn't increase our information across the word for(Character letter: unguessedFrequentLetters.keySet()) { if(allCharPatternsMatch(matchingSet, letter)) { unguessedFrequentLetters.remove(letter); } } // now put the letters in order List<Character> sortedLetters = new ArrayList<Character>(); List<Character> mostFrequent = new ArrayList<Character>(); int mostFrequentVal = 0; int frequencyMax = 999999999; while(sortedLetters.size() < unguessedFrequentLetters.size()) { mostFrequent.clear(); for(Character letter: unguessedFrequentLetters.keySet()) { if(unguessedFrequentLetters.get(letter) >= frequencyMax) { continue; } if(unguessedFrequentLetters.get(letter) > mostFrequentVal) { mostFrequent.clear(); mostFrequentVal = unguessedFrequentLetters.get(letter); } if(unguessedFrequentLetters.get(letter) >= mostFrequentVal) { mostFrequent.add(letter); } } sortedLetters.addAll(mostFrequent); frequencyMax = mostFrequentVal; mostFrequentVal= 0; } // next, if the second-to-last letter of a long word is 'E', then don't pick D, R, or S // because that is likely to give low information String guessedSoFar = game.getGuessedSoFar(); if(guessedSoFar.length() > 4) { if(guessedSoFar.charAt(guessedSoFar.length() - 2) == 'E') { if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('D'); if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('R'); if(unguessedFrequentLetters.size() > 1) unguessedFrequentLetters.remove('S'); // furthermore, try to find a vowel in the root word if it is near the top of the list for(int i = 0; i < 5 && i < unguessedFrequentLetters.size(); i++) { if(sortedLetters.get(i) == 'A') return 'A'; if(sortedLetters.get(i) == 'I') return 'I'; if(sortedLetters.get(i) == 'O') return 'O'; if(sortedLetters.get(i) == 'U') return 'U'; } } } // if one letter is most common, we use it if(sortedLetters.size() == 1) { Integer zerothCharVal = unguessedFrequentLetters.get(sortedLetters.get(0)); Integer firstCharVal = unguessedFrequentLetters.get(sortedLetters.get(1)); if(zerothCharVal > firstCharVal) { Character letter = sortedLetters.get(0); return letter; } } // prefer these letters, just because for(int i = 0; i < 5 && i < unguessedFrequentLetters.size(); i++) { if(sortedLetters.get(i) == 'M') return 'M'; if(sortedLetters.get(i) == 'F') return 'F'; if(sortedLetters.get(i) == 'U') return 'U'; } // take whatever letter we have to Iterator<Character> iter = sortedLetters.iterator(); Character letter = iter.next(); while(!unguessedFrequentLetters.containsKey(letter)) letter = iter.next(); return letter; } /** * Returns a list of words which match the pattern given by the HangmanGame. * * @param guessedSoFar * @param incorrectGuessedLetters * @param guessedWords * @param previousMatchingSet * @return */ public List<String> getMatchingSet(HangmanGame game, List<String> previousMatchingSet) { String guessedSoFar = game.getGuessedSoFar(); Set<Character> incorrectGuessedLetters = game.getIncorrectlyGuessedLetters(); Set<String> guessedWords = game.getIncorrectlyGuessedWords(); ArrayList<String> matchingSet = new ArrayList<String>(); // filter all words of known length according to the supplied pattern String pattern = guessedSoFar.replace("-", "."); List<String> startingList = (previousMatchingSet != null) ? previousMatchingSet : getWordsOfLength(pattern.length()); for(String word: startingList) { if(Pattern.matches(pattern, word)) { // don't include words which don't contain guessed letters boolean containsGuessedLetter = false; for(Character guessedLetter: incorrectGuessedLetters) { int letterIndex = word.indexOf(guessedLetter); if(letterIndex >= 0) { containsGuessedLetter = true; continue; } } if(containsGuessedLetter == true) { continue; } if(guessedWords.contains(word)) { continue; } matchingSet.add(word); } } return matchingSet; } /** * Returns true if all the given words contain the given letter at the same places. * * @param words * @param letter * @return */ private boolean allCharPatternsMatch(List<String> words, Character letter) { String charPatternZero = buildCharPattern(words.get(0), letter); for(String word: words) { String charPattern = buildCharPattern(word, letter); if(charPattern != charPatternZero) { return false; } } return true; } /** * Build a string which shows the given letter in the given word, and blanks other letters. * This is used to compare words to see if they have the same pattern for a given letter. * * @param word * @param letter * @return */ private String buildCharPattern(String word, Character letter) { StringBuilder charPattern = new StringBuilder(); for(int i = 0; i < word.length(); i++) { charPattern.append( (word.charAt(i) == letter) ? letter : "_" ); } return charPattern.toString(); } /** * Returns a new list containing all letters from validLetters in the same order as the letters * in letterList. * * @param letterList * @param validLetters * @return */ private Map<Character, Integer> intersection(Map<Character, Integer> letterList, Set<Character> validLetters) { Map<Character, Integer> intersectionList = new HashMap<Character, Integer>(); for(Character letter: validLetters) { if(letterList.containsKey(letter)) { intersectionList.put(letter, letterList.get(letter)); } } return intersectionList; } /** * Generates an ordered list of letters in the given list of words. The most frequent letters are at the front of the array. * * @param matchingSet * @return */ public Map<Character, Integer> getLettersByFrequency(List<String> matchingSet) { // count up all the letter frequencies Map<Character, Integer> letterFrequencies = new HashMap<Character, Integer>(); for(String word: matchingSet) { for(int i = 0; i < matchingSet.get(0).length(); i++) { Character wordChar = word.charAt(i); wordChar = Character.toUpperCase(wordChar); Integer frequency = letterFrequencies.get(wordChar); if(frequency == null) frequency = new Integer(0); frequency++; letterFrequencies.put(wordChar, frequency); } } return letterFrequencies; } /** * Returns a list of words of the given length. Caches the result. * * @param wordLength * @return */ public List<String> getWordsOfLength(int wordLength) { return dict.wordsByLength(wordLength); } /** * Returns a set of characters containing all upper-case English-language characters. * @return */ public static Set<Character> getAllLetters() { HashSet<Character> allLetters = new HashSet<Character>(); allLetters.add('A'); allLetters.add('B'); allLetters.add('C'); allLetters.add('D'); allLetters.add('E'); allLetters.add('F'); allLetters.add('G'); allLetters.add('H'); allLetters.add('I'); allLetters.add('J'); allLetters.add('K'); allLetters.add('L'); allLetters.add('M'); allLetters.add('N'); allLetters.add('O'); allLetters.add('P'); allLetters.add('Q'); allLetters.add('R'); allLetters.add('S'); allLetters.add('T'); allLetters.add('U'); allLetters.add('V'); allLetters.add('W'); allLetters.add('X'); allLetters.add('Y'); allLetters.add('Z'); return allLetters; } }
No comments:
Post a Comment