380. Smooshed Morse Code

Description

Covert a string, e.g. programmer into Morse code, e.g. .--..-.-----..-..-----..-. /r/dailyprogrammer thread

Solution

"""
_380_smooshed_morse_code.py

Solve https://www.reddit.com/r/dailyprogrammer/comments/cmd1hb/20190805_challenge_380_easy_smooshed_morse_code_1/

"""

from typing import List, Iterable
from collections import defaultdict
import os

"""A mapping of `[a...z]` to Morse code."""
ALPHA_TO_MORSE = {
    "a": ".-", "b": "-...", "c": "-.-.", "d": "-..", "e": ".",  
    "f": "..-.", "g": "--.", "h": "....", "i": "..", "j": ".---", 
    "k": "-.-", "l": ".-..", "m": "--", "n": "-.", "o": "---", 
    "p": ".--.", "q": "--.-", "r": ".-.", "s": "...", "t": "-", 
    "u": "..-", "v": "...-", "w": ".--", "x": "-..-", "y": "-.--", 
    "z": "--.."
}

def smorse(alpha_text: str) -> str:
    """
    Args:
        alpha_text: The string whose characters are in `[a...z]`

    Returns:
        The smooshed morse code representation of `alpha_text`

    Raises:
        ValueError if any character in `alpha_text` is not in `[a...z]`
    """
    smooshed_morse_text = [None] * len(alpha_text)
    for idx, c in enumerate(alpha_text):
        if c not in ALPHA_TO_MORSE:
            raise ValueError(
                f"{c} is not a character in a...z (case-sensitive)"
            )
        smooshed_morse_text[idx] = ALPHA_TO_MORSE[c]
    return "".join(smooshed_morse_text)

def main():
    assert smorse("sos") == "...---..."
    assert smorse("daily") == "-...-...-..-.--"
    assert smorse("programmer") == ".--..-.-----..-..-----..-."
    assert smorse("bits") == "-.....-..."
    assert smorse("three") == "-.....-..."
    
if __name__ == "__main__":
    main()

Bonus Challenges

Bonus Challenge #1

The sequence -...-....-.--. is the code for four different words (needing, nervate, niding, tiling). Find the only sequence that’s the code for 13 different words.

"""challenge_1.py"""

from typing import List

from morse_trie import MorseTrie
from smorse_utils import word_list

def solution() -> str:
    """
    The sequence `-...-....-.--.` is the code for four different words 
    (`needing`, `nervate`, `niding`, `tiling`). Find the only sequence that’s 
    the code for 13 different words.

    Returns: `-....--....`
    """
    morse_trie = MorseTrie(["needing", "nervate", "niding", "tiling"])
    matching_codes = morse_trie.keys_with_count(4)
    assert len(matching_codes) == 1
    assert matching_codes[0] == "-...-....-.--."

    morse_trie = MorseTrie(word_list)
    return morse_trie.keys_with_count(13)[0]

if __name__ == "__main__":
    print(
        f"{solution()} encodes 13 different words"
    )

My solution uses a prefix tree to use less memory(? assuming that a significant number of smorse representations share prefixes; after all, the alphabet has 2 characters, so why not). I didn’t learn about prefix trees until my second CS class. Using a hash table could work as well, and that’s not too involving.

Bonus Challenge #2

autotomous encodes to .-..--------------..-..., which has 14 dashes in a row. Find the only word that has 15 dashes in a row.

"""challenge_2.py"""

from smorse_utils import word_list
from _380_smooshed_morse_code import smorse

def solution() -> str:
    """
    `autotomous` encodes to `.-..--------------..-...`, which has 14 dashes in 
    a row. Find the only word that has 15 dashes in a row.

    Returns: `bottommost` which encodes to `-...---------------...-`
    """
    for word in word_list:
        smorsed_word = smorse(word)
        if "---------------" in smorsed_word:
            return word
    return None

if __name__ == "__main__":
    word = solution()
    print(f"{word} -> {smorse(word)} has 15 dashes in a row")



# Challenge: `--.---.---.--` is one of five 13-character sequences that 
# does not appear in the encoding of any word. Find the other four.
# morse_trie = MorseTrie(alpha_words())
# for idx, word in enumerate(alpha_words()):
#     assert morse_trie.word_exists(word)
#     if idx > 10: break

def get_char_sequences_of_size_n(n: int) -> Iterable[str]:
    morse_literals_lengths = {}
    for morse_literal in ALPHA_TO_MORSE.values():
        length = len(morse_literal) 
        if length not in morse_literals_lengths:
            morse_literals_lengths[length] = []
        morse_literals_lengths[length].append(morse_literal)

    available_lengths = list(morse_literals_lengths.keys())


get_char_sequences_of_size_n(13)

Using python is cheating at this point :-)

Bonus Challenge #3

Call a word perfectly balanced if its code has the same number of dots as dashes. counterdemonstrations is one of two 21-letter words that’s perfectly balanced. Find the other one.

"""challenge_3.py"""

from smorse_utils import word_list
from _380_smooshed_morse_code import smorse

def solution() -> str:
    """
    Call a word perfectly balanced if its code has the same number of dots as 
    dashes. `counterdemonstrations` is one of two 21-letter words that’s 
    perfectly balanced. Find the other one.
    
    Returns: `overcommercialization`
    """

    valid_balanced_words = set()
    for word in word_list:
        if len(word) != 21: continue
        
        smorsed_word = smorse(word)
        difference = 0
        for c in smorsed_word:
            if c == ".": difference += 1
            else: difference -= 1

        if difference == 0: valid_balanced_words.add(word)

    assert len(valid_balanced_words) == 2
    assert "counterdemonstrations" in valid_balanced_words
    valid_balanced_words.discard("counterdemonstrations")
    
    return valid_balanced_words.pop()

if __name__ == "__main__":
    print(f"The other balanced 21-letter word is {solution()}")

Neat challenge. The most convenient properties were our alphabet having only two letters and the question being a decision query, e.g. is word perfectly balanced? These two qualities made an arithmetic approach feasible.

Bonus Challenge #4

protectorate is 12 letters long and encodes to .--..-.----.-.-.----.-..--., which is a palindrome (i.e. the string is the same when reversed). Find the only 13-letter word that encodes to a palindrome.

"""challenge_4.py"""

from _380_smooshed_morse_code import smorse
from smorse_utils import word_list

def solution():
    """
    `protectorate` is 12 letters long and encodes to `.--..-.----.-.-.----.-..--.`, 
    which is a palindrome. Find the only 13-letter word that encodes to a 
    palindrome.

    Returns: `intransigence`
    """
    def __is_palindrome(text):
        idx = 0
        max_idx = int(len(text) / 2)
        while idx < max_idx and text[idx] == text[-(idx + 1)]:
            idx += 1

        if idx == max_idx: return True
        return False

    assert __is_palindrome(smorse("protectorate"))

    valid_palindromes = set()
    for word in word_list:
        if len(word) != 13: continue
        if __is_palindrome(smorse(word)): valid_palindromes.add(word)

    assert len(valid_palindromes) == 1
    return valid_palindromes.pop()

if __name__ == "__main__":
    print(f"The other 13-letter palindrome is {solution()}")

Ah, the good old palindrome. If I had a shilling for every time an interviewer asked me this question…

Bonus Challenge #5

--.---.---.-- is one of five 13-character sequences that does not appear in the encoding of any word. Find the other four.

"""challenge_5.py"""

from typing import List
from collections import defaultdict
import itertools

from _380_smooshed_morse_code import smorse, ALPHA_TO_MORSE
from morse_trie import MorseTrie
from smorse_utils import word_list

def get_coin_change(desired_amount: int, available_coins: List[int]) -> List[List[int]]:
    """
    Args:
        desired_amount: The amount of money that we wish to end up with

        available_coins: 
            The coin denominations. We assume that there is an infinite # of 
            each coin.
            
    Returns:
        A list of possible ways of getting `desired_amount` from `available_coins`
    """
    available_coins.sort()
    combinations_for_n = defaultdict(list)
    for coin in available_coins:
        combinations_for_n[coin] = [[coin]]

    for coin in available_coins:
        for amount in range(coin, desired_amount + 1):
            deficit = amount - coin
            for deficit_combo in combinations_for_n[deficit]:
                combinations_for_n[amount].append(
                    deficit_combo + [coin]
                )

    return combinations_for_n[desired_amount]

def morse_sequences_of_length_n(n: int) -> List[str]:
    """Yields: Unique morse code sequences of length `n`"""
    morse_characters_by_length = defaultdict(list)
    for morse_literal in ALPHA_TO_MORSE.values():
        morse_characters_by_length[len(morse_literal)].append(morse_literal)

    already_seen_sequences = set()
    for length_combo in get_coin_change(n, list(morse_characters_by_length.keys())):
        morse_literals_pool = []
        for length in length_combo:
            morse_literals_pool.append(morse_characters_by_length[length])

        for permutation in itertools.product(*morse_literals_pool):
            sequence = "".join(permutation)
            if sequence not in already_seen_sequences:
                already_seen_sequences.add(sequence)
                yield sequence

def solution() -> List[str]:
    """
    `--.---.---.--` is one of five 13-character sequences that does not appear 
    in the encoding of any word. Find the other four.

    Returns: `['---.---.-----', '--.---.------', '---.----.----', '---.---.---.-']`
    """
    valid_sequences = set()
    search_space = []
    for word in word_list:
        search_space.append(smorse(word))

    search_space = "+".join(search_space) # Any char that's not '-' or '.'

    for sequence in morse_sequences_of_length_n(13):
        if sequence not in search_space:
            valid_sequences.add(sequence)

    assert len(valid_sequences) == 5
    assert "--.---.---.--" in valid_sequences

    valid_sequences.remove("--.---.---.--")
    return list(valid_sequences)

if __name__ == "__main__":
    print(
        f"These 13-character sequences do not appear in any decoding {solution()}"
    )

Not trivial, at least to me. The idea is intuitive: have a list of 13 character sequences and check if they appear in any decoding. Finding 13-character sequences is the coin change problem by another name. I tried to solve it efficiently some time back but failed . Chuck bless itertools.product