Find the shortest unique prefix for every word in an array

Given a list of words in lexicographic order where no word is the prefix of another, find the shortest unique prefix to identify each word in the array uniquely.

For example,

Input: [AND, BONFIRE, BOOL, CASE, CATCH, CHAR]

Output: [A, BON, BOO, CAS, CAT, CH]

Explanation:

A can uniquely identify AND
BON can uniquely identify BONFIRE
BOO can uniquely identify BOOL
CAS can uniquely identify CASE
CAT can uniquely identify CATCH
CH can uniquely identify CHAR

Practice this problem

The idea is to construct a Trie from the given words. While inserting the words in Trie, keep track of the total number of times each Trie node is visited. This can be easily done by maintaining an extra field in each Trie node for storing the frequency. Each Trie node’s frequency is initially 0 and incremented by one each time the Trie node is visited during the word’s insertion.

Finally, traverse the Trie in a preorder fashion to print the shortest unique prefix for each word in the Trie. For every Trie node being traversed, check its frequency. The prefix ending at a Trie node is the shortest unique prefix if its frequency is 1. Once the shortest unique prefix is identified, move to the next path in preorder traversal instead of traversing down further on the current Trie node.

Note that a prefix comprises all characters in a path from the root to a Trie node. The algorithm can be implemented as follows in C++, Java, and Python:

C++

#include <iostream>

#include <vector>

#include <map>

#include <string>

using namespace std;

// Data structure to store a Trie node

struct TrieNode

{

// each node stores a map to its child nodes

map<char, TrieNode*> child;

// keep track of the total number of times the current node is visited

// while inserting data in Trie

int freq = 0;

};

// Function to insert a given string into a Trie

void insert(TrieNode* &root, string word)

{

// start from the root node

TrieNode* curr = root;

for (char c: word)

{

// create a new node if the path doesn't exist

if (curr->child.find(c) == curr->child.end()) {

curr->child[c] = new TrieNode();

}

// increment frequency

curr->child[c]->freq++;

// go to the next node

curr = curr->child[c];

}

// Function to recursively traverse the Trie in a preorder fashion and

// print the shortest unique prefix for each word in the Trie

void printShortestPrefix(TrieNode *root, string word_so_far)

{

if (root == nullptr) {

return;

}

// print `word_so_far` if the current Trie node is visited only once

if (root->freq == 1)

{

cout << word_so_far << endl;

return;

}

// recur for all child nodes

for (auto &child: root->child) {

printShortestPrefix(child.second, word_so_far + child.first);

}

// Find the shortest unique prefix for every word in a given array

void findShortestPrefix(vector<string> &words)

{

// construct a Trie from the given items

TrieNode* root = new TrieNode();

for (string s: words) {

insert(root, s);

}

// Recursively traverse the Trie in a preorder fashion to list all prefixes

printShortestPrefix(root, "");

}

int main()

{

vector<string> words = { "AND", "BONFIRE", "BOOL", "CASE", "CATCH", "CHAR" };

findShortestPrefix(words);

return 0;

}

Download Run Code

Output:

A
BON
BOO
CAS
CAT
CH

Java

import java.util.HashMap;

import java.util.Map;

// A class to store a Trie node

class TrieNode

{

// each node stores a map to its child nodes

Map<Character, TrieNode> child = new HashMap<>();

// keep track of the total number of times the current node is visited

// while inserting data in Trie

int freq = 0;

}

class Main

{

// Function to insert a given string into a Trie

public static void insert(TrieNode root, String word)

{

// start from the root node

TrieNode curr = root;

for (char c: word.toCharArray())

{

// create a new node if the path doesn't exist

curr.child.putIfAbsent(c, new TrieNode());

// increment frequency

curr.child.get(c).freq++;

// go to the next node

curr = curr.child.get(c);

}

// Function to recursively traverse the Trie in a preorder fashion and

// print the shortest unique prefix for each word in the Trie

public static void printShortestPrefix(TrieNode root, String word_so_far)

{

if (root == null) {

return;

}

// print `word_so_far` if the current Trie node is visited only once

if (root.freq == 1)

{

System.out.println(word_so_far);

return;

}

// recur for all child nodes

for (Map.Entry<Character, TrieNode> child: root.child.entrySet()) {

printShortestPrefix(child.getValue(), word_so_far + child.getKey());

}

// Find the shortest unique prefix for every word in a given array

public static void findShortestPrefix(String[] words)

{

// construct a Trie from the given items

TrieNode root = new TrieNode();

for (String s: words) {

insert(root, s);

}

// Recursively traverse the Trie in a preorder fashion to list all prefixes

printShortestPrefix(root, "");

}

public static void main(String[] args)

{

String[] words = { "AND", "BONFIRE", "BOOL", "CASE", "CATCH", "CHAR" };

findShortestPrefix(words);

}

Download Run Code

Output:

A
BON
BOO
CAS
CAT
CH

Python

# A class to store a Trie node

class TrieNode:

def __init__(self):

# each node stores a dictionary to its child nodes

self.child = {}

# keep track of the total number of times the current node is visited

# while inserting data in Trie

self.freq = 0

# Function to insert a given string into a Trie

def insert(root, word):

# start from the root node

curr = root

for c in word:

# create a new node if the path doesn't exist

curr.child.setdefault(c, TrieNode())

# increment frequency

curr.child[c].freq += 1

# go to the next node

curr = curr.child[c]

# Function to recursively traverse the Trie in a preorder fashion and

# print the shortest unique prefix for each word in the Trie

def printShortestPrefix(root, word_so_far):

if root is None:

return

# print `word_so_far` if the current Trie node is visited only once

if root.freq == 1:

print(word_so_far)

return

# recur for all child nodes

for k, v in root.child.items():

printShortestPrefix(v, word_so_far + k)

# Find the shortest unique prefix for every word in a given array

def findShortestPrefix(words):

# construct a Trie from the given items

root = TrieNode()

for s in words:

insert(root, s)

# Recursively traverse the Trie in a preorder fashion to list all prefixes

printShortestPrefix(root, '')

if __name__ == '__main__':

words = ['AND', 'BONFIRE', 'BOOL', 'CASE', 'CATCH', 'CHAR']

findShortestPrefix(words)

Download Run Code

Output:

A
BOO
BON
CAT
CAS
CH

The time complexity of the above solution is O(N.M), where N is the total number of words and M is the maximum word length.