Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.thealgorithms.strings;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Utility class to find the top-k most frequent words.
 *
 * <p>Words are ranked by frequency in descending order. For equal frequencies,
 * words are ranked in lexicographical ascending order (the natural ordering of
 * {@link String}).
 *
 * <p>Reference:
 * https://en.wikipedia.org/wiki/Top-k_problem
 */
public final class TopKFrequentWords {
    private TopKFrequentWords() {
        // Utility class: not meant to be instantiated.
    }

    /**
     * Finds the k most frequent words.
     *
     * <p>The input is always validated in full, even when {@code k} is zero, so a
     * {@code null} element is reported regardless of how many words are requested.
     * If {@code k} exceeds the number of distinct words, all distinct words are returned.
     *
     * @param words input array of words
     * @param k number of words to return
     * @return a new, mutable list of at most {@code k} words ordered by descending
     *         frequency and then by ascending lexicographical order; empty when
     *         {@code k} is zero or {@code words} is empty
     * @throws IllegalArgumentException if words is null, k is negative, or words contains null
     */
    public static List<String> findTopKFrequentWords(String[] words, int k) {
        if (words == null) {
            throw new IllegalArgumentException("Input words array cannot be null.");
        }
        if (k < 0) {
            throw new IllegalArgumentException("k cannot be negative.");
        }

        // Count before any early return so that null elements are rejected even for k == 0.
        Map<String, Integer> frequency = new HashMap<>();
        for (String word : words) {
            if (word == null) {
                throw new IllegalArgumentException("Input words cannot contain null values.");
            }
            frequency.merge(word, 1, Integer::sum);
        }

        // Return a fresh mutable list here too, matching the non-empty path below.
        if (k == 0 || frequency.isEmpty()) {
            return new ArrayList<>();
        }

        // Higher count first; ties fall back to String's natural (lexicographical) order.
        Comparator<String> byFrequencyDescending = Comparator.<String>comparingInt(frequency::get).reversed();
        List<String> candidates = new ArrayList<>(frequency.keySet());
        candidates.sort(byFrequencyDescending.thenComparing(Comparator.naturalOrder()));

        // k may exceed the number of distinct words; clamp to avoid an out-of-range subList.
        int limit = Math.min(k, candidates.size());
        return new ArrayList<>(candidates.subList(0, limit));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.thealgorithms.strings;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import java.util.List;
import java.util.stream.Stream;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Parameterized tests for {@code TopKFrequentWords.findTopKFrequentWords}.
 */
class TopKFrequentWordsTest {

    /**
     * Verifies that each valid input produces the expected ranked list.
     */
    @ParameterizedTest
    @MethodSource("validTestCases")
    void testFindTopKFrequentWords(String[] words, int k, List<String> expected) {
        List<String> actual = TopKFrequentWords.findTopKFrequentWords(words, k);
        assertEquals(expected, actual);
    }

    /**
     * Supplies (words, k, expected) triples for the valid-input test.
     */
    static Stream<Arguments> validTestCases() {
        return Stream.of(
            // "i" and "love" both occur twice; the tie is ordered alphabetically.
            Arguments.of(new String[] {"i", "love", "leetcode", "i", "love", "coding"}, 2, List.of("i", "love")),
            // Strictly decreasing counts: the=4, is=3, sunny=2, day=1.
            Arguments.of(new String[] {"the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"}, 4, List.of("the", "is", "sunny", "day")),
            // "aaa" and "bbb" tie at two occurrences.
            Arguments.of(new String[] {"bbb", "aaa", "bbb", "aaa", "ccc"}, 2, List.of("aaa", "bbb")),
            // k larger than the number of distinct words.
            Arguments.of(new String[] {"one", "two", "three"}, 10, List.of("one", "three", "two")),
            // Empty input.
            Arguments.of(new String[] {}, 3, List.of()),
            // k of zero.
            Arguments.of(new String[] {"x", "x", "y"}, 0, List.of()));
    }

    /**
     * Verifies that each invalid input is rejected with an IllegalArgumentException.
     */
    @ParameterizedTest
    @MethodSource("invalidTestCases")
    void testFindTopKFrequentWordsInvalidInput(String[] words, int k) {
        assertThrows(IllegalArgumentException.class, () -> TopKFrequentWords.findTopKFrequentWords(words, k));
    }

    /**
     * Supplies (words, k) pairs for the invalid-input test.
     */
    static Stream<Arguments> invalidTestCases() {
        return Stream.of(
            // Null array.
            Arguments.of((String[]) null, 1),
            // Null element inside the array.
            Arguments.of(new String[] {"a", null, "b"}, 2),
            // Negative k.
            Arguments.of(new String[] {"a"}, -1));
    }
}
Loading