Find the top 5 highest occurring starting alphabets of words in US Presidents columns. If Name is James K. Polk, then starting alphabets are J, K & P. Sort it on the basis of descending order of frequency. This is case insensitive.
📌 Challenge Details and Links
ExcelBI Excel Challenge Number: 138
Challenge Difficulty: ⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Top Initial Letters Count with Power Query
Power Query solution 1 for Top Initial Letters Count, proposed by Zoran Milokanović:
let
Source = Excel.CurrentWorkbook(){[Name = "Presidents"]}[Content],
AddedAlphabet = Table.AddColumn(
Source,
"Alphabet",
each List.Transform(Text.Split([US Presidents], " "), each Text.Start(_, 1))
),
ExpandedAlphabet = Table.ExpandListColumn(AddedAlphabet, "Alphabet"),
GroupedAlphabet = Table.Group(
ExpandedAlphabet,
{"Alphabet"},
{{"Frequency", each Table.RowCount(_), Int64.Type}}
),
GroupedFrequence = Table.Group(GroupedAlphabet, {"Frequency"}, {{"Alphabet", each _[Alphabet]}}),
SortedFrequence = Table.Sort(GroupedFrequence, {{"Frequency", Order.Descending}}),
AddedTopN = Table.AddIndexColumn(SortedFrequence, "TopN", 1, 1, Int64.Type),
FilteredTop5 = Table.SelectRows(AddedTopN, each ([TopN] <= 5)),
Solution = Table.ExpandListColumn(FilteredTop5, "Alphabet")[[Alphabet], [Frequency]]
in
Solution
Power Query solution 2 for Top Initial Letters Count, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Iniciales = Table.AddColumn(
Source,
"Alpha",
each
let
a = Text.Split([US Presidents], " "),
b = List.Transform(a, each Text.Start(_, 1))
in
b
)[[Alpha]],
Listado = Table.ExpandListColumn(Iniciales, "Alpha"),
Agrupar = Table.Group(Listado, {"Alpha"}, {{"Freq", each Table.RowCount(_), Int64.Type}}),
Top = List.Last(
List.FirstN(
Table.Distinct(Table.Sort(Agrupar, {{"Freq", Order.Descending}}), {"Freq"})[Freq],
5
)
),
Filtro = Table.SelectRows(Agrupar, each [Freq] >= Top),
Sol = Table.Sort(Filtro, {{"Freq", Order.Descending}, {"Alpha", Order.Ascending}})
in
Sol
Power Query solution 3 for Top Initial Letters Count, proposed by Luan Rodrigues:
let
Fonte = Tabela1,
lst = Table.FromList(
List.Combine(
Table.AddColumn(
Fonte,
"Personalizar",
each Text.ToList(Text.Select([US Presidents], {"A" .. "Z"}))
)[Personalizar]
)
),
gp = Table.Group(lst, {"Column1"}, {{"Contagem", each Table.RowCount(_), Int64.Type}}),
fil = Table.SelectRows(
gp,
each List.Contains(List.MaxN(List.Distinct(gp[Contagem]), 5), [Contagem])
),
res = Table.Sort(fil, {{"Contagem", Order.Descending}})
in
res
Power Query solution 4 for Top Initial Letters Count, proposed by Brian Julius:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Dupe = Table.DuplicateColumn(Source, "US Presidents", "Alpahbet"),
ReplaceVanBuren = Table.ReplaceValue(
Dupe,
"Martin Van Buren",
"Martin VanBuren",
Replacer.ReplaceText,
{"Alpahbet"}
),
SplitDelim = Table.ExpandListColumn(
Table.TransformColumns(
ReplaceVanBuren,
{
{
"Alpahbet",
Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv),
let
itemType = (type nullable text) meta [Serialized.Text = true]
in
type {itemType}
}
}
),
"Alpahbet"
),
SplitPos = Table.SplitColumn(
SplitDelim,
"Alpahbet",
Splitter.SplitTextByPositions({0, 1}, false),
{"Alpahbet.1", "Alpahbet.2"}
),
Group = Table.Group(SplitPos, {"Alpahbet.1"}, {{"Frequency", each Table.RowCount(_), Int64.Type}}),
AddRank = Table.RenameColumns(
Table.RemoveColumns(
Table.SelectRows(
Table.AddRankColumn(
Group,
"Rank",
{"Frequency", Order.Descending},
[RankKind = RankKind.Dense]
),
each [Rank] <= 5
),
"Rank"
),
{"Alpahbet.1", "Alphabet"}
)
in
AddRank
Power Query solution 5 for Top Initial Letters Count, proposed by Jaroslaw Kujawa:
let
Source = Excel.CurrentWorkbook(){[Name = "Table10"]}[Content],
#"US Presidents" = Text.Remove(
Text.Combine(Source[US Presidents], " "),
{"a" .. "z"} & {" ", "."}
),
#"Imported Text" = Table.FromList(Lines.FromText(#"US Presidents")),
#"Split Column by Position" = Table.SplitColumn(
#"Imported Text",
"Column1",
Splitter.SplitTextByRepeatedLengths(1),
Text.Length(#"Imported Text"[Column1]{0})
),
#"Unpivoted Columns" = Table.UnpivotOtherColumns(
#"Split Column by Position",
{},
"Attribute",
"Value"
),
#"Removed Columns" = Table.RemoveColumns(#"Unpivoted Columns", {"Attribute"}),
#"Grouped Rows" = Table.Group(
#"Removed Columns",
{"Value"},
{{"Count", each Table.RowCount(_), Int64.Type}}
),
Custom1 = Table.AddColumn(
#"Grouped Rows",
"Custom",
each [Count] >= List.Min(List.MaxN(List.Distinct(#"Grouped Rows"[Count]), 5))
),
#"Filtered Rows" = Table.SelectRows(Custom1, each ([Custom] = true)),
#"Sorted Rows" = Table.Sort(#"Filtered Rows", {{"Count", Order.Descending}}),
#"Removed Columns1" = Table.RemoveColumns(#"Sorted Rows", {"Custom"})
in
#"Removed Columns1"
Power Query solution 6 for Top Initial Letters Count, proposed by Rafael González B.:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
ChangeToText = Table.TransformColumnTypes(Source, {{"US Presidents", type text}}),
SplitterListToTable = Table.AddColumn(
ChangeToText,
"SplitterList",
each Table.FromList(
Text.Split([US Presidents], " "),
Splitter.SplitByNothing(),
{"Names"},
null,
ExtraValues.Error
)
),
ExtractFirstLetters = Table.AddColumn(
SplitterListToTable,
"TableLists",
each Table.AddColumn([SplitterList], "Letters", each Text.Start([Names], 1))[[Letters]]
),
OnluColumnLetters = Table.Combine(
Table.RemoveColumns(ExtractFirstLetters, {"US Presidents", "SplitterList"})[TableLists]
),
GroupByLetters = Table.Group(
OnluColumnLetters,
{"Letters"},
{{"Recount", each Table.RowCount(_), Int64.Type}}
),
SortsByFrequency = Table.Sort(GroupByLetters, {{"Recount", Order.Descending}}),
Result = Table.RemoveColumns(
Table.SelectRows(
Table.AddRankColumn(
SortsByFrequency,
"Rank",
{"Recount", Order.Descending},
[RankKind = RankKind.Dense]
),
each [Rank] <= 5
),
{"Rank"}
),
FinalResult = Table.Sort(Result, {{"Recount", Order.Descending}, {"Letters", Order.Ascending}})
in
FinalResult
Power Query solution 7 for Top Initial Letters Count, proposed by Krzysztof Kominiak:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
AddLs = List.Combine(
Table.AddColumn(
Source,
"Ls",
each Splitter.SplitTextByRepeatedLengths(1)(
Text.Select(Text.Proper([US Presidents]), {"A" .. "Z"})
)
)[Ls]
),
ConvToTab = Table.FromList(AddLs, Splitter.SplitByNothing(), {"Letters"}, null, ExtraValues.Error),
GroupRows = Table.Group(
ConvToTab,
{"Letters"},
{{"Frequency", each Table.RowCount(_), type number}}
),
Result = Table.Sort(
Table.SelectRows(
GroupRows,
each [Frequency] >= List.MaxN(List.Distinct(GroupRows[Frequency]), 5){4}
),
{{"Frequency", Order.Descending}, {"Letters", Order.Ascending}}
)
in
Result
Power Query solution 8 for Top Initial Letters Count, proposed by Jan Willem Van Holst:
let
Source = Your data,
transToList = Table.TransformColumns(Source,{{"US Presidents", each List.Transform(Text.Split(_, " "), each Text.Start(_,1))}}),
combine = Table.FromList(List.Combine(transToList[US Presidents])),
#"Grouped Rows" = Table.Group(combine, {"Column1"}, {{"Count", each Table.RowCount(_), Int64.Type}}),
selectList = List.FirstN( List.Distinct(List.Sort(#"Grouped Rows"[Count], Order.Descending)), 5),
select = Table.SelectRows(#"Grouped Rows", each List.Contains(selectList, [Count])),
#"Sorted Rows" = Table.Sort(select,{{"Count", Order.Descending}})
in
#"Sorted Rows"
Power Query solution 9 for Top Initial Letters Count, proposed by Sue Bayes:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Duplicate = Table.DuplicateColumn(Source, "US Presidents", "Copy"),
Split = Table.ExpandListColumn(
Table.TransformColumns(
Duplicate,
{
{
"Copy",
Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv),
let
itemType = (type nullable text) meta [Serialized.Text = true]
in
type {itemType}
}
}
),
"Copy"
),
FirstChar = Table.TransformColumns(Split, {{"Copy", each Text.Start(_, 1), type text}}),
Group = Table.Sort(
Table.Group(FirstChar, {"Copy"}, {{"Count", each Table.RowCount(_), Int64.Type}}),
{{"Count", Order.Descending}}
),
Rank = Table.AddRankColumn(
Group,
"Rank",
{"Count", Order.Descending},
[RankKind = RankKind.Dense]
),
Filter = Table.SelectRows(Rank, each [Rank] < 6)[[Copy], [Count]]
in
Filter
Solving the challenge of Top Initial Letters Count with Excel
Excel solution 1 for Top Initial Letters Count, proposed by Bo Rydobon 🇹🇭:
=LET(a,CHAR(SEQUENCE(26)+64),c,CONCAT(A2:A47),l,LEN(c)-LEN(SUBSTITUTE(c,a,)),
SORT(FILTER(HSTACK(a,l),l>=LARGE(UNIQUE(l),5)),2,-1))
Excel solution 2 for Top Initial Letters Count, proposed by Rick Rothstein:
=LET(f,LEFT(TEXTSPLIT(CONCAT(PROPER(A2:A47)&" "),," ")),u,UNIQUE(f),n,MAP(u,LAMBDA(x,COUNTA(TEXTSPLIT(c,x))-1)),SORT(FILTER(HSTACK(u,n),n>=LARGE(UNIQUE(n),5)),2,-1))
Excel solution 3 for Top Initial Letters Count, proposed by John V.:
=LET(b,LEFT(TEXTSPLIT(CONCAT(A2:A47&" "),," ")),u,UNIQUE(b),f,MAP(u,LAMBDA(x,SUM(N(b=x)))),SORT(FILTER(HSTACK(u,f),f>=LARGE(UNIQUE(f),5)),2,-1))
Excel solution 4 for Top Initial Letters Count, proposed by محمد حلمي:
=LET(
i,CHAR(ROW(65:90)),
v,TEXTSPLIT(CONCAT(A2:A47&" ")," ",,1),
r,MMULT(--(i=LEFT(v)),TOCOL(LEN(v)^0)),
SORT(FILTER(HSTACK(i,r),r>=LARGE(UNIQUE(r),5)),
2,-1))
Excel solution 5 for Top Initial Letters Count, proposed by Kris Jaganah:
=LET(a,A2:A47,b,LEFT(TEXTSPLIT(CONCAT(a&" "),," ")),c,UNIQUE(b),d,MAP(c,LAMBDA(x,SUM(--(b=x)))),SORT(FILTER(HSTACK(c,d),d>=LARGE(UNIQUE(d),5)),2,-1))
Excel solution 6 for Top Initial Letters Count, proposed by Julian Poeltl:
=LET(N,LEFT(TEXTSPLIT(TEXTJOIN(" ",,A2:A47)," "),1),A,CHAR(64+SEQUENCE(26)),C,MAP(A,LAMBDA(A,COUNTA(FILTER(N,N=A)))),SORT(FILTER(HSTACK(A,C),C>=LARGE(UNIQUE(C),5)),2,-1))
Excel solution 7 for Top Initial Letters Count, proposed by Aditya Kumar Darak 🇮🇳:
=LET(
_n, 5,
_d, A2:A47,
_chr, CHAR(SEQUENCE(1, 26, 65)),
_cnt, LEN(_d) - LEN(SUBSTITUTE(_d, _chr, "")),
_fq, MMULT(SEQUENCE(1, ROWS(_d), 1, 0), _cnt),
_lg, LARGE(UNIQUE(_fq, 1), _n),
_r, SORT(TRANSPOSE(FILTER(VSTACK(_chr, _fq), _fq >= _lg)), 2, -1),
_r
)
Excel solution 8 for Top Initial Letters Count, proposed by Timothée BLIOT:
=LET(A,A2:A47,
B,TOCOL(LEFT(TEXTSPLIT(TEXTJOIN("/",,A)," ","/")),3),
C, MAP(B, LAMBDA(x, SUMPRODUCT(1*(x=B)) )),
E, LARGE(UNIQUE(C),5),
SORT(UNIQUE(FILTER(HSTACK(B,C),C>=E)),2,-1))
Excel solution 9 for Top Initial Letters Count, proposed by Hussein SATOUR:
=LET(a, LEFT(TEXTSPLIT(CONCAT(A2:A47&" "),, " ")),
b, MAP(a, LAMBDA(x, SUM((a=x)*1))),
SORT(UNIQUE(FILTER(HSTACK(a,b), b>=LARGE(UNIQUE(b),5))), 2,-1))
Excel solution 10 for Top Initial Letters Count, proposed by Md. Zohurul Islam:
=LET(z,A2:A47,a,DROP(REDUCE("",z,LAMBDA(x,y,VSTACK(x,LEFT(TEXTSPLIT(y,," "))))),1),b,UNIQUE(a),d,MAP(b,LAMBDA(p,SUM(ABS(a=p)))),
e,LARGE(UNIQUE(d),SEQUENCE(5)),f,XMATCH(d,e),g,SORT(FILTER(HSTACK(b,d),ISNUMBER(f)),2,-1),VSTACK(HSTACK("Alphabet","Frequency"),g))
Excel solution 11 for Top Initial Letters Count, proposed by Charles Roldan:
=LET(_COUNTIF, LAMBDA(a, b, MMULT(--(TOROW(a)=b), --(a=a))),
Initials, LEFT(TEXTSPLIT(TEXTJOIN(" ", , A2:A47), , " ")),
uInitials, UNIQUE(Initials), Tally, _COUNTIF(Initials, uInitials),
SORT(FILTER(HSTACK(uInitials, Tally),
Tally>=LARGE(UNIQUE(Tally), 5)), 2, -1))
Excel solution 12 for Top Initial Letters Count, proposed by Jaroslaw Kujawa:
=LET(a, MAP(A2:A47,LAMBDA(b, CONCAT(LEFT(TEXTSPLIT(b," "),1)))), b, CONCAT(a), c, SORT(MID(b,SEQUENCE(,LEN(b)),1),1,-1,TRUE), d, UNIQUE(c,TRUE), e, LEN(b)-LEN(SUBSTITUTE(b,d,"")), f, LARGE(UNIQUE(e,TRUE),5), SORT(TRANSPOSE(FILTER(VSTACK(d,e),e>=f)),2,-1))
Excel solution 13 for Top Initial Letters Count, proposed by Stefan Olsson:
=QUERY(
TRANSPOSE(SPLIT(REGEXREPLACE(TEXTJOIN("|", , A2:A50), "[^A-Z|]", "|"), "|", 1, 1)),
"Select Col1, Count(Col1) Group by Col1 Order by Count(Col1) desc Limit 9 Label Col1 'Letter', Count(Col1) 'Frequency'",
0)
Excel solution 14 for Top Initial Letters Count, proposed by Stefan Olsson:
=ArrayFormula(QUERY(
FLATTEN(LEFT(SPLIT(PROPER(A2:A47), " .", 1, 1), 1)),
"Select Col1, Count(Col1) Where Col1<>'' Group By Col1 Order By Count(Col1) desc Limit 9 Label Col1 'Letters', Count(Col1) 'Frequency'",
0 ))
Excel solution 15 for Top Initial Letters Count, proposed by Victor Momoh (MVP, MOS, R.Eng):
=LET(a,LEFT(TEXTSPLIT(CONCAT(A2:A47&" "),," ")),
b,UNIQUE(a),
c,MAP(b,LAMBDA(&x,SUM(--(a=x)))),
SORT(FILTER(HSTACK(b,c),c>=LARGE(UNIQUE(c),5)),2,-1))
Excel solution 16 for Top Initial Letters Count, proposed by Guillermo Arroyo:
=LET(c,DROP(REDUCE("",A2:A47,LAMBDA(i,j,VSTACK(i,LEFT(TEXTSPLIT(j,,""))))),1),p,CODE(c),f,FREQUENCY(p,p),SORT(FILTER(HSTACK(c,f),f>=LARGE(UNIQUE(f),5)),2,-1))
Excel solution 17 for Top Initial Letters Count, proposed by Quadri Olayinka Atharu:
=LET(K,5,
a,LEFT(TEXTSPLIT(TEXTJOIN(" ",,A2:A47),," ")),
b,UNIQUE(a),
c,MAP(b,LAMBDA(x,SUM(N(a=x)))),
d,LARGE(UNIQUE(c),K),
TAKE(SORT(HSTACK(b,c),2,-1),SUM(N(c>=d))))
Excel solution 18 for Top Initial Letters Count, proposed by Amr Tawfik CMA®,FMVA,Lean Coach:
=LET(X,LEFT(TEXTSPLIT(TEXTJOIN(" ",,A2:A47),," "),1),b,MAP(X,LAMBDA(z,SUM((X=z)*1))),W,
SORT(UNIQUE(HSTACK(X,b)),{2},-1),W)
Solving the challenge of Top Initial Letters Count with Python in Excel
Python in Excel solution 1 for Top Initial Letters Count, proposed by Alejandro Campos:
from collections import Counter
df = pd.DataFrame(Counter([n[0].upper() for p in xl("A1:A47", headers=True)['US Presidents'] for n in p.split()]).items(), columns=['Alphabet', 'Frequency'])
top_letters = df[df['Frequency'] >= sorted(set(df['Frequency']), reverse=True)[4]].sort_values(by='Frequency', ascending=False).reset_index(drop=True)
Solving the challenge of Top Initial Letters Count with SQL
SQL solution 1 for Top Initial Letters Count, proposed by Zoran Milokanović:
WITH /* Microsoft SQL Server 2019 */
DATA_PREP
AS
(
SELECT
SUBSTRING(VALUE, 1, 1) AS ALPHABET
,COUNT(*) AS FREQUENCY
FROM DATA D
CROSS APPLY STRING_SPLIT(D.US_PRESIDENTS, ' ')
GROUP BY
SUBSTRING(VALUE, 1, 1)
)
SELECT
F.ALPHABET
,F.FREQUENCY
FROM
(
SELECT
DP.ALPHABET
,DP.FREQUENCY
,DENSE_RANK() OVER (ORDER BY DP.FREQUENCY DESC) AS TOP_N
FROM DATA_PREP DP
) F
WHERE
F.TOP_N <= 5
;
&&
