Home » Top Initial Letters Count

Top Initial Letters Count

Find the top 5 highest occurring starting alphabets of words in US Presidents columns. If Name is James K. Polk, then starting alphabets are J, K & P. Sort it on the basis of descending order of frequency. This is case insensitive.

📌 Challenge Details and Links
ExcelBI Excel Challenge Number: 138
Challenge Difficulty: ⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Top Initial Letters Count with Power Query

Power Query solution 1 for Top Initial Letters Count, proposed by Zoran Milokanović:
let
  Source = Excel.CurrentWorkbook(){[Name = "Presidents"]}[Content], 
  AddedAlphabet = Table.AddColumn(
    Source, 
    "Alphabet", 
    each List.Transform(Text.Split([US Presidents], " "), each Text.Start(_, 1))
  ), 
  ExpandedAlphabet = Table.ExpandListColumn(AddedAlphabet, "Alphabet"), 
  GroupedAlphabet = Table.Group(
    ExpandedAlphabet, 
    {"Alphabet"}, 
    {{"Frequency", each Table.RowCount(_), Int64.Type}}
  ), 
  GroupedFrequence = Table.Group(GroupedAlphabet, {"Frequency"}, {{"Alphabet", each _[Alphabet]}}), 
  SortedFrequence = Table.Sort(GroupedFrequence, {{"Frequency", Order.Descending}}), 
  AddedTopN = Table.AddIndexColumn(SortedFrequence, "TopN", 1, 1, Int64.Type), 
  FilteredTop5 = Table.SelectRows(AddedTopN, each ([TopN] <= 5)), 
  Solution = Table.ExpandListColumn(FilteredTop5, "Alphabet")[[Alphabet], [Frequency]]
in
  Solution
Power Query solution 2 for Top Initial Letters Count, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Iniciales = Table.AddColumn(
    Source, 
    "Alpha", 
    each 
      let
        a = Text.Split([US Presidents], " "), 
        b = List.Transform(a, each Text.Start(_, 1))
      in
        b
  )[[Alpha]], 
  Listado = Table.ExpandListColumn(Iniciales, "Alpha"), 
  Agrupar = Table.Group(Listado, {"Alpha"}, {{"Freq", each Table.RowCount(_), Int64.Type}}), 
  Top = List.Last(
    List.FirstN(
      Table.Distinct(Table.Sort(Agrupar, {{"Freq", Order.Descending}}), {"Freq"})[Freq], 
      5
    )
  ), 
  Filtro = Table.SelectRows(Agrupar, each [Freq] >= Top), 
  Sol = Table.Sort(Filtro, {{"Freq", Order.Descending}, {"Alpha", Order.Ascending}})
in
  Sol
Power Query solution 3 for Top Initial Letters Count, proposed by Luan Rodrigues:
let
  Fonte = Tabela1, 
  lst = Table.FromList(
    List.Combine(
      Table.AddColumn(
        Fonte, 
        "Personalizar", 
        each Text.ToList(Text.Select([US Presidents], {"A" .. "Z"}))
      )[Personalizar]
    )
  ), 
  gp = Table.Group(lst, {"Column1"}, {{"Contagem", each Table.RowCount(_), Int64.Type}}), 
  fil = Table.SelectRows(
    gp, 
    each List.Contains(List.MaxN(List.Distinct(gp[Contagem]), 5), [Contagem])
  ), 
  res = Table.Sort(fil, {{"Contagem", Order.Descending}})
in
  res
Power Query solution 4 for Top Initial Letters Count, proposed by Brian Julius:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Dupe = Table.DuplicateColumn(Source, "US Presidents", "Alpahbet"), 
  ReplaceVanBuren = Table.ReplaceValue(
    Dupe, 
    "Martin Van Buren", 
    "Martin VanBuren", 
    Replacer.ReplaceText, 
    {"Alpahbet"}
  ), 
  SplitDelim = Table.ExpandListColumn(
    Table.TransformColumns(
      ReplaceVanBuren, 
      {
        {
          "Alpahbet", 
          Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv), 
          let
            itemType = (type nullable text) meta [Serialized.Text = true]
          in
            type {itemType}
        }
      }
    ), 
    "Alpahbet"
  ), 
  SplitPos = Table.SplitColumn(
    SplitDelim, 
    "Alpahbet", 
    Splitter.SplitTextByPositions({0, 1}, false), 
    {"Alpahbet.1", "Alpahbet.2"}
  ), 
  Group = Table.Group(SplitPos, {"Alpahbet.1"}, {{"Frequency", each Table.RowCount(_), Int64.Type}}), 
  AddRank = Table.RenameColumns(
    Table.RemoveColumns(
      Table.SelectRows(
        Table.AddRankColumn(
          Group, 
          "Rank", 
          {"Frequency", Order.Descending}, 
          [RankKind = RankKind.Dense]
        ), 
        each [Rank] <= 5
      ), 
      "Rank"
    ), 
    {"Alpahbet.1", "Alphabet"}
  )
in
  AddRank
Power Query solution 5 for Top Initial Letters Count, proposed by Jaroslaw Kujawa:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table10"]}[Content], 
  #"US Presidents" = Text.Remove(
    Text.Combine(Source[US Presidents], " "), 
    {"a" .. "z"} & {" ", "."}
  ), 
  #"Imported Text" = Table.FromList(Lines.FromText(#"US Presidents")), 
  #"Split Column by Position" = Table.SplitColumn(
    #"Imported Text", 
    "Column1", 
    Splitter.SplitTextByRepeatedLengths(1), 
    Text.Length(#"Imported Text"[Column1]{0})
  ), 
  #"Unpivoted Columns" = Table.UnpivotOtherColumns(
    #"Split Column by Position", 
    {}, 
    "Attribute", 
    "Value"
  ), 
  #"Removed Columns" = Table.RemoveColumns(#"Unpivoted Columns", {"Attribute"}), 
  #"Grouped Rows" = Table.Group(
    #"Removed Columns", 
    {"Value"}, 
    {{"Count", each Table.RowCount(_), Int64.Type}}
  ), 
  Custom1 = Table.AddColumn(
    #"Grouped Rows", 
    "Custom", 
    each [Count] >= List.Min(List.MaxN(List.Distinct(#"Grouped Rows"[Count]), 5))
  ), 
  #"Filtered Rows" = Table.SelectRows(Custom1, each ([Custom] = true)), 
  #"Sorted Rows" = Table.Sort(#"Filtered Rows", {{"Count", Order.Descending}}), 
  #"Removed Columns1" = Table.RemoveColumns(#"Sorted Rows", {"Custom"})
in
  #"Removed Columns1"
Power Query solution 6 for Top Initial Letters Count, proposed by Rafael González B.:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  ChangeToText = Table.TransformColumnTypes(Source, {{"US Presidents", type text}}), 
  SplitterListToTable = Table.AddColumn(
    ChangeToText, 
    "SplitterList", 
    each Table.FromList(
      Text.Split([US Presidents], " "), 
      Splitter.SplitByNothing(), 
      {"Names"}, 
      null, 
      ExtraValues.Error
    )
  ), 
  ExtractFirstLetters = Table.AddColumn(
    SplitterListToTable, 
    "TableLists", 
    each Table.AddColumn([SplitterList], "Letters", each Text.Start([Names], 1))[[Letters]]
  ), 
  OnluColumnLetters = Table.Combine(
    Table.RemoveColumns(ExtractFirstLetters, {"US Presidents", "SplitterList"})[TableLists]
  ), 
  GroupByLetters = Table.Group(
    OnluColumnLetters, 
    {"Letters"}, 
    {{"Recount", each Table.RowCount(_), Int64.Type}}
  ), 
  SortsByFrequency = Table.Sort(GroupByLetters, {{"Recount", Order.Descending}}), 
  Result = Table.RemoveColumns(
    Table.SelectRows(
      Table.AddRankColumn(
        SortsByFrequency, 
        "Rank", 
        {"Recount", Order.Descending}, 
        [RankKind = RankKind.Dense]
      ), 
      each [Rank] <= 5
    ), 
    {"Rank"}
  ), 
  FinalResult = Table.Sort(Result, {{"Recount", Order.Descending}, {"Letters", Order.Ascending}})
in
  FinalResult
Power Query solution 7 for Top Initial Letters Count, proposed by Krzysztof Kominiak:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  AddLs = List.Combine(
    Table.AddColumn(
      Source, 
      "Ls", 
      each Splitter.SplitTextByRepeatedLengths(1)(
        Text.Select(Text.Proper([US Presidents]), {"A" .. "Z"})
      )
    )[Ls]
  ), 
  ConvToTab = Table.FromList(AddLs, Splitter.SplitByNothing(), {"Letters"}, null, ExtraValues.Error), 
  GroupRows = Table.Group(
    ConvToTab, 
    {"Letters"}, 
    {{"Frequency", each Table.RowCount(_), type number}}
  ), 
  Result = Table.Sort(
    Table.SelectRows(
      GroupRows, 
      each [Frequency] >= List.MaxN(List.Distinct(GroupRows[Frequency]), 5){4}
    ), 
    {{"Frequency", Order.Descending}, {"Letters", Order.Ascending}}
  )
in
  Result
Power Query solution 8 for Top Initial Letters Count, proposed by Jan Willem Van Holst:
let
 Source = Your data,
 transToList = Table.TransformColumns(Source,{{"US Presidents", each List.Transform(Text.Split(_, " "), each Text.Start(_,1))}}),
 combine = Table.FromList(List.Combine(transToList[US Presidents])),
 #"Grouped Rows" = Table.Group(combine, {"Column1"}, {{"Count", each Table.RowCount(_), Int64.Type}}),
 selectList = List.FirstN( List.Distinct(List.Sort(#"Grouped Rows"[Count], Order.Descending)), 5),
 select = Table.SelectRows(#"Grouped Rows", each List.Contains(selectList, [Count])),
 #"Sorted Rows" = Table.Sort(select,{{"Count", Order.Descending}})
in
 #"Sorted Rows"


                    
                  
          
Power Query solution 9 for Top Initial Letters Count, proposed by Sue Bayes:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Duplicate = Table.DuplicateColumn(Source, "US Presidents", "Copy"), 
  Split = Table.ExpandListColumn(
    Table.TransformColumns(
      Duplicate, 
      {
        {
          "Copy", 
          Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv), 
          let
            itemType = (type nullable text) meta [Serialized.Text = true]
          in
            type {itemType}
        }
      }
    ), 
    "Copy"
  ), 
  FirstChar = Table.TransformColumns(Split, {{"Copy", each Text.Start(_, 1), type text}}), 
  Group = Table.Sort(
    Table.Group(FirstChar, {"Copy"}, {{"Count", each Table.RowCount(_), Int64.Type}}), 
    {{"Count", Order.Descending}}
  ), 
  Rank = Table.AddRankColumn(
    Group, 
    "Rank", 
    {"Count", Order.Descending}, 
    [RankKind = RankKind.Dense]
  ), 
  Filter = Table.SelectRows(Rank, each [Rank] < 6)[[Copy], [Count]]
in
  Filter

Solving the challenge of Top Initial Letters Count with Excel

Excel solution 1 for Top Initial Letters Count, proposed by Bo Rydobon 🇹🇭:
=LET(a,CHAR(SEQUENCE(26)+64),c,CONCAT(A2:A47),l,LEN(c)-LEN(SUBSTITUTE(c,a,)),
SORT(FILTER(HSTACK(a,l),l>=LARGE(UNIQUE(l),5)),2,-1))
Excel solution 2 for Top Initial Letters Count, proposed by Rick Rothstein:
=LET(f,LEFT(TEXTSPLIT(CONCAT(PROPER(A2:A47)&" "),," ")),u,UNIQUE(f),n,MAP(u,LAMBDA(x,COUNTA(TEXTSPLIT(c,x))-1)),SORT(FILTER(HSTACK(u,n),n>=LARGE(UNIQUE(n),5)),2,-1))
Excel solution 3 for Top Initial Letters Count, proposed by John V.:
=LET(b,LEFT(TEXTSPLIT(CONCAT(A2:A47&" "),," ")),u,UNIQUE(b),f,MAP(u,LAMBDA(x,SUM(N(b=x)))),SORT(FILTER(HSTACK(u,f),f>=LARGE(UNIQUE(f),5)),2,-1))
Excel solution 4 for Top Initial Letters Count, proposed by محمد حلمي:
=LET(
i,CHAR(ROW(65:90)),
v,TEXTSPLIT(CONCAT(A2:A47&" ")," ",,1),
r,MMULT(--(i=LEFT(v)),TOCOL(LEN(v)^0)),
SORT(FILTER(HSTACK(i,r),r>=LARGE(UNIQUE(r),5)),
2,-1))
Excel solution 5 for Top Initial Letters Count, proposed by Kris Jaganah:
=LET(a,A2:A47,b,LEFT(TEXTSPLIT(CONCAT(a&" "),," ")),c,UNIQUE(b),d,MAP(c,LAMBDA(x,SUM(--(b=x)))),SORT(FILTER(HSTACK(c,d),d>=LARGE(UNIQUE(d),5)),2,-1))
Excel solution 6 for Top Initial Letters Count, proposed by Julian Poeltl:
=LET(N,LEFT(TEXTSPLIT(TEXTJOIN(" ",,A2:A47)," "),1),A,CHAR(64+SEQUENCE(26)),C,MAP(A,LAMBDA(A,COUNTA(FILTER(N,N=A)))),SORT(FILTER(HSTACK(A,C),C>=LARGE(UNIQUE(C),5)),2,-1))
Excel solution 7 for Top Initial Letters Count, proposed by Aditya Kumar Darak 🇮🇳:
=LET(
 _n, 5,
 _d, A2:A47,
 _chr, CHAR(SEQUENCE(1, 26, 65)),
 _cnt, LEN(_d) - LEN(SUBSTITUTE(_d, _chr, "")),
 _fq, MMULT(SEQUENCE(1, ROWS(_d), 1, 0), _cnt),
 _lg, LARGE(UNIQUE(_fq, 1), _n),
 _r, SORT(TRANSPOSE(FILTER(VSTACK(_chr, _fq), _fq >= _lg)), 2, -1),
 _r
)
Excel solution 8 for Top Initial Letters Count, proposed by Timothée BLIOT:
=LET(A,A2:A47,
B,TOCOL(LEFT(TEXTSPLIT(TEXTJOIN("/",,A)," ","/")),3),
C, MAP(B, LAMBDA(x, SUMPRODUCT(1*(x=B)) )),
E, LARGE(UNIQUE(C),5),
SORT(UNIQUE(FILTER(HSTACK(B,C),C>=E)),2,-1))
Excel solution 9 for Top Initial Letters Count, proposed by Hussein SATOUR:
=LET(a, LEFT(TEXTSPLIT(CONCAT(A2:A47&" "),, " ")),
b, MAP(a, LAMBDA(x, SUM((a=x)*1))),
SORT(UNIQUE(FILTER(HSTACK(a,b), b>=LARGE(UNIQUE(b),5))), 2,-1))
Excel solution 10 for Top Initial Letters Count, proposed by Md. Zohurul Islam:
=LET(z,A2:A47,a,DROP(REDUCE("",z,LAMBDA(x,y,VSTACK(x,LEFT(TEXTSPLIT(y,," "))))),1),b,UNIQUE(a),d,MAP(b,LAMBDA(p,SUM(ABS(a=p)))),
e,LARGE(UNIQUE(d),SEQUENCE(5)),f,XMATCH(d,e),g,SORT(FILTER(HSTACK(b,d),ISNUMBER(f)),2,-1),VSTACK(HSTACK("Alphabet","Frequency"),g))
Excel solution 11 for Top Initial Letters Count, proposed by Charles Roldan:
=LET(_COUNTIF, LAMBDA(a, b, MMULT(--(TOROW(a)=b), --(a=a))), 
Initials, LEFT(TEXTSPLIT(TEXTJOIN(" ", , A2:A47), , " ")), 
uInitials, UNIQUE(Initials), Tally, _COUNTIF(Initials, uInitials), 
SORT(FILTER(HSTACK(uInitials, Tally), 
Tally>=LARGE(UNIQUE(Tally), 5)), 2, -1))
Excel solution 12 for Top Initial Letters Count, proposed by Jaroslaw Kujawa:
=LET(a, MAP(A2:A47,LAMBDA(b, CONCAT(LEFT(TEXTSPLIT(b," "),1)))),         b, CONCAT(a),         c, SORT(MID(b,SEQUENCE(,LEN(b)),1),1,-1,TRUE),         d, UNIQUE(c,TRUE),         e, LEN(b)-LEN(SUBSTITUTE(b,d,"")),         f, LARGE(UNIQUE(e,TRUE),5),         SORT(TRANSPOSE(FILTER(VSTACK(d,e),e>=f)),2,-1))
Excel solution 13 for Top Initial Letters Count, proposed by Stefan Olsson:
=QUERY(
TRANSPOSE(SPLIT(REGEXREPLACE(TEXTJOIN("|", , A2:A50), "[^A-Z|]", "|"), "|", 1, 1)), 
"Select Col1, Count(Col1) Group by Col1 Order by Count(Col1) desc Limit 9 Label Col1 'Letter', Count(Col1) 'Frequency'",
0)
Excel solution 14 for Top Initial Letters Count, proposed by Stefan Olsson:
=ArrayFormula(QUERY(
FLATTEN(LEFT(SPLIT(PROPER(A2:A47), " .", 1, 1), 1)),
"Select Col1, Count(Col1) Where Col1<>'' Group By Col1 Order By Count(Col1) desc Limit 9 Label Col1 'Letters', Count(Col1) 'Frequency'",
0 ))
Excel solution 15 for Top Initial Letters Count, proposed by Victor Momoh (MVP, MOS, R.Eng):
=LET(a,LEFT(TEXTSPLIT(CONCAT(A2:A47&" "),," ")),
b,UNIQUE(a),
c,MAP(b,LAMBDA(&x,SUM(--(a=x)))),
SORT(FILTER(HSTACK(b,c),c>=LARGE(UNIQUE(c),5)),2,-1))
Excel solution 16 for Top Initial Letters Count, proposed by Guillermo Arroyo:
=LET(c,DROP(REDUCE("",A2:A47,LAMBDA(i,j,VSTACK(i,LEFT(TEXTSPLIT(j,,""))))),1),p,CODE(c),f,FREQUENCY(p,p),SORT(FILTER(HSTACK(c,f),f>=LARGE(UNIQUE(f),5)),2,-1))
Excel solution 17 for Top Initial Letters Count, proposed by Quadri Olayinka Atharu:
=LET(K,5,
a,LEFT(TEXTSPLIT(TEXTJOIN(" ",,A2:A47),," ")),
b,UNIQUE(a),
c,MAP(b,LAMBDA(x,SUM(N(a=x)))),
d,LARGE(UNIQUE(c),K),
TAKE(SORT(HSTACK(b,c),2,-1),SUM(N(c>=d))))
Excel solution 18 for Top Initial Letters Count, proposed by Amr Tawfik CMA®,FMVA,Lean Coach:
=LET(X,LEFT(TEXTSPLIT(TEXTJOIN(" ",,A2:A47),," "),1),b,MAP(X,LAMBDA(z,SUM((X=z)*1))),W,
SORT(UNIQUE(HSTACK(X,b)),{2},-1),W)

Solving the challenge of Top Initial Letters Count with Python in Excel

Python in Excel solution 1 for Top Initial Letters Count, proposed by Alejandro Campos:
from collections import Counter
df = pd.DataFrame(Counter([n[0].upper() for p in xl("A1:A47", headers=True)['US Presidents'] for n in p.split()]).items(), columns=['Alphabet', 'Frequency'])
top_letters = df[df['Frequency'] >= sorted(set(df['Frequency']), reverse=True)[4]].sort_values(by='Frequency', ascending=False).reset_index(drop=True)
                    
                  

Solving the challenge of Top Initial Letters Count with SQL

SQL solution 1 for Top Initial Letters Count, proposed by Zoran Milokanović:
WITH /* Microsoft SQL Server 2019 */
DATA_PREP
AS
(
 SELECT
 SUBSTRING(VALUE, 1, 1) AS ALPHABET
 ,COUNT(*) AS FREQUENCY
 FROM DATA D
 CROSS APPLY STRING_SPLIT(D.US_PRESIDENTS, ' ')
 GROUP BY
 SUBSTRING(VALUE, 1, 1)
)
SELECT
 F.ALPHABET
,F.FREQUENCY
FROM
(
 SELECT
 DP.ALPHABET
 ,DP.FREQUENCY
 ,DENSE_RANK() OVER (ORDER BY DP.FREQUENCY DESC) AS TOP_N
 FROM DATA_PREP DP
) F
WHERE
 F.TOP_N <= 5
;
                    
                  

&&

Leave a Reply