Home » Top Words in Presidents

Top Words in Presidents

Column A contains list of US presidents. List the top 3 most frequently occurring words from column A. A valid word is that word which is not a single alphabet. Hence, S. is not a valid word in Harry S. Truman. This is a case insensitive problem.

📌 Challenge Details and Links
ExcelBI Excel Challenge Number: 123
Challenge Difficulty: ⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Top Words in Presidents with Power Query

Power Query solution 1 for Top Words in Presidents, proposed by Bo Rydobon 🇹🇭:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Split = Table.ExpandListColumn(
    Table.TransformColumns(
      Source, 
      {"US Presidents", Splitter.SplitTextByAnyDelimiter({". ", " "}, QuoteStyle.None)}
    ), 
    "US Presidents"
  ), 
  Group = Table.Sort(
    Table.Group(Split, {"US Presidents"}, {{"Count", each Table.RowCount(_)}}), 
    {{"Count", 1}}
  ), 
  Filtered = Table.SelectRows(
    Group, 
    each [Count] >= Group[Count]{3} and Text.Length([US Presidents]) > 1
  )[US Presidents]
in
  Filtered
Power Query solution 2 for Top Words in Presidents, proposed by Aditya Kumar Darak 🇮🇳:
let
  Source = Excel.CurrentWorkbook(){[Name = "data"]}[Content], 
  TopN = 3, 
  Transform = Table.ExpandListColumn(
    Table.TransformColumns(Source, {"US Presidents", each Text.SplitAny(_, " .")}), 
    "US Presidents"
  ), 
  Group = Table.Group(Transform, {"US Presidents"}, {{"Count", each Table.RowCount(_)}}), 
  Filter = Table.SelectRows(Group, each [US Presidents] <> "" and Text.Length([US Presidents]) > 1), 
  TopNFilter = Table.SelectRows(Filter, each [Count] >= List.MaxN(Filter[Count], TopN){TopN - 1}), 
  Return = Table.Sort(TopNFilter, {{"Count", 1}})[[US Presidents]]
in
  Return
Power Query solution 3 for Top Words in Presidents, proposed by Luan Rodrigues:
let
  Fonte = Tabela1, 
  tab = Table.FromList(
    List.Select(
      List.Combine(
        Table.AddColumn(Fonte, "Personalizar", each Text.Split([US Presidents], " "))[Personalizar]
      ), 
      each Text.Contains(_, ".") = false
    ), 
    null, 
    {"US Presidents"}
  ), 
  gp = Table.Group(tab, {"US Presidents"}, {{"Contagem", each Table.RowCount(_)}}), 
  result = Table.MaxN(gp, "Contagem", each [Contagem] > 2)[[US Presidents]]
in
  result
Power Query solution 4 for Top Words in Presidents, proposed by Brian Julius:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Split = Table.ExpandListColumn(
    Table.TransformColumns(
      Source, 
      {
        {
          "US Presidents", 
          Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv), 
          let
            itemType = (type nullable text) meta [Serialized.Text = true]
          in
            type {itemType}
        }
      }
    ), 
    "US Presidents"
  ), 
  ReplacePer = Table.ReplaceValue(Split, ".", "", Replacer.ReplaceText, {"US Presidents"}), 
  FilterInitials = Table.SelectRows(ReplacePer, each Text.Length([US Presidents]) > 1), 
  Group = Table.Group(
    FilterInitials, 
    {"US Presidents"}, 
    {
      {"Count", each Table.RowCount(_), Int64.Type}, 
      {"All", each _, type table [US Presidents = nullable text]}
    }
  ), 
  AddRank = Table.AddRankColumn(
    Group, 
    "Rank", 
    {"Count", Order.Descending}, 
    [RankKind = RankKind.Dense]
  ), 
  Filter = Table.RenameColumns(
    Table.SelectColumns(Table.SelectRows(AddRank, each [Rank] <= 3), "US Presidents"), 
    {"US Presidents", "Answer Expected"}
  )
in
  Filter
Power Query solution 5 for Top Words in Presidents, proposed by Bhavya Gupta:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Transform = Table.ExpandListColumn(
    Table.TransformColumns(
      Source, 
      {
        {
          "US Presidents", 
          each List.Select(
            Text.Split(_, " "), 
            each Text.Length(Text.Select(_, {"A" .. "Z", "a" .. "z"})) > 1
          )
        }
      }
    ), 
    "US Presidents"
  ), 
  G_1 = Table.Group(Transform, {"US Presidents"}, {{"Count", each Table.RowCount(_), Int64.Type}}), 
  G_2 = Table.Group(G_1, {"Count"}, {{"All", each [US Presidents]}}), 
  Final = List.Combine(Table.FirstN(Table.Sort(G_2, {{"Count", Order.Descending}}), 3)[All])
in
  Final
Power Query solution 6 for Top Words in Presidents, proposed by Venkata Rajesh:
let
  Source = Data[US Presidents], 
  Words = List.Combine(List.Transform(Source, each Text.Split(_, " "))), 
  List = List.Select(Words, each Text.Length(Text.Select(_, {"A" .. "Z", "a" .. "z"})) > 1), 
  Max = List.Max(
    List.Transform(
      List, 
      each 
        let
          name = _
        in
          List.Count(List.Select(List, each _ = name))
    )
  ), 
  Result = List.Distinct(
    List.Select(
      List, 
      each 
        let
          name = _
        in
          List.Count(List.Select(List, each _ = name)) > Max - 3
    )
  )
in
  Result
Power Query solution 7 for Top Words in Presidents, proposed by Krzysztof Kominiak:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  AddNames = Table.ExpandListColumn(
    Table.AddColumn(
      Source, 
      "Answer Expected", 
      each List.RemoveFirstN(
        List.Reverse(
          List.Select(Text.Split(Text.Remove([US Presidents], "."), " "), each Text.Length(_) > 1)
        ), 
        1
      )
    )[[Answer Expected]], 
    "Answer Expected"
  ), 
  GroupRows = Table.Group(
    AddNames, 
    {"Answer Expected"}, 
    {{"Count", each Table.RowCount(_), type number}}
  ), 
  Result = Table.Sort(
    Table.SelectRows(GroupRows, each [Count] >= List.MaxN(GroupRows[Count], 3){2}), 
    {{"Count", Order.Descending}, {"Answer Expected", Order.Ascending}}
  )[[Answer Expected]]
in
  Result
Power Query solution 8 for Top Words in Presidents, proposed by Jan Willem Van Holst:
let
  Source = YourData, 
  #"Split Column by Delimiter" = Table.ExpandListColumn(
    Table.TransformColumns(
      Source, 
      {
        {
          "US Presidents", 
          Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv), 
          let
            itemType = (type nullable text) meta [Serialized.Text = true]
          in
            type {itemType}
        }
      }
    ), 
    "US Presidents"
  ), 
  #"Replaced Value" = Table.ReplaceValue(
    #"Split Column by Delimiter", 
    ".", 
    "", 
    Replacer.ReplaceText, 
    {"US Presidents"}
  ), 
  #"Filtered Rows" = Table.SelectRows(#"Replaced Value", each Text.Length([US Presidents]) <> 1), 
  #"Grouped Rows" = Table.Group(
    #"Filtered Rows", 
    {"US Presidents"}, 
    {{"Count", each Table.RowCount(_), Int64.Type}}
  ), 
  #"Filtered Rows1" = Table.SelectRows(
    #"Grouped Rows", 
    each List.Contains(List.FirstN(List.Sort(#"Grouped Rows"[Count], Order.Descending), 3), [Count])
  ), 
  #"Sorted Rows" = Table.Sort(
    #"Filtered Rows1", 
    {{"Count", Order.Descending}, {"US Presidents", Order.Ascending}}
  ), 
  #"Removed Columns" = Table.RemoveColumns(#"Sorted Rows", {"Count"})
in
  #"Removed Columns"
Power Query solution 9 for Top Words in Presidents, proposed by CA Vikal Jain:
let
 Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
 #"Split Column by Delimiter" = Table.SplitColumn(Source, "US Presidents", Splitter.SplitTextByDelimiter(" ", QuoteStyle.Csv), {"US Presidents.1", "US Presidents.2", "US Presidents.3"}),
 Tocol = Table.FromList(List.Combine(Table.ToColumns(#"Split Column by Delimiter"))),
 #"Grouped Rows" = Table.Group(Tocol, {"Column1"}, {{"Count", each Table.RowCount(_), Int64.Type}}),
 #"Filtered Rows" = Table.SelectRows(#"Grouped Rows", each ([Column1] <> "")),
 Results" = Table.FirstN(Table.Sort(#"Filtered Rows",{{"Count", Order.Descending}}),3)
in
Results
                    
                  
          

Solving the challenge of Top Words in Presidents with Excel

Excel solution 1 for Top Words in Presidents, proposed by Bo Rydobon 🇹🇭:
=LET(
    w,
    SORT(
        TEXTSPLIT(
            CONCAT(
                A2:A47&" "
            ),
            ,
            " "
        )
    ),
    u,
    UNIQUE(
        w
    ),
    c,
    XMATCH(
        u,
        w,
        ,
        -1
    )-XMATCH(
        u,
        w
    ),
    FILTER(
        SORTBY(
            u,
            -c
        ),
        SORT(
            c,
            ,
            -1
        )>=LARGE(
            c,
            3
        )
    )
)
Excel solution 2 for Top Words in Presidents, proposed by Rick Rothstein:
=LET(
    a,
    A2:A47,
    t,
    TEXTSPLIT(
        TEXTJOIN(
            " ",
            ,
            a
        ),
        ,
        " "
    ),
    u,
    UNIQUE(
        FILTER(
            t,
            LEN(
                t
            )>2
        )
    ),
    c,
    COUNTIF(
        a,
        "*"&u&"*"
    ),
    FILTER(
        u,
        c>=LARGE(
            UNIQUE(
                c
            ),
            3
        )
    )
)
Excel solution 3 for Top Words in Presidents, proposed by John V.:
=LET(w,
    TEXTSPLIT(
        CONCAT(
            A2:A47&" "
        ),
        ,
        " "
    ),
    u,
    UNIQUE(
        FILTER(
            w,
            LEN(
                w
            )>2
        )
    ),
    n,
    MAP(u,
    LAMBDA(x,
    SUM(--(w=x)))),
    s,
    SORT(
        HSTACK(
            n,
            u
        ),
        ,
        -1
    ),
    FILTER(
        DROP(
            s,
            ,
            1
        ),
        TAKE(
            s,
            ,
            1
        )>=LARGE(
            n,
            3
        )
    ))

✅=LET(w,
    TEXTSPLIT(
        CONCAT(
            A2:A47&" "
        ),
        ,
        " "
    ),
    u,
    UNIQUE(
        FILTER(
            w,
            LEN(
                w
            )>2
        )
    ),
    n,
    MAP(u,
    LAMBDA(x,
    SUM(--(w=x)))),
    FILTER(
        SORTBY(
            u,
            n,
            -1
        ),
        SORT(
            n,
            ,
            -1
        )>=LARGE(
            n,
            3
        )
    ))
Excel solution 4 for Top Words in Presidents, proposed by محمد حلمي:
=LET(
a,
    TEXTSPLIT(
        CONCAT(
            A2:A47&" "
        ),
        ,
        " "
    ),
    
v,
    UNIQUE(
        a
    ),
    
r,
    MAP(v,
    LAMBDA(c,
    SUM(--(c=a)))),
    
FILTER(
    SORTBY(
        v,
        r,
        -1
    ),
    SORT(
        r,
        ,
        -1
    )>=LARGE(
        r,
        3
    )
))
Excel solution 5 for Top Words in Presidents, proposed by 🇰🇷 Taeyong Shin:
=LET(
    t,
    TEXTSPLIT(
        TEXTJOIN(
            " ",
            ,
            A2:A47
        ),
        ,
        " "
    ),
    g,
    GROUPBY(
        t,
        t,
        ROWS,
        ,
        0,
        -2
    ),
    TAKE(
        g,
        XMATCH(
            LARGE(
                g,
                3
            ),
            DROP(
                g,
                ,
                1
            ),
            ,
            -1
        ),
        1
    )
)
Excel solution 6 for Top Words in Presidents, proposed by 🇰🇷 Taeyong Shin:
=LET(
    t,
    TEXTSPLIT(
        TEXTJOIN(
            " ",
            ,
            A2:A47
        ),
        ,
        " "
    ),
    R,
    LAMBDA(
        R,
        num,
        [i],
        LET(
            k,
            MODE.MULT(
                num
            ),
            v,
            INDEX(
                t,
                k,
                1
            ),
            IF(
                i+1<3,
                VSTACK(
                    v,
                    R(
                        R,
                        FILTER(
                            num,
                            ISNA(
                                XMATCH(
                                    num,
                                    k
                                )
                            )
                        ),
                        i+1
                    )
                ),
                v
            )
        )
    ),
    R(
        R,
        XMATCH(
            t,
            t
        )
    )
)
Excel solution 7 for Top Words in Presidents, proposed by 🇰🇷 Taeyong Shin:
=LET(
    
     str,
     TEXTSPLIT(
         TEXTJOIN(
             " ",
              ,
              A2:A47
         ),
          ,
          " "
     ) & " ",
    
     u,
     UNIQUE(
         str
     ),
    
     n,
     COUNTIF(
         A2:A47,
          "*" & u & "*"
     ),
    
     TOCOL(
         SORTBY(
             IF(
                 n >= LARGE(
                     n,
                      3
                 ),
                  u,
                  x
             ),
              -n
         ),
          2
     )
    
)


=LET(
    
     str,
     TEXTSPLIT(
         CONCAT(
             A2:A47 & " "
         ),
          ,
          " ",
          1
     ),
    
     n,
     XMATCH(
         str,
          UNIQUE(
         str
     )
     ),
    
     frq,
     FREQUENCY(
         n,
          n
     ),
    
     TOCOL(
         SORTBY(
             IF(
                 frq >= LARGE(
                     frq,
                      3
                 ),
                  str,
                  x
             ),
              -frq
         ),
          2
     )
    
)


=LET(
    
     str,
     SORT(
         TEXTSPLIT(
             CONCAT(
                 A2:A47 & " "
             ),
              ,
              " ",
              1
         )
     ),
    
     u,
     UNIQUE(
         str
     ),
    
     n,
     XMATCH(
         u,
          str,
          ,
          -1
     ) - XMATCH(
         u,
          str
     ),
    
     TOCOL(
         SORTBY(
             IF(
                 n >= LARGE(
                     n,
                      3
                 ),
                  u,
                  x
             ),
              -n
         ),
          2
     )
    
)
Excel solution 8 for Top Words in Presidents, proposed by Kris Jaganah:
=LET(a,
    A2:A47,
    b,
    TRIM(
        TEXTSPLIT(
            TEXTJOIN(
                " ",
                TRUE,
                IFERROR(
                    REPLACE(
                        a,
                        FIND(
                            ".",
                            a
                        )-1,
                        2,
                        ""
                    ),
                    a
                )
            ),
            ,
            " "
        )
    ),
    c,
    UNIQUE(
        FILTER(
            b,
            b<>""
        )
    ),
    d,
    MAP(c,
    LAMBDA(x,
    SUM(--(x=b)))),
    e,
    SORTBY(
        HSTACK(
            c,
            d
        ),
        d,
        -1
    ),
    FILTER(CHOOSECOLS(
        e,
        1
    ),
    (CHOOSECOLS(
        e,
        2
    )>=LARGE(
        CHOOSECOLS(
        e,
        2
    ),
        3
    ))))
Excel solution 9 for Top Words in Presidents, proposed by Julian Poeltl:
=LET(
    N,
    TEXTSPLIT(
        TEXTJOIN(
            " ",
            ,
            A2:A47
        ),
        ,
        " "
    ),
    F,
    FILTER(
        N,
        LEN(
            N
        )>2
    ),
    U,
    UNIQUE(
        F
    ),
    C,
    MAP(
        U,
        LAMBDA(
            A,
            ROWS(
                FILTER(
                    F,
                    F=A
                )
            )
        )
    ),
    S,
    SORTBY(
        U,
        C,
        -1
    ),
    SC,
    SORT(
        C,
        ,
        -1
    ),
    FILTER(
        S,
        SC>=LARGE(
            C,
            3
        )
    )
)
Excel solution 10 for Top Words in Presidents, proposed by Aditya Kumar Darak 🇮🇳:
=LET(
 _n,
     3,
    
 _d,
     A2:A47,
    
 _e,
     LAMBDA(
         a,
          b,
          VSTACK(
              a,
               TEXTSPLIT(
                   b,
                    ,
                    {" ",
                    "."},
                    1
               )
          )
     ),
    
 _s,
     REDUCE(
         "",
          _d,
          _e
     ),
    
 _m,
     XMATCH(
         _s,
          _s
     ),
    
 _fq,
     DROP(
         FREQUENCY(
             _m,
              _m
         ),
          -1
     ),
    
 _f,
     FILTER(HSTACK(
         _s,
          _fq
     ),
     (_fq >= LARGE(
         _fq,
          _n
     )) * (LEN(
         _s
     ) > 1)),
    
 _r,
     TAKE(
         SORT(
             _f,
              2,
              -1
         ),
          ,
          1
     ),
    
 _r
)
Excel solution 11 for Top Words in Presidents, proposed by Timothée BLIOT:
=LET(A,
     SUBSTITUTE(
         A2:A47,
         ".",
         ""
     ),
    
B,
     TEXTSPLIT(
         TEXTJOIN(
             "/",
             ,
             A
         ),
         " ",
         "/"
     ),
    
C,
     TOCOL(
         IF(
             LEN(
                 B
             )=1,
             "",
             B
         ),
         3
     ),
    
D,
     MAP(C,
    LAMBDA(a,
    MIN(
        LEN(
            a
        ),
        1
    )*SUMPRODUCT(1*(C=a)))),
    
E,
     LARGE(
         UNIQUE(
             D
         ),
         3
     ),
     UNIQUE(
         FILTER(
             SORTBY(
                 C,
                 D,
                 -1
             ),
             SORT(
                 D,
                 ,
                 -1
             )>=E
         )
     ))
Excel solution 12 for Top Words in Presidents, proposed by Hussein SATOUR:
=LET(
 t,
     TEXTSPLIT(
         CONCAT(
             A2:A47& " "
         ),
          ,
          " "
     ),
    
 a,
     FILTER(
         t,
          LEN(
              t
          ) > 2
     ),
     b,
     UNIQUE(
         a
     ),
    
 c,
     MAP(b,
     LAMBDA(x,
     SUM((a=x)*1))),
    
 FILTER(
     HSTACK(
         b,
          c
     ),
      c > LARGE(
          UNIQUE(
              c
          ),
           4
      )
 ))
Excel solution 13 for Top Words in Presidents, proposed by Md. Zohurul Islam:
=LET(
    z,
    A2:A47,
    
    P,
    DROP(
        REDUCE(
            "",
            z,
            LAMBDA(
                x,
                y,
                VSTACK(
                    x,
                    TOCOL(
                        TEXTSPLIT(
                            y,
                            " "
                        )
                    )
                )
            )
        ),
        1
    ),
    
    Q,
    LEN(
        P
    ),
    
    S,
    FILTER(
        P,
        Q>2
    ),
    
    U,
    UNIQUE(
        SORT(
            S
        )
    ),
    
    V,
    MAP(
        U,
        LAMBDA(
            a,
            SUM(
                ABS(
                    S=a
                )
            )
        )
    ),
    
    rng,
    SORTBY(
        HSTACK(
            U,
            V
        ),
        V,
        -1
    ),
    
    W,
    FILTER(
        rng,
        TAKE(
            rng,
            ,
            -1
        )>=3
    ),
    
    result,
    TAKE(
        W,
        ,
        1
    ),
    
    result
)
Excel solution 14 for Top Words in Presidents, proposed by Charles Roldan:
=LET(
    f,
    LAMBDA(
        x,
        TEXTSPLIT(
            TEXTJOIN(
                " ",
                ,
                x
            ),
            ,
            " "
        )
    ),
    Names,
    f(
        A2:A47
    ),
    
    _COUNTIF,
    LAMBDA(
        a,
        b,
        BYROW(
            TOROW(
                a
            )=b,
            LAMBDA(
                x,
                SUM(
                    --x
                )
            )
        )
    ),
    
    Words,
    UNIQUE(
        Names
    ),
    Counts,
    _COUNTIF(
        Names,
        Words
    ),
    
    f(
        MAP(
            LARGE(
                Counts,
                SEQUENCE(
                    3
                )
            ),
            
            LAMBDA(
                x,
                TEXTJOIN(
                    " ",
                    ,
                    FILTER(
                        Words,
                        Counts=x
                    )
                )
            )
        )
    )
)
Excel solution 15 for Top Words in Presidents, proposed by Stefan Olsson:
=INDEX(
    
    SORTN(
        
        QUERY(
            
            TRANSPOSE(
                SPLIT(
                    REGEXREPLACE(
                        TEXTJOIN(
                            " ",
                             true,
                             A2:A50
                        ),
                         " w.? ",
                         " "
                    ),
                     " ",
                     true,
                     true
                )
            ),
             
            "select Col1, count(Col1) group by Col1 order by count(Col1) desc Label count(Col1) ''",
            0
        ),
        
        3,
         3,
         2,
         false
    )
    ,
    ,
    1
)
Excel solution 16 for Top Words in Presidents, proposed by Guillermo Arroyo:
=LET(n,
    TEXTSPLIT(
        TEXTJOIN(
            " ",
            ,
            SUBSTITUTE(
                A2:A47,
                ".",
                ""
            )
        ),
        ,
        " "
    ),
    f,
    FILTER(
        n,
        LEN(
            n
        )>1
    ),
    un,
    UNIQUE(
        f
    ),
    p,
    MMULT(--(un=TRANSPOSE(
        f
    )),
    SEQUENCE(
        ROWS(
        f
    ),
        ,
        1,
        0
    )),
    q,
    AGGREGATE(
        14,
        4,
        p,
        {1;2;3}
    ),
    FILTER(SORTBY(
        un,
        p,
        -1
    ),
    MMULT(--(SORT(
        p,
        ,
        -1
    )=TRANSPOSE(
        q
    )),
    {1;1;1})))
Excel solution 17 for Top Words in Presidents, proposed by Quadri Olayinka Atharu:
=LET(a,
    TOCOL(
        TEXTSPLIT(
            TEXTJOIN(
                "|",
                ,
                A2:A47,
                ,
                
            ),
            "|",
            " ",
            0,
            ,
            ""
        )
    ),
    
b,
    SUBSTITUTE(
        a,
        ".",
        ""
    ),
    
c,
    FILTER(b,
    (b<>"")*(LEN(
        b
    )>1)),
    
d,
    MAP(
        c,
        LAMBDA(
            x,
            SUM(
                N(
                    x=c
                )
            )
        )
    ),
    
UNIQUE(FILTER(c,
    (d>=LARGE(
        UNIQUE(
            d
        ),
        3
    )))))

Solving the challenge of Top Words in Presidents with SQL

SQL solution 1 for Top Words in Presidents, proposed by Zoran Milokanović:
WITH /* Microsoft SQL Server 2019 */
DATA_PREP
AS
(
 SELECT
 VALUE
 ,COUNT(*) AS OCCURRENCE
 FROM DATA D
 CROSS APPLY STRING_SPLIT(D.US_PRESIDENTS, ' ')
 WHERE
 1 = 1
 AND CHARINDEX('.', VALUE) = 0
 GROUP BY
 VALUE
)
SELECT
 F.VALUE AS ANSWER_EXPECTED
FROM
(
 SELECT
 DP.VALUE
 ,DENSE_RANK() OVER (ORDER BY DP.OCCURRENCE DESC) AS TOP_N
 FROM DATA_PREP DP
) F
WHERE
 F.TOP_N <= 3
ORDER BY
 F.TOP_N
,F.VALUE
;
                    
                  

&&

Leave a Reply