Home » Data Grouping

Data Grouping

Group the staff into their respective salary ranges Dynamic array function allowed, but Extra marks for Legacy solutions or PowerQuery Solution

📌 Challenge Details and Links
Challenge Number: 43
Challenge Difficulty: ⭐
📥Link to the solutions on LinkedIn

Solving the challenge of Data Grouping with Power Query

Power Query solution 1 for Data Grouping, proposed by Zoran Milokanović:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table3"]}[Content], 
  S = Table.FromRows(
    List.TransformMany(
      {{1000, 4999}, {5000, 9999}, {10000, 14999}, {15000}}, 
      each {
        Text.Combine(
          Table.SelectRows(Source, (r) => r[Salary] >= _{0} and r[Salary] <= (_{1}? ?? r[Salary]))[
            Staff
          ], 
          ", "
        )
      }, 
      (i, _) => {
        {Text.From(i{0}) & " - " & Text.From(i{1}), "> " & Text.From(i{0})}{
          Byte.From(List.Count(i) = 1)
        }, 
        _
      }
    ), 
    {"Salary Range", "Staffs"}
  )
in
  S
Power Query solution 2 for Data Grouping, proposed by Kris Jaganah:
let
  A = (z) => Excel.CurrentWorkbook(){[Name = z]}[Content], 
  B = Table.AddColumn(
    A("Table2"), 
    "Staffs", 
    each 
      let
        a = (x) => Number.From(Text.Trim(Text.SplitAny([Salary Range], "->"){x})), 
        b = (v, w) => if a(0) = null then v else a(w), 
        c = Text.Combine(
          Table.SelectRows(
            A("Table32"), 
            (y) => y[Salary] >= b(a(1), 0) and y[Salary] <= b(1 / 0, 1)
          )[Staff], 
          ", "
        )
      in
        c
  )
in
  B
Power Query solution 3 for Data Grouping, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table3"]}[Content], 
  Lists = List.Transform(List.Split({0 .. 15000}, 5000), each List.Select(_, (x) => x >= 1000)), 
  Ranges = List.Transform(
    Lists, 
    each 
      if List.Count(_) > 1 then
        Text.From(_{0}) & "-" & Text.From(List.Last(_))
      else
        ">" & Text.From(_{0})
  ), 
  Staff = List.Transform(
    Lists, 
    each Text.Combine(
      Table.SelectRows(
        Source, 
        (x) => if List.Count(_) > 1 then List.Contains(_, x[Salary]) else x[Salary] >= 15000
      )[Staff], 
      ", "
    )
  ), 
  Sol = Table.FromColumns({Ranges, Staff}, {"Salary Range", "Staffs"})
in
  Sol
Power Query solution 4 for Data Grouping, proposed by Abdallah Ally:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table3"]}[Content], 
  f = (x) =>
    if x < 5000 then
      "1000 - 4999"
    else if x < 10000 then
      "5000 - 9999"
    else if x < 15000 then
      "10000 - 14999"
    else
      ">= 15000", 
  AddColumn = Table.AddColumn(Source, "Salary Range", each f([Salary])), 
  Transform = List.Transform(
    {"1000 - 4999", "5000 - 9999", "10000 - 14999", ">= 15000"}, 
    each {_, Text.Combine(Table.SelectRows(AddColumn, (x) => x[Salary Range] = _)[Staff], ", ")}
  ), 
  Result = Table.FromRows(Transform, {"Salary Range", "Staffs"})
in
  Result
Power Query solution 5 for Data Grouping, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
  S1 = Excel.CurrentWorkbook(){[Name = "SalaryRng"]}[Content], 
  B = Table.AddColumn(S1, "Condition", each Text.BeforeDelimiter([Salary Range], " - "), type text), 
  C = Table.ReplaceValue(B, ">", "", Replacer.ReplaceText, {"Condition"}), 
  D = Table.TransformColumnTypes(C, {{"Condition", Int64.Type}}), 
  S2 = Excel.CurrentWorkbook(){[Name = "Table3"]}[Content], 
  E = Table.AddColumn(
    S2, 
    "Rng", 
    each List.Last(Table.SelectRows(D, (x) => [Salary] > x[Condition])[#"Salary Range"])
  ), 
  G = Table.AddColumn(
    S1, 
    "Staff", 
    each Text.Combine(Table.SelectRows(E, (n) => [Salary Range] = n[Rng])[Staff], ",")
  )
in
  G
Power Query solution 6 for Data Grouping, proposed by Luke Jarych:
let
  Source = Excel.CurrentWorkbook(){[Name = "tblRanges"]}[Content], 
  AddRangeFrom = Table.AddColumn(
    Source, 
    "RangeFrom", 
    each 
      if Text.StartsWith([Salary Range], ">") then
        Number.FromText(Text.Trim(Text.AfterDelimiter([Salary Range], ">")))
      else
        Number.FromText(Text.BeforeDelimiter([Salary Range], "-"))
  ), 
  AddRangeTo = Table.AddColumn(
    AddRangeFrom, 
    "RangeTo", 
    each 
      if Text.StartsWith([Salary Range], ">") then
        null
      else
        Number.FromText(Text.Trim(Text.AfterDelimiter([Salary Range], "-")))
  ), 
  AddStaffs = Table.AddColumn(
    AddRangeTo, 
    "Staffs", 
    each 
      let
        RangeFrom = [RangeFrom], 
        RangeTo = [RangeTo], 
        StaffInRange = Table.SelectColumns(
          Table.SelectRows(
            tblStaff, 
            each [Salary] >= RangeFrom and (RangeTo = null or [Salary] <= RangeTo)
          ), 
          {"Staff"}
        ), 
        Combined = Text.Combine(List.Transform(StaffInRange[Staff], each Text.From(_)), ", ")
      in
        Combined
  )[[Salary Range], [Staffs]]
in
  AddStaffs

Solving the challenge of Data Grouping with Excel

Excel solution 1 for Data Grouping, proposed by Rick Rothstein:
=DROP(
   REDUCE(
       "",
       {1,
       2,
       3,
       4},
       LAMBDA(
           a,
           x,
           VSTACK(
               a,
               TEXTJOIN(
                   ", ",
                   ,
                   FILTER(
                       B3:B9,
                       x=XMATCH(
                           C3:C9,
                           {0,
                           4999,
                           9999,
                           14999},
                           -1),
                       ""))))),
   1)

This formula creates all the headers along with the staff names...
=VSTACK(
   {"Salary Range",
   "Staffs"},
   HSTACK(
       {"1000-4999";"5000-9999";"10000-14999";">14999"},
       DROP(
           REDUCE(
               "",
               {1,
               2,
               3,
               4},
               LAMBDA(
                   a,
                   x,
                   VSTACK(
                       a,
                       TEXTJOIN(
                           ", ",
                           ,
                           FILTER(
                               B3:B9,
                               x=XMATCH(
                           C3:C9,
                           {0,
                           4999,
                           9999,
                           14999},
                           -1),
                               ""))))),
           1)))
Excel solution 2 for Data Grouping, proposed by 🇰🇷 Taeyong Shin:
=LET(d,
   FLOOR(
       VSTACK(
           C3:C9,
           10000),
       5000),
   f,
   LAMBDA(
       v,
       fn,
       n,
       MAP(
           v,
           LAMBDA(
               x,
               fn(
                   x,
                   n)))),
   s,
   f(
       f(
           d,
           MAX,
           1000),
       MIN,
       15000),
   DROP(GROUPBY(HSTACK(s,
   IF(d>14999,
   ">"&15000,
   s&-(d+4999))),
   T(
       +B3:B10),
   ARRAYTOTEXT,
   ,
   0),
   ,
   1))
Excel solution 3 for Data Grouping, proposed by Kris Jaganah:
=LET(
   a,
   Table3[Staff],
   b,
   Table3[Salary],
   c,
   E3:E6,
   d,
   XLOOKUP(
       b,
       --TEXTAFTER(
           c,
           {"-",
           ">"}),
       c,
       "> 15000",
       1),
   HSTACK(
       c,
       MAP(
           c,
           LAMBDA(
               x,
               ARRAYTOTEXT(
                   FILTER(
                       a,
                       d=x,
                       ""))))))
Excel solution 4 for Data Grouping, proposed by Julian Poeltl:
=LET(S,
   C3:C9,
   R,
   VSTACK(
       "1000 - 4999",
       "5000 - 9999",
       "10000 - 14999",
       "> 15000"),
   HSTACK(R,
   MAP(R,
   LAMBDA(A,
   TEXTJOIN(", ",
   ,
   IFNA(FILTER(B3:B9,
   (S>=--TEXTBEFORE(
       A,
       "-"))*(S<=--TEXTAFTER(
       A,
       "-")),
   ""),
   FILTER(
       B3:B9,
       S>=--TEXTAFTER(
           A,
           ">"))))))))
Excel solution 5 for Data Grouping, proposed by Hussein SATOUR:
=LET(
   r,
   E3:E6,
   a,
   --TEXTBEFORE(
       SUBSTITUTE(
           r,
           ">",
           "")&"-",
       "-"),
   MAP(
       r,
       LAMBDA(
           x,
           ARRAYTOTEXT(
               FILTER(
                   B3:B9,
                   LOOKUP(
                       C3:C9,
                       a,
                       r)=x,
                   "")))))
Excel solution 6 for Data Grouping, proposed by Oscar Mendez Roca Farell:
=LET(
   r,
    5e3*{0.2,
    1,
    2,
    3},
    TRANSPOSE(
        VSTACK(
            IFNA(
                r&-DROP(
                    r,
                     ,
                     1)+1,
                 ">"&MAX(
                      r)),
             BYCOL(
                 IFS(
                     LOOKUP(
                         Table3[Salary],
                          r)=r ,
                     Table3[Staff]),
                  LAMBDA(
                      c,
                       IFERROR(
                           ARRAYTOTEXT(
                               TOCOL(
                                   c,
                                    2)),
                            ""))))))
Excel solution 7 for Data Grouping, proposed by Sunny Baggu:
=LET(
a,
    {1000; 5000; 10000; 15000},
   
b,
    VSTACK(
        DROP(
            a,
             1),
         TAKE(
             a,
              -1) + 100000),
   
c,
    IFNA(
        HSTACK(
            a & " - " & DROP(
                b,
                 -1) - 1),
         "> " & TAKE(
             a,
              -1)),
   
HSTACK(
c,
   
MAP(
a,
   
b,
   
LAMBDA(x,
    y,
   
TEXTJOIN(
", ",
   
,
   
IF((Table3[Salary] >= x) * (Table3[Salary] < y),
    Table3[Staff],
    ""))))))
Excel solution 8 for Data Grouping, proposed by Hamidi Hamid:
=LET(x,
   DROP(
       TEXTBEFORE(
           E3:E6,
           {"-"},
           ,
           0,
           1)*1,
       -1),
   t,
   VSTACK(
       x,
       XLOOKUP(
           TRUE,
           x,
           x,
           ,
           -1)+1),
   r,
   IFERROR(
       TEXTAFTER(
           E3:E6,
           {"-"},
           ,
           0,
           0),
       100000)*1,
   MAP(t,
   r,
   LAMBDA(a,
   b,
   TEXTJOIN(",",
   1,
   IF((((Table3[Salary]>=a)*(Table3[Salary]<=b))),
   Table3[Staff],
   "")))))
Excel solution 9 for Data Grouping, proposed by Asheesh Pahwa:
=LET(
   s,
   C3:C9,
   sr,
   E3:E6,
   x,
   XLOOKUP(
       s,
       --TEXTAFTER(
           sr,
           {" - ",
           ">"}),
       sr,
       "> 15000",
       1),
   HSTACK(
       sr,
       MAP(
           sr,
           LAMBDA(
               z,
               ARRAYTOTEXT(
                   FILTER(
                       B3:B9,
                       x=z,
                       ""))))))
Excel solution 10 for Data Grouping, proposed by Meganathan Elumalai:
=LET(
   Stf,
   B3:B9,
   _lv,
   {0;1000;5000;10000;15000},
   _rv,
   {"0-999";"1000-4999";"5000-9999";"10000-14999";">=15000"},
   Cat,
   LOOKUP(
       C3:C9,
       _lv,
       _rv),
   Srng,
   DROP(
       _rv,
       1),
   HSTACK(
       Srng,
       MAP(
           Srng,
           LAMBDA(
               x,
               ARRAYTOTEXT(
                   FILTER(
                       Stf,
                       Cat=x,
                       ""))))))
Excel solution 11 for Data Grouping, proposed by Gerson Pineda:
=LET(z,
   5000,
   i,
   z*{0.2;1;2;3},
   f,
   z*{1;2;3;9}-1,
   s,
   Table3[Salary],
   HSTACK(VSTACK(
       TAKE(
           i&"-"&f,
           3),
       ">"&z*3),
   MAP(i,
   f,
   LAMBDA(x,
   y,
   ARRAYTOTEXT(FILTER(Table3[Staff],
   (s>=x)*(s<=y),
   ""))))))
Excel solution 12 for Data Grouping, proposed by Mey Tithveasna:
=REDUCE(B2,
   SEQUENCE(
       4),
   LAMBDA (b,
   c,
   VSTACK(
       b,
       ARRAYTOTEXT(
           
           FILTER(
               B3:B9,
               MATCH(
                   C3:C9,
                   {0,
                   4999,
                   9999,
                   14999},
                   1)=b,
               "")))))
Excel solution 13 for Data Grouping, proposed by Milan Shrimali:
=LET(
   DATA,
   BYROW(
       B2:C8,
       LAMBDA(
           X,
           LET(
               A,
               CHOOSECOLS(
                   X,
                   2),
               HSTACK(
                   CHOOSECOLS(
                       X,
                       1),
                   IFS(
                       AND(
                           A>=1000,
                           A<=4999),
                       1,
                       AND(
                           A>=5000,
                           A<=9999),
                       2,
                       AND(
                           A>=10000,
                           A<=14999),
                       3,
                       A>=15000,
                       4))))),
   B,
   HSTACK(
       VSTACK(
           1,
           2,
           3,
           4),
       VSTACK(
           "1000-4999",
           "5000-9999",
           "10000-14999",
           ">15000")),
   HSTACK(
       CHOOSECOLS(
           B,
           2),
       MAP(
           CHOOSECOLS(
               B,
               1),
           LAMBDA(
               X,
               IFERROR(
                   JOIN(
                       ",",
                       FILTER(
                           CHOOSECOLS(
                               DATA,
                               1),
                           CHOOSECOLS(
                               DATA,
                               2)=X)),
                   "")))))
Excel solution 14 for Data Grouping, proposed by Md Ismail Hosen:
=LAMBDA(
   staff,
   salary,
   salary_range,
    LET(
        
         _IsGreaterThanOnly,
         LEFT(
             salary_range,
              1) = ">",
        
         _End,
         TEXTAFTER(
             salary_range,
              "-") * 1,
        
         _Start,
         IF(
             
              _IsGreaterThanOnly,
             
              TEXTAFTER(
                  salary_range,
                   "> "),
             
              TEXTBEFORE(
                  salary_range,
                   "-")
              )
         * 1,
        
         _GreaterOrEqualToMask,
         salary >= _Start,
        
         _LessThanOrEqualToMask,
         IF(
             
              _IsGreaterThanOnly,
             
              salary = salary,
             
              salary <= _End
              ),
        
         _Mask,
         _GreaterOrEqualToMask * _LessThanOrEqualToMask,
        
         _Result,
         IFERROR(
             TEXTJOIN(
                 ",",
                  FALSE,
                  FILTER(
                      staff,
                       _Mask)),
              ""),
        
         _Result
        ))(Table3[Staff],
    Table3[Salary],
    E4)
Excel solution 15 for Data Grouping, proposed by Md. Shah Alam, Microsoft Certified Trainer:
=LET(
   x,
   C3:C9,
   y,
   SWITCH(
       TRUE,
       x>=15000,
       ">15000",
       x>=10000,
       "10000-14999",
       x>=5000,
       "5000-9999",
       "1000-4999"),
   z,
   UNIQUE(
       y),
   w,
   MAP(
       z,
       LAMBDA(
           b,
           TEXTJOIN(
               ",",
               ,
               FILTER(
                   B3:B9,
                   y=b),
               ""))),
   HSTACK(
       z,
       w))
Excel solution 16 for Data Grouping, proposed by Olufemi O.:
=IFS(
   
    AND(
        B2 >= 1000,
         B2 <= 4999),
    "1000 - 4999",
   
    AND(
        B2 >= 5000,
         B2 <= 9999),
    "5000 - 9999",
   
    AND(
        B2 >= 10000,
         B2 <= 14999),
    "10000 - 14999",
   
    B2 > 15000,
    "> 15000",
   
    TRUE,
    "Other"
   )

Solving the challenge of Data Grouping with Python

Python solution 1 for Data Grouping, proposed by Konrad Gryczan, PhD:
import pandas as pd
path = "files/Excel Challenge September 15th.xlsx"
input = pd.read_excel(path, usecols = "B:C", skiprows = 1, nrows = 7)
test = pd.read_excel(path, usecols = "E:F", skiprows = 1, nrows = 4).fillna("")
result = input.assign(Salary_Range = lambda x: pd.cut(x["Salary"], 
 bins = [1000, 5000, 10000, 15000, 100000], 
 labels = ["1000 - 4999", "5000 - 9999", "10000 - 14999", "> 15000"]))
result = result.groupby("Salary_Range")["Staff"].apply(lambda x: ", ".join(x)).reset_index(name="Staffs")
print(result)
print(test)
Python solution 2 for Data Grouping, proposed by Luke Jarych:
import pandas as pd
import xlwings as xw
wb = xw.Book(r'Excel Challenge September 15th.xlsx')
sh = wb.sheets[0]
table1 = sh.tables['tblStaff']
rng1 = sh.range(table1.range.address)
df1 = rng1.options(pd.DataFrame, header=True, index=False, numbers=float).value
table2 = sh.tables['tblRanges']
rng2 = sh.range(table2.range.address)
df2 = rng2.options(pd.DataFrame, header=True, index=False, numbers=int).value
salary_ranges = {}
for _, row in df2.iterrows():
 range_str = row['Salary Range']
 if '-' in range_str:
 min_salary, max_salary = map(int, range_str.split('-'))
 elif '>' in range_str:
 min_salary = int(range_str.split('>')[1].strip())
 max_salary = float('inf')
 else:
 continue
 salary_ranges[(min_salary, max_salary)] = []
for _, row in df1.iterrows():
 salary = row['Salary']
 for (min_salary, max_salary) in salary_ranges.keys():
 if min_salary <= salary <= max_salary:
 salary_ranges[(min_salary, max_salary)].append(row['Staff'])
 break

Solving the challenge of Data Grouping with Python in Excel

Python in Excel solution 1 for Data Grouping, proposed by Abdallah Ally:
# Create a function to group salaries
def salary_range(salary):
 if salary < 5000:
 return "1000 - 4999"
 elif salary < 10000:
 return "5000 - 9999"
 elif salary < 15000:
 return "10000 - 14999"
 else:
 return ">= 15000"
df = xl("B2:C9", headers=True)
# Perform data manipulation
salary_ranges = ["1000 - 4999", "5000 - 9999", "10000 - 14999", ">= 15000"]
df1 = pd.DataFrame(data={"Salary Range": salary_ranges})
df['Salary Range'] = df['Salary'].map(salary_range)
df2 = df.groupby('Salary Range').agg(Staffs=('Staff', lambda x: ', '.join(x)))
df = pd.merge(df1, df2, on='Salary Range', how='left').fillna('')
df
Python in Excel solution 2 for Data Grouping, proposed by Ümit Barış Köse, MSc:
df=xl("B2:C9", headers=True)
def categorize_salary(salary):
 if 1000 <= salary <= 4999:
 return '1000 - 4999'
 elif 5000 <= salary <= 9999:
 return '5000 - 9999'
 elif 10000 <= salary <= 14999:
 return '10000 - 14999'
 else:
 return '> 15000'
df['Salary Range'] = df['Salary'].apply(categorize_salary)
result = df.groupby('Salary Range')['Staff'].agg(lambda x: ', '.join(x)).reindex(
 ['1000 - 4999', '5000 - 9999', '10000 - 14999', '> 15000']
).reset_index()
result['Staff'] = result['Staff'].fillna('')
result
Python in Excel solution 3 for Data Grouping, proposed by George Mount:
Python in Excel solution: https://github.com/stringfestdata/crispo-excel-challenge/raw/main/crispo-excel-challenge-gjm-solution-09152024.xlsx
# Data import
df = xl("Table3[
#All]", headers=True)
# Define salary ranges and labels
bins = [999, 4999, 9999, 14999, np.inf]
labels = ['1000 - 4999', '5000 - 9999', '10000 - 14999', '> 15000']
# Create binned column
df['salary range'] = pd.cut(df['salary'], bins=bins, labels=labels)
# Ensure all salary ranges are included even if empty
df['salary range'] = pd.Categorical(df['salary range'], categories=labels, ordered=True)
# Group by salary range, add staff names
df_grouped = df.groupby('salary range', observed=False)['staff'].apply(', '.join).reset_index()
df_grouped['staff'] = df_grouped['staff'].fillna('')
df_grouped

Solving the challenge of Data Grouping with R

R solution 1 for Data Grouping, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "files/Excel Challenge September 15th.xlsx"
input = read_excel(path, range = "B2:C9")
test = read_excel(path, range = "E2:F6") %>% na.omit()
result = input %>%
 mutate(`Salary Range` = cut(x = Salary, 
 breaks = c(-Inf, 1000, 5000, 10000, 15000, Inf), 
 labels = c("<1000", "1000 - 4999", "5000 - 9999", "10000 - 14999", "> 15000"))) %>%
 summarise(Staffs = paste0(Staff, collapse = ", "), 
 .by = `Salary Range`)
cbind(result, test)
# identical, order of names in concatenation is different.

Solving the challenge of Data Grouping with DAX

DAX solution 1 for Data Grouping, proposed by Ümit Barış Köse, MSc:
#DAX in 
#Power_BI Solution
StaffsBySalaryRange = 
VAR SelectedRange = MAX('SalaryRanges'[Salary Range])
VAR StaffList = 
 CONCATENATEX(
 FILTER(
 'Table3',
 'Table3'[Salary Range] = SelectedRange
 ),
 'Table3'[Staff],
 ", ",
 'Table3'[Staff],
 ASC
 )
RETURN
 IF(
 ISBLANK(StaffList),
 " ", 
 StaffList
 )
SalaryRanges = 
DATATABLE(
 "Salary Range", STRING,
 {
 {"1000 - 4999"},
 {"5000 - 9999"},
 {"10000 - 14999"},
 {"> 15000"}
 }
)
Salary Range = 
SWITCH(
 TRUE(),
 'Table3'[Salary] <= 4999, "1000 - 4999",
 'Table3'[Salary] <= 9999, "5000 - 9999",
 'Table3'[Salary] <= 14999, "10000 - 14999",
 "> 15000"
)
SortOrder = SWITCH (
 'SalaryRanges'[Salary Range],
 "1000 - 4999", 1,
 "5000 - 9999", 2,
 "10000 - 14999", 3,
 "> 15000", 4
)

Leave a Reply