Home »  Identifying Customers Staple Products!

 Identifying Customers Staple Products!

Solving  Identifying Customers Staple Products challenge by Power Query, Power BI, Excel, Python and R

In the question table, sales transactions are listed. We aim to determine the most purchased products for each customer, based on the total quantity sold.

📌 Challenge Details and Links
Challenge Number: 29
Challenge Difficulty: ⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of  Identifying Customers Staple Products! with Power Query

Power Query solution 1 for  Identifying Customers Staple Products!, proposed by Brian Julius:
let
  S = Table.RemoveColumns(Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], "Date"), 
  G1a = Table.Group(
    Table.Group(S, {"Customer ID", "Product"}, {{"TotalQuantity", each List.Sum([Quantity])}}), 
    {"Customer ID"}, 
    {{"MaxQ", each List.Max([TotalQuantity])}, {"All", each _}}
  ), 
  Exp = Table.RemoveColumns(
    Table.SelectRows(
      Table.ExpandTableColumn(
        G1a, 
        "All", 
        {"Product", "TotalQuantity"}, 
        {"Product", "TotalQuantity"}
      ), 
      each [MaxQ] = [TotalQuantity]
    ), 
    {"MaxQ", "TotalQuantity"}
  ), 
  G2 = Table.Group(
    Exp, 
    {"Customer ID"}, 
    {"MostPurchasedProd", each List.Sort([Product], Order.Ascending)}
  ), 
  Extr = Table.TransformColumns(
    G2, 
    {"MostPurchasedProd", each Text.Combine(List.Transform(_, Text.From), ",")}
  )
in
  Extr
Power Query solution 2 for  Identifying Customers Staple Products!, proposed by Luan Rodrigues:
let
  Fonte = Tabela1, 
  res = Table.Group(
    Fonte, 
    {"Customer ID"}, 
    {
      {
        "Contagem", 
        each 
          let
            a = Table.Group(_, {"Product"}, {"count", each List.Sum(_[Quantity])}), 
            b = Table.SelectRows(a, each [count] = List.Max(a[count]))[Product]
          in
            Text.Combine(b, ", ")
      }
    }
  )
in
  res
Power Query solution 3 for  Identifying Customers Staple Products!, proposed by Ramiro Ayala Chávez:
let
S = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
T = Table.TransformColumns,
G = Table.Group,
a = G(S,{"Customer ID"},{"G", each G([[Product],[Quantity]],{"Product"},{"H", each List.Sum([Quantity])})}),
b = T(a,{"G", each Table.MaxN(G(_,{"H"},{"J", each _}),"H",1)}),
c = Table.ExpandTableColumn(b,"G",{"J"},{"J"}),
d = T(c,{"J", each Text.Combine(List.Sort([Product]),",")}),
Sol = Table.RenameColumns(d,{{"Customer ID","CUSTOMER"},{"J","Most Purchased PRODUCT"}})
in
Sol
Power Query solution 4 for  Identifying Customers Staple Products!, proposed by Aditya Kumar Darak 🇮🇳:
let
  Source = Excel.CurrentWorkbook(){[Name = "data"]}[Content], 
  Group = Table.Group(Source, {"Customer ID", "Product"}, {"Total", each List.Sum([Quantity])}), 
  Return = Table.Group(
    Group, 
    "Customer ID", 
    {
      "Most Product Purchased", 
      each [
        M = Table.Max(_, "Total")[Total], 
        F = Table.SelectRows(_, (f) => f[Total] = M)[Product], 
        R = Text.Combine(List.Sort(F), ", ")
      ][R]
    }
  )
in
  Return
Power Query solution 5 for  Identifying Customers Staple Products!, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  A = Table.Group(
    Source, 
    {"Customer ID", "Product"}, 
    {{"TotalQty", each List.Sum([Quantity]), type number}}
  ), 
  B = Table.Group(A, {"Customer ID"}, {{"Max", each List.Max([TotalQty]), type number}}), 
  C = Table.AddColumn(
    B, 
    "Most Perchased Product", 
    each Text.Combine(
      List.Sort(
        Table.SelectRows(A, (X) => X[Customer ID] = [Customer ID] and X[TotalQty] = [Max])[Product]
      ), 
      ","
    )
  ), 
  Sol = Table.SelectColumns(C, {"Customer ID", "Most Perchased Product"})
in
  Sol
Power Query solution 6 for  Identifying Customers Staple Products!, proposed by Glyn Willis:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  #"Changed Type" = Table.TransformColumnTypes(
    Source, 
    {
      {"Date", type date}, 
      {"Customer ID", type text}, 
      {"Product", type text}, 
      {"Quantity", Int64.Type}
    }
  ), 
  #"Grouped Rows" = Table.Group(
    #"Changed Type", 
    {"Customer ID"}, 
    {
      {
        "Most Purchased", 
        each 
          let
            Grp = Table.Group(_, {"Product"}, {{"Quantity", each List.Sum([Quantity])}}), 
            m   = List.Max(Grp[Quantity])
          in
            Text.Combine(List.Sort(Table.SelectRows(Grp, (x) => x[Quantity] = m)[Product]), ", "), 
        type nullable text
      }
    }
  )
in
  #"Grouped Rows"

Solving the challenge of  Identifying Customers Staple Products! with Excel

Excel solution 1 for  Identifying Customers Staple Products!, proposed by Bo Rydobon 🇹🇭:
=LET(
    g,
    GROUPBY(
        C3:D36,
        E3:E36,
        SUM,
        ,
        0
    ),
    h,
    GROUPBY(
        CHOOSECOLS(
            g,
            1,
            3
        ),
        INDEX(
            g,
            ,
            2
        ),
        ARRAYTOTEXT,
        ,
        0,
        -2
    ),
    GROUPBY(
        TAKE(
            h,
            ,
            1
        ),
        DROP(
            h,
            ,
            2
        ),
        SINGLE,
        ,
        0
    )
)
Excel solution 2 for  Identifying Customers Staple Products!, proposed by 🇰🇷 Taeyong Shin:
=LET(     c,
     C3:C36,
     p,
     D3:D36,     func,
     LAMBDA(
         x,
          ARRAYTOTEXT(
              SORT(
                  UNIQUE(
                      FILTER(
                          UNPACK(
                              x,
                               1
                          ),
                           UNPACK(
                               x,
                                2
                           ) = UNPACK(
                               x,
                                2,
                                MAX
                           )
                      )
                  )
              )
          )
     ),     GROUPBY(
         c,
          PACK(
              HSTACK(
                  p,
                   SUMIFS(
                       E3:E36,
                        c,
                        c,
                        p,
                        p
                   )
              )
          ),
          func,
          ,
          0
     ))
Excel solution 3 for  Identifying Customers Staple Products!, proposed by Oscar Mendez Roca Farell:
=LET(
    C,
     C3:C36,
     D,
     D3:D36,
     UC,
     UNIQUE(
         C
     ),
     UD,
     TOROW(
         SORT(
             UNIQUE(
                 D
             )
         )
     ),
     HSTACK(
         UC,
          BYROW(
              SUMIFS(
                  E3:E36,
                   C,
                   UC,
                   D,
                   UD
              ),
               LAMBDA(
                   r,
                    TEXTJOIN(
                        ",",
                         ,
                         REPT(
                             UD,
                              r=MAX(
                                  r
                              )
                         )
                    )
               )
          )
     )
)
Excel solution 4 for  Identifying Customers Staple Products!, proposed by Julian Poeltl:
=LET(
    T,
    B2:E36,
    TT,
    DROP(
        T,
        1
    ),
    ID,
    CHOOSECOLS(
        TT,
        2
    ),
    P,
    CHOOSECOLS(
        TT,
        3
    ),
    Q,
    CHOOSECOLS(
        TT,
        4
    ),
    UID,
    UNIQUE(
        ID
    ),
    UP,
    UNIQUE(
        P
    ),
    Umix,
    UNIQUE(
        ID&P
    ),
    C,
    MAP(
        Umix,
        LAMBDA(
            A,
            SUM(
                FILTER(
                    Q,
                    ID&P=A
                )
            )
        )
    ),
    IDCUM,
    LEFT(
        Umix,
        3
    ),
    PCUM,
    RIGHT(
        Umix,
        1
    ),
    R,
    MAP(
        UID,
        LAMBDA(
            A,
            LET(
                F,
                FILTER(
                    HSTACK(
                        PCUM,
                        C
                    ),
                    IDCUM=A
                ),
                M,
                MAX(
                    CHOOSECOLS(
                        F,
                        2
                    )
                ),
                TEXTJOIN(
                    ",",
                    ,
                    SORT(
                        FILTER(
                            CHOOSECOLS(
                                F,
                                1
                            ),
                            CHOOSECOLS(
                        F,
                        2
                    )=M
                        )
                    )
                )
            )
        )
    ),
    VSTACK(
        HSTACK(
            "CUSTOMER",
            "Most Purchased PRODUCT"
        ),
        HSTACK(
            UID,
            R
        )
    )
)
Excel solution 5 for  Identifying Customers Staple Products!, proposed by Kris Jaganah:
=LET(a,
    C3:C36,
    b,
    D3:D36,
    c,
    E3:E36,
    d,
    UNIQUE(
        a&b
    ),
    e,
    MAP(d,
    LAMBDA(x,
    SUM((a&b=x)*c))),
    f,
    LEFT(
        d,
        3
    ),
    g,
    RIGHT(
        d
    ),
    h,
    MAP(f,
    LAMBDA(y,
    MAX((f=y)*e))),
    i,
    UNIQUE(
        f
    ),
    HSTACK(i,
    DROP(REDUCE("",
    i,
    LAMBDA(x,
    y,
    VSTACK(x,
    TEXTJOIN(",",
    ,
    SORT(FILTER(g,
    (e=h)*(f=y))))))),
    1)))
Excel solution 6 for  Identifying Customers Staple Products!, proposed by John Jairo Vergara Domínguez:
=LET(i,
    C3:C36,
    p,
    D3:D36,
    v,
    SUMIFS(
        E3:E36,
        i,
        i,
        p,
        p
    ),
    u,
    UNIQUE(
        i
    ),
    HSTACK(u,
    MAP(u,
    LAMBDA(x,
    TEXTJOIN(",",
    ,
    SORT(UNIQUE(REPT(p,
    v*(i=x)=MAX(v*(i=x))))))))))
Excel solution 7 for  Identifying Customers Staple Products!, proposed by JvdV –:
=LET(C,
    C3:C36,
    P,
    D3:D36,
    Q,
    SUMIFS(
        E3:E36,
        C,
        C,
        P,
        P
    ),
    U,
    UNIQUE(
        C
    ),
    HSTACK(U,
    MAP(U,
    LAMBDA(Y,
    TEXTJOIN(",",
    ,
    UNIQUE(FILTER(P,
    (C=Y)*(Q=MAX(Q*(C=Y))))))))))
Excel solution 8 for  Identifying Customers Staple Products!, proposed by Sunny Baggu:
=LET(
 _c,
     UNIQUE(
         C3:C36
     ), _up,
     TOROW(
         UNIQUE(
             D3:D36
         )
     ), HSTACK(
 _c, MAP(
 _c, LAMBDA(a, LET(
 _a,
     BYCOL((C3:C36 = a) * (D3:D36 = _up) * E3:E36,
     LAMBDA(
         a,
          SUM(
              a
          )
     )), ARRAYTOTEXT(
     FILTER(
         _up,
          _a = MAX(
              _a
          )
     )
 )
 )
 )
 )
 )
)
Excel solution 9 for  Identifying Customers Staple Products!, proposed by CA Raghunath Gundi:
=LET(c,
     C3:C36,
     p,
     D3:D36,
     up,
     SUMIFS(
         E3:E36,
         c,
         c,
         p,
         p
     ),
     uc,
     SORT(
         UNIQUE(
              c
         )
     ),
     HSTACK(uc,
    MAP(uc,
    LAMBDA(a,
     TEXTJOIN(", ",
    ,
    UNIQUE(FILTER(p,
    (c=a)*(up=MAX(up*(c=a))))))))))
Excel solution 10 for  Identifying Customers Staple Products!, proposed by Charles Roldan:
=LET(pList, {"A";"B";"C";"D";"E"}, REDUCE(I2:J2, I3:I6, LAMBDA(_,c, VSTACK(_, HSTACK(c, ARRAYTOTEXT(FILTER(pList, LAMBDA(x, x = MAX(x))(MAP(pList, LAMBDA(p, SUM(E3:E36 * (C3:C36 = c) * (D3:D36 = p))))))))))))
Excel solution 11 for  Identifying Customers Staple Products!, proposed by Hussein SATOUR:
=LET(
    a,
    GROUPBY(
        C3:D36,
        E3:E36,
        SUM
    ),
    MAP(
        I3:I6,
        LAMBDA(
            x,
            ARRAYTOTEXT(
                FILTER(
                    INDEX(
                        a,
                        ,
                        2
                    ),
                    INDEX(
                        a,
                        ,
                        3
                    )=MAX(
                        FILTER(
                            INDEX(
                        a,
                        ,
                        3
                    ),
                            INDEX(
                                a,
                                ,
                                1
                            )=x
                        )
                    )
                )
            )
        )
    )
)
Excel solution 12 for  Identifying Customers Staple Products!, proposed by Pieter de B.:
=LET(c,C3:C36,p,D3:D36,q,E3:E36,u,UNIQUE(c),HSTACK(u,MAP(u,LAMBDA(m,LET(x,FILTER(p,c=m),y,MMULT(N(TOROW(x)=UNIQUE(x)),FILTER(q,c=m)),ARRAYTOTEXT(FILTER(UNIQUE(x),y=MAX(y))))))))

Solving the challenge of  Identifying Customers Staple Products! with Python

Python solution 1 for  Identifying Customers Staple Products!, proposed by Konrad Gryczan, PhD:
import pandas as pd

input = pd.read_excel("CH-029 Identifying Customers Staple Products.xlsx", sheet_name="Sheet1", usecols="B:E", skiprows=1)
test = pd.read_excel("CH-029 Identifying Customers Staple Products.xlsx", sheet_name="Sheet1", usecols="I:J", skiprows=1, nrows = 4)

result = input.groupby(["Customer ID", "Product"]).agg(total_quantity=("Quantity", "sum")).reset_index()
result["rank"] = result.groupby("Customer ID")["total_quantity"].rank(ascending=False)
result["lowest_rank"] = result.groupby("Customer ID")["rank"].transform("min")
result = result[result["rank"] == result["lowest_rank"]]
result = result.groupby("Customer ID").agg({"Product": lambda x: ",".join(sorted(x))}).reset_index()
result = result.rename(columns={"Product": "Most Purchased PRODUCT"})

print(result["Most Purchased PRODUCT"].equals(test["Most Purchased PRODUCT"])) # True
Python solution 2 for  Identifying Customers Staple Products!, proposed by Luan Rodrigues:
import pandas as pd

file_path = r'CH-029 Identifying Customers Staple Products.xlsx'

df = pd.read_excel(file_path,header=0,usecols='c:e')
df.columns = ['Customer ID','Product','Quantity']


filtro = df.groupby('Customer ID').apply(
 lambda x: x.groupby('Product')['Quantity'].sum().max()
).reset_index(name='max')

filtro['chave'] = filtro['Customer ID'].astype(str) + filtro['max'].astype(str)

group_df = df.groupby(['Customer ID','Product'])['Quantity'].sum().reset_index()
group_df['chave'] = group_df['Customer ID'].astype(str) + group_df['Quantity'].astype(str)

filter_gp = group_df.merge(filtro, left_on='chave',right_on='chave',how='inner')

def concatenate_values(group):
 return ','.join(group)

res = filter_gp.groupby('chave')['Product'].apply(concatenate_values)
 

print(res)
Python solution 3 for  Identifying Customers Staple Products!, proposed by Abdallah Ally:
import pandas as pd

# Create functions to generate the required results
# Function to get the maximum values for each row given a list of columns
def maximum_value(*values):
 return max(values)

# Function to get column names where values match 'maximum value' per row
def get_products(row):
 products = [col for col in df2.columns[1:-1] if row[col] == row['maximum_value']]
 return ','.join(products)

# Read the Excel file
file_path = 'CH-029 Identifying Customers Staple Products.xlsx'
df1 = pd.read_excel(file_path, usecols='I:J', nrows=4, skiprows=1) # Original data frame
df2 = pd.read_excel(file_path, usecols='B:E', skiprows=1) # Data frame for computation
df2 = df2.pivot_table(values='Quantity', index='Customer ID', columns='Product', aggfunc='sum').fillna(0)
df2 = df2.reset_index()

# Add columns to the dataset and print the output
df2['maximum_value'] = df2.apply(lambda x: maximum_value(x['A'], x['B'], x['C'], x['D'], x['E']), axis=1)
df2['Most Purchased PRODUCT'] = df2.apply(get_products, axis=1)
df2 = df2.rename_axis(None, axis=1).rename(columns={'Customer ID': 'Customer'})

print(f'nExpected Answer:n{df1} nnMy Answer: n{df2.iloc[:, [0, -1]]}')

Solving the challenge of  Identifying Customers Staple Products! with R

R solution 1 for  Identifying Customers Staple Products!, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)

input = read_excel("files/CH-029 Identifying Customers Staple Products.xlsx", range = "B2:E36")
test = read_excel("files/CH-029 Identifying Customers Staple Products.xlsx", range = "I2:J6")

result = input %>%
 summarise(total_quantity = sum(Quantity), .by = c("Customer ID", "Product")) %>%
 group_by(`Customer ID`) %>%
 mutate(rank = rank(-total_quantity),
 lowest_rank = min(rank)) %>%
 filter(rank == lowest_rank) %>%
 summarise(`Most Purchased PRODUCT` = paste0(sort(Product), collapse = ","))

Leave a Reply