Home » Determining Missing Fields

Determining Missing Fields

Solving Determining Missing Fields challenge by Power Query, Power BI, Excel, Python and R

Customer information including Name, Family, Phone, and Website is provided in the question table. Determine for each customer which fields are missing and which fields are duplicated.

📌 Challenge Details and Links
Challenge Number: 74
Challenge Difficulty: ⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Determining Missing Fields with Power Query

Power Query solution 1 for Determining Missing Fields, proposed by Zoran Milokanović:
let
  Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content], 
  F = {"Name", "Family", "Phone", "Website"}, 
  P = Table.Group(
    Source, 
    "Info", 
    {"A", each [Info]}, 
    0, 
    (b, n) => Byte.From(List.PositionOf(F, b) >= List.PositionOf(F, n))
  )[A], 
  S = Table.FromRows(
    List.Transform(
      P, 
      each 
        let
          d = (b, n) =>
            let
              d = List.Difference(b, n)
            in
              Text.Combine({d, {"-"}}{Byte.From(d = {})}, ", ")
        in
          {List.PositionOf(P, _) + 1, d(F, _), d(_, F)}
    ), 
    {"Record No", "Missing Fields", "Duplicate fields"}
  )
in
  S
Power Query solution 2 for Determining Missing Fields, proposed by Aditya Kumar Darak 🇮🇳:
let
  Source = Excel.CurrentWorkbook(){[Name = "data"]}[Content], 
  AllHead = List.Distinct(Source[Info]), 
  Group = Table.Group(
    Source, 
    "Info", 
    {
      "R", 
      each [
        U         = Table.Distinct(_, "Info"), 
        F1        = [Info], 
        F2        = U[Info], 
        Missing   = Text.Combine(List.Difference(AllHead, F2), ", "), 
        Duplicate = Text.Combine(List.Difference(F1, F2), ", ")
      ]
    }, 
    0, 
    (x, y) => Number.From(y = "Name")
  ), 
  Table = Table.FromRecords(Group[R], type table [Missing = text, Duplicate = text]), 
  Index = Table.AddIndexColumn(Table, "Record No", 1, 1, Int64.Type), 
  Return = Table.ReorderColumns(Index, {"Record No", "Missing", "Duplicate"})
in
  Return
Power Query solution 3 for Determining Missing Fields, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
 Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
 Group = Table.Combine(Table.Group(Source, {"Info"}, {{"A", each 
let
a = [Info],
b = List.Distinct(Source[Info]),
c = Text.Combine(List.Difference(b,a), ", "),
d = List.Select(b, each List.Count(List.Select(a, (x)=> (Text.Contains(x,_))))>1){0}? ??""
in Table.FromColumns({{c},{d}}, {"Missing Field", "Duplicate Fields"})
}},0, (a,b)=> Number.From(b[Info] = "Name"))[A]),
 Sol = Table.FromColumns({{1..List.Count(List.Select(Source[Info], each _ = "Name"))}}
 &Table.ToColumns(Group),{"Record No."}&Table.ColumnNames(Group))
in
 Sol
Power Query solution 4 for Determining Missing Fields, proposed by Yaroslav Drohomyretskyi:
let
  Джерело = Excel.CurrentWorkbook(){[Name = "Таблиця1"]}[Content], 
  Result = Table.SelectColumns(
    Table.AddIndexColumn(
      Table.Group(
        Table.FillDown(
          Table.AddColumn(
            Table.AddIndexColumn(Джерело, "Index", 1, 1, Int64.Type), 
            "x", 
            each if [Info] = "Name" then [Index] else null
          ), 
          {"x"}
        ), 
        {"x"}, 
        {
          {
            "Missing Fields", 
            each Text.Combine(List.Difference(List.Distinct(Джерело[Info]), _[Info]), ", ")
          }, 
          {
            "Duplicate Fields", 
            each Text.Combine(List.Difference(_[Info], List.Distinct(Джерело[Info])), ", ")
          }
        }
      ), 
      "Record No", 
      1, 
      1, 
      Int64.Type
    ), 
    {"Record No", "Missing Fields", "Duplicate Fields"}
  )
in
  Result
Power Query solution 5 for Determining Missing Fields, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
  S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  A = Table.AddIndexColumn(S, "In", 1, 1, Int64.Type), 
  B = Table.AddColumn(A, "Row", each if [Info] = "Name" then [In] else null), 
  C = Table.FillDown(B, {"Row"}), 
  D = Table.RemoveColumns(C, {"In"}), 
  E = Table.Group(
    D, 
    {"Row"}, 
    {{"Tbl", each _, type table [Info = text, Info2 = text, Custom = number]}}
  ), 
  F = Table.AddColumn(
    E, 
    "Missing", 
    each Text.Combine(List.Difference(List.Distinct(S[Info]), List.Distinct([Tbl][Info])), ", ")
  ), 
  DupF = (Dup) =>
    let
      TD  = Table.Group(Dup, {"Info"}, {{"Count", each Table.RowCount(_), Int64.Type}}), 
      TD2 = Table.SelectRows(TD, each [Count] <> 1), 
      TD3 = Text.Combine(TD2[Info], ", ")
    in
      TD3, 
  G = Table.AddColumn(F, "Duplicate", each DupF([Tbl])), 
  H = Table.AddIndexColumn(G, "Record No", 1, 1, Int64.Type), 
  I = Table.SelectColumns(H, {"Record No", "Missing", "Duplicate"})
in
  I

Solving the challenge of Determining Missing Fields with Excel

Excel solution 1 for Determining Missing Fields, proposed by Bo Rydobon 🇹🇭:
=LET(x,
    B3:B15,
    n,
    SCAN(0,
    x,
    LAMBDA(a,
    v,
    a+(v=@+x))),p,
    PIVOTBY(
        n,
        x,
        x,
        ROWS,
        ,
        0,
        ,
        0
    ),
    L,
    LAMBDA(
        c,
        BYROW(
            DROP(
                p,
                1,
                1
            ),
            LAMBDA(
                x,
                ARRAYTOTEXT(
                    FILTER(
                        DROP(
                            TAKE(
                                p,
                                1
                            ),
                            ,
                            1
                        ),
                        IF(
                            c,
                            N(
                                +x
                            )>1,
                            x=""
                        ),
                        "-"
                    )
                )
            )
        )
    ),VSTACK(
    F2:H2,
    HSTACK(
        UNIQUE(
            n
        ),
        L(
            0
        ),
        L(
            1
        )
    )
))
Excel solution 2 for Determining Missing Fields, proposed by 🇰🇷 Taeyong Shin:
=LET(
    d,
    B3:B15,
    n,
    SCAN(
        ,
        XMATCH(
            d&MAP(
                d,
                LAMBDA(
                    x,
                    COUNTIF(
                        A3:x,
                        x
                    )
                )
            ),
            "Name"&{1;2;3;4}
        ),
        LAMBDA(
            a,
            v,
            IFNA(
                v,
                 a
            )
        )
    ),
    VSTACK(
        F2:H2,
        DROP(
            GROUPBY(
                n,
                d,
                HSTACK(
                    LAMBDA(
                        x,
                        ARRAYTOTEXT(
                            IFERROR(
                                UNIQUE(
                                    VSTACK(
                                        B3:B6,
                                        x
                                    ),
                                    ,
                                    1
                                ),
                                "-"
                            )
                        )
                    ),
                    LAMBDA(
                        x,
                        LET(
                            u,
                            UNIQUE(
                                x
                            ),
                            ARRAYTOTEXT(
                                FILTER(
                                    u,
                                    MMULT(
                                        XMATCH(
                                            u,
                                            x,
                                            ,
                                            {-1,
                                            1}
                                        ),
                                        {1;-1}
                                    ),
                                    "-"
                                )
                            )
                        )
                    )
                ),
                ,
                0
            ),
            1
        )
    )
)
Excel solution 3 for Determining Missing Fields, proposed by محمد حلمي:
=LET(
    b,
    B3:B15,
    m,
    UNIQUE(
        b
    ),
    s,
    SEQUENCE(
        COUNTIF(
            b,
            B3
        )
    ),    HSTACK(
        s,
        MAP(
            {1,
            0}&s,
            LAMBDA(
                a,
                LET(
                    
                    i,
                    LEFT(
                        a
                    ),
                    v,
                    FILTER(
                        b,
                        a=i&SCAN(
                            0,
                            b=B3,
                            LAMBDA(
                                a,
                                v,
                                a+v
                            )
                        )
                    ),
                    
                    TEXTJOIN(
                        ", ",
                        ,
                        IF(
                            -i,
                            REPT(
                                m,
                                ISNA(
                                    XMATCH(
                                        m,
                                        v
                                    )
                                )
                            ),
                            
                            UNIQUE(
                                REPT(
                                    v,
                                    XMATCH(
                                        v,
                                        v
                                    )
Excel solution 4 for Determining Missing Fields, proposed by محمد حلمي:
=LET(
    b,
    B3:B15,
    k,
    SCAN(
        0,
        b=B3,
        LAMBDA(
            a,
            v,
            a+v
        )
    ),    REDUCE(
        F2:H2,
        UNIQUE(
            k
        ),
        LAMBDA(
            a,
            v,
            LET(
                
                y,
                UNIQUE(
                    b
                ),
                i,
                FILTER(
                    b,
                    k=v
                ),
                e,
                XMATCH(
                    y,
                    i
                ),
                VSTACK(
                    a,
                    HSTACK(
                        v,
                        
                        TEXTJOIN(
                            ", ",
                            ,
                            REPT(
                                y,
                                ISNA(
                                    e
                                )
                            )
                        ),
                        
                        TEXTJOIN(
                            ", ",
                            ,
                            REPT(
                                y,
                                IFNA(
                                    XMATCH(
                                        y,
                                        i,
                                        ,
                                        -1
                                    )>e,
                                    
                                )
                            )
                        )
                    )
                )
            )
        )
    )
)
Excel solution 5 for Determining Missing Fields, proposed by Oscar Mendez Roca Farell:
=LET(
    b,
     B3:B15,
     m,
     MAP(
         b,
          LAMBDA(
              b,
               COUNTIF(
                   B3:b,
                    "Name"
               )
          )
     ),
     REDUCE(
         F2:H2,
          UNIQUE(
              m
          ),
          LAMBDA(
              i,
               x,
               LET(
                   e,
                    UNIQUE(
                        b
                    ),
                    f,
                    FILTER(
                        b,
                         m=x
                    ),
                   
                   VSTACK(
                       i,
                        HSTACK(
                            x,
                             BYCOL(
                                 IF(
                                     MMULT(
                                         N(
                                             TOROW(
                                                 f
                                             )=e
                                         ),
                                          SEQUENCE(
                                              ROWS(
                                                 f
                                             )
                                          )^0
                                     )={0,
                                      2},
                                      e,
                                      ""
                                 ),
                                  LAMBDA(
                                      c,
                                       TEXTJOIN(
                                           ", ",
                                            ,
                                            c
                                       )
                                  )
                             )
                        )
                   )
               )
          )
     )
)
Excel solution 6 for Determining Missing Fields, proposed by Julian Poeltl:
=LET(
    Q,
    B3:C15,
    I,
    TAKE(
        Q,
        ,
        1
    ),
    U,
    UNIQUE(
        I
    ),
    BR,
    "Name"&DROP(
        TEXTSPLIT(
            TEXTJOIN(
                "|",
                ,
                I
            ),
            ,
            "Name"
        ),
        1
    ),
    NR,
    SEQUENCE(
        ROWS(
            BR
        )
    ),
    MF,
    IFERROR(
        MAP(
            BR,
            LAMBDA(
                A,
                TEXTJOIN(
                    ", ",
                    ,
                    FILTER(
                        U,
                        ISERROR(
                            SEARCH(
                                U,
                                A
                            )
                        )
                    )
                )
            )
        ),
        "-"
    ),
    DF,
    IFERROR(
        MAP(
            BR,
            LAMBDA(
                A,
                TEXTJOIN(
                    ", ",
                    ,
                    FILTER(
                        U,
                        ISNUMBER(
                            SEARCH(
                                U&"*"&U,
                                A
                            )
                        )
                    )
                )
            )
        ),
        "-"
    ),
    VSTACK(
        HSTACK(
            "Record No",
            "Missing Fields",
            "Duplicate Fields"
        ),
        HSTACK(
            NR,
            MF,
            DF
        )
    )
)
Excel solution 7 for Determining Missing Fields, proposed by Kris Jaganah:
=LET(a,
    B3:B15,
    b,
    C3:C15,
    c,
    SCAN(
        0,
        a,
        LAMBDA(
            x,
            y,
            IF(
                y="Name",
                x+1,
                x
            )
        )
    ),
    d,
    UNIQUE(
        a
    ),
    e,
    ROWS(
        d
    ),
    f,
    MAX(
        c
    ),
    g,
    INT(
        SEQUENCE(
            f,
            e,
            ,
            1/e
        )
    ),
    h,
    TOCOL(
        g
    ),
    i,
    TOCOL(
        g,
        ,
        1
    ),
    j,
    XMATCH(
        a,
        a
    ),
    k,
    PIVOTBY(
        h,
        XLOOKUP(
            h&i,
            c&K3#,
            c,
            0
        ),
        INDEX(
            a,
            i
        ),
        ARRAYTOTEXT
    ),
    l,
    UNIQUE(
        c
    ),
    m,
    GROUPBY(
        HSTACK(
            c,
            j
        ),
        a,
        COUNTA,
        0,
        0
    ),
    n,
    MAP(l,
    LAMBDA(x,
    IFERROR(INDEX(a,
    FILTER(CHOOSECOLS(
        m,
        2
    ),
    (TAKE(
        m,
        ,
        -1
    )>1)*(TAKE(
        m,
        ,
        1
    )=x),
    "")),
    "-"))),
    HSTACK(
        l,
        VLOOKUP(
            l,
            k,
            2,
            0
        ),
        n
    ))
Excel solution 8 for Determining Missing Fields, proposed by Sunny Baggu:
=LET(     _l,
     {"Name"; "Family"; "Phone"; "Website"},     _s,
     SEQUENCE(
         ROWS(
             B3:B15
         )
     ),     _c,
     B3:B15 = B3,     _st,
     FILTER(
         _s,
          _c
     ),     _en,
     VSTACK(
         DROP(
             _st,
              1
         ) - 1,
          TAKE(
              _s,
               -1
          )
     ),     REDUCE(          {"Record No",
          "Missing Fields",
          "Duplicate fields"},          SEQUENCE(
              ROWS(
                  _st
              )
          ),          LAMBDA(
              x,
               y,
              
               VSTACK(
                   
                    x,
                   
                    LET(
                        
                         _c1,
                         INDEX(
                             _st,
                              y,
                              1
                         ),
                        
                         _c2,
                         INDEX(
                             _en,
                              y,
                              1
                         ),
                        
                         _f,
                         INDEX(
                             B3:C15,
                              SEQUENCE(
                                  _c2 - _c1 + 1,
                                   ,
                                   _c1
                              ),
                              {1,
                              2}
                         ),
                        
                         _cri,
                         MAP(
                             _l,
                              LAMBDA(
                                  a,
                                   SUM(
                                       N(
                                           TAKE(
                                               _f,
                                                ,
                                                1
                                           ) = a
                                       )
                                   )
                              )
                         ),
                        
                         _m,
                         ARRAYTOTEXT(
                             FILTER(
                                 _l,
                                  _cri = 0,
                                  "-"
                             )
                         ),
                        
                         _d,
                         ARRAYTOTEXT(
                             FILTER(
                                 _l,
                                  _cri > 1,
                                  "-"
                             )
                         ),
                        
                         HSTACK(
                             y,
                              _m,
                              _d
                         )
                         
                    )
                    
               )
               
          )     ))
Excel solution 9 for Determining Missing Fields, proposed by Bilal Mahmoud kh.:
=VSTACK(
    {"Record No.",
    "Missing Fields",
    "Duplicat Fields"},
    LET(
        a,
        TEXTSPLIT(
            TEXTJOIN(
                ",",
                ,
                SCAN(
                    "",
                    B3:B15,
                    LAMBDA(
                        x,
                        y,
                        IF(
                            y="Name",
                            "|Name",
                            y
                        )
                    )
                )
            ),
            ,
            "|",
            TRUE
        ),
        b,
        MAP(
            a,
            LAMBDA(
                y,
                TEXTJOIN(
                    ",",
                    TRUE,
                    MAP(
                        {"Name",
                        "Family",
                        "Phone",
                        "Website"},
                        LAMBDA(
                            x,
                            IF(
                                ISNUMBER(
                                    FIND(
                                        x,
                                        y,
                                        1
                                    )
                                ),
                                "",
                                x
                            )
                        )
                    )
                )
            )
        ),
        c,
        MAP(
            a,
            LAMBDA(
                y,
                TEXTJOIN(
                    ",",
                    TRUE,
                    MAP(
                        {"Name",
                        "Family",
                        "Phone",
                        "Website"},
                        LAMBDA(
                            x,
                            LET(
                                n,
                                TEXTSPLIT(
                                    y,
                                    ",",
                                    ,
                                    TRUE
                                ),
                                m,
                                IF(
                                    COUNTA(
                                        IFERROR(
                                            FILTER(
                                                n,
                                                n=x
                                            ),
                                            ""
                                        )
                                    )>1,
                                    x,
                                    ""
                                ),
                                m
                            )
                        )
                    )
                )
            )
        ),
        HSTACK(
            SEQUENCE(
                4
            ),
            IF(
                b="",
                "-",
                b
            ),
            IF(
                c="",
                "-",
                c
            )
        )
    )
)
Excel solution 10 for Determining Missing Fields, proposed by Hussein SATOUR:
=TEXTSPLIT(
    CONCAT(
        LET(
            i,
            B3:B15,
            b,
            SCAN(
                ,
                IF(
                    i="Name",
                    SEQUENCE(
                        COUNTA(
                            i
                        )
                    )
                ),
                LAMBDA(
                    x,
                    y,
                    IF(
                        y,
                        y,
                        x
                    )
                )
            ),
            c,
            XMATCH(
                b,
                UNIQUE(
                    b
                )
            ),
            BYROW(
                UNIQUE(
                    c
                ),
                LAMBDA(
                    z,
                    LET(
                        d,
                        FILTER(
                            i,
                            c=z
                        ),
                        z&"/"&IFERROR(
                            ARRAYTOTEXT(
                                UNIQUE(
                                    VSTACK(
                                        UNIQUE(
                            i
                        ),
                                        d
                                    ),
                                    ,
                                    1
                                )
                            ),
                            "-"
                        )&"/"&IFERROR(
                            ARRAYTOTEXT(
                                UNIQUE(
                                    FILTER(
                                        d,
                                        XMATCH(
                                            d,
                                            d
                                        )-XMATCH(
                                            d,
                                            d,
                                            ,
                                            -1
                                        )
                                    )
                                )
                            ),
                            "-"
                        )
                    )
                )
            )
        )&"|"
    ),
    "/",
    "|",
    1
)

Solving the challenge of Determining Missing Fields with Python

Python solution 1 for Determining Missing Fields, proposed by Konrad Gryczan, PhD:
import pandas as pd

# Read the Excel file
path = 'CH-074 Determining missing fields.xlsx'
input = pd.read_excel(path, usecols="B:C", skiprows=1, nrows=14)
test = pd.read_excel(path, usecols="F:H", skiprows=1, nrows=4)

input['cumsum'] = (input['Info'] == "Name").cumsum()
result = (input.groupby(['cumsum', 'Info'])
 .size()
 .unstack(fill_value=0)
 .reset_index()
 .melt(id_vars=['cumsum'], var_name='Info', value_name='n')
 .groupby(['cumsum', 'n'])['Info']
 .apply(list)
 .unstack(fill_value=[])
 .reset_index())
result['Missing fields'] = result[0].apply(lambda x: "-" if not x else ", ".join(x))
result['Duplicate Fields'] = result[2].apply(lambda x: "-" if not x else ", ".join(x))
result = result[['cumsum', 'Missing fields', 'Duplicate Fields']]
result.columns = ['Record No', 'Missing fields', 'Duplicate Fields']

print(result)
print(test)

 

Python solution 2 for Determining Missing Fields, proposed by Abdallah Ally:
import pandas as pd

# Read the Excel file
file_path = 'CH-074 Determining missing fields.xlsx'
df = pd.read_excel(file_path, skiprows=1, usecols='B:C')

# Perform data wrangling
df['Order'] = df['Info'].eq('Name'
Python solution 2 for Determining Missing Fields, proposed by Abdallah Ally:

Leave a Reply