Customer information including Name, Family, Phone, and Website is provided in the question table. Determine for each customer which fields are missing and which fields are duplicated.
📌 Challenge Details and Links
Challenge Number: 74
Challenge Difficulty: ⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Determining Missing Fields with Power Query
Power Query solution 1 for Determining Missing Fields, proposed by Zoran Milokanović:
let
Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content],
F = {"Name", "Family", "Phone", "Website"},
P = Table.Group(
Source,
"Info",
{"A", each [Info]},
0,
(b, n) => Byte.From(List.PositionOf(F, b) >= List.PositionOf(F, n))
)[A],
S = Table.FromRows(
List.Transform(
P,
each
let
d = (b, n) =>
let
d = List.Difference(b, n)
in
Text.Combine({d, {"-"}}{Byte.From(d = {})}, ", ")
in
{List.PositionOf(P, _) + 1, d(F, _), d(_, F)}
),
{"Record No", "Missing Fields", "Duplicate fields"}
)
in
SPower Query solution 2 for Determining Missing Fields, proposed by Aditya Kumar Darak 🇮🇳:
let
Source = Excel.CurrentWorkbook(){[Name = "data"]}[Content],
AllHead = List.Distinct(Source[Info]),
Group = Table.Group(
Source,
"Info",
{
"R",
each [
U = Table.Distinct(_, "Info"),
F1 = [Info],
F2 = U[Info],
Missing = Text.Combine(List.Difference(AllHead, F2), ", "),
Duplicate = Text.Combine(List.Difference(F1, F2), ", ")
]
},
0,
(x, y) => Number.From(y = "Name")
),
Table = Table.FromRecords(Group[R], type table [Missing = text, Duplicate = text]),
Index = Table.AddIndexColumn(Table, "Record No", 1, 1, Int64.Type),
Return = Table.ReorderColumns(Index, {"Record No", "Missing", "Duplicate"})
in
ReturnPower Query solution 3 for Determining Missing Fields, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
Group = Table.Combine(Table.Group(Source, {"Info"}, {{"A", each
let
a = [Info],
b = List.Distinct(Source[Info]),
c = Text.Combine(List.Difference(b,a), ", "),
d = List.Select(b, each List.Count(List.Select(a, (x)=> (Text.Contains(x,_))))>1){0}? ??""
in Table.FromColumns({{c},{d}}, {"Missing Field", "Duplicate Fields"})
}},0, (a,b)=> Number.From(b[Info] = "Name"))[A]),
Sol = Table.FromColumns({{1..List.Count(List.Select(Source[Info], each _ = "Name"))}}
&Table.ToColumns(Group),{"Record No."}&Table.ColumnNames(Group))
in
SolPower Query solution 4 for Determining Missing Fields, proposed by Yaroslav Drohomyretskyi:
let
Джерело = Excel.CurrentWorkbook(){[Name = "Таблиця1"]}[Content],
Result = Table.SelectColumns(
Table.AddIndexColumn(
Table.Group(
Table.FillDown(
Table.AddColumn(
Table.AddIndexColumn(Джерело, "Index", 1, 1, Int64.Type),
"x",
each if [Info] = "Name" then [Index] else null
),
{"x"}
),
{"x"},
{
{
"Missing Fields",
each Text.Combine(List.Difference(List.Distinct(Джерело[Info]), _[Info]), ", ")
},
{
"Duplicate Fields",
each Text.Combine(List.Difference(_[Info], List.Distinct(Джерело[Info])), ", ")
}
}
),
"Record No",
1,
1,
Int64.Type
),
{"Record No", "Missing Fields", "Duplicate Fields"}
)
in
ResultPower Query solution 5 for Determining Missing Fields, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
A = Table.AddIndexColumn(S, "In", 1, 1, Int64.Type),
B = Table.AddColumn(A, "Row", each if [Info] = "Name" then [In] else null),
C = Table.FillDown(B, {"Row"}),
D = Table.RemoveColumns(C, {"In"}),
E = Table.Group(
D,
{"Row"},
{{"Tbl", each _, type table [Info = text, Info2 = text, Custom = number]}}
),
F = Table.AddColumn(
E,
"Missing",
each Text.Combine(List.Difference(List.Distinct(S[Info]), List.Distinct([Tbl][Info])), ", ")
),
DupF = (Dup) =>
let
TD = Table.Group(Dup, {"Info"}, {{"Count", each Table.RowCount(_), Int64.Type}}),
TD2 = Table.SelectRows(TD, each [Count] <> 1),
TD3 = Text.Combine(TD2[Info], ", ")
in
TD3,
G = Table.AddColumn(F, "Duplicate", each DupF([Tbl])),
H = Table.AddIndexColumn(G, "Record No", 1, 1, Int64.Type),
I = Table.SelectColumns(H, {"Record No", "Missing", "Duplicate"})
in
ISolving the challenge of Determining Missing Fields with Excel
Excel solution 1 for Determining Missing Fields, proposed by Bo Rydobon 🇹🇭:
=LET(x,
B3:B15,
n,
SCAN(0,
x,
LAMBDA(a,
v,
a+(v=@+x))),p,
PIVOTBY(
n,
x,
x,
ROWS,
,
0,
,
0
),
L,
LAMBDA(
c,
BYROW(
DROP(
p,
1,
1
),
LAMBDA(
x,
ARRAYTOTEXT(
FILTER(
DROP(
TAKE(
p,
1
),
,
1
),
IF(
c,
N(
+x
)>1,
x=""
),
"-"
)
)
)
)
),VSTACK(
F2:H2,
HSTACK(
UNIQUE(
n
),
L(
0
),
L(
1
)
)
))
Excel solution 2 for Determining Missing Fields, proposed by 🇰🇷 Taeyong Shin:
=LET(
d,
B3:B15,
n,
SCAN(
,
XMATCH(
d&MAP(
d,
LAMBDA(
x,
COUNTIF(
A3:x,
x
)
)
),
"Name"&{1;2;3;4}
),
LAMBDA(
a,
v,
IFNA(
v,
a
)
)
),
VSTACK(
F2:H2,
DROP(
GROUPBY(
n,
d,
HSTACK(
LAMBDA(
x,
ARRAYTOTEXT(
IFERROR(
UNIQUE(
VSTACK(
B3:B6,
x
),
,
1
),
"-"
)
)
),
LAMBDA(
x,
LET(
u,
UNIQUE(
x
),
ARRAYTOTEXT(
FILTER(
u,
MMULT(
XMATCH(
u,
x,
,
{-1,
1}
),
{1;-1}
),
"-"
)
)
)
)
),
,
0
),
1
)
)
)
Excel solution 3 for Determining Missing Fields, proposed by محمد حلمي:
=LET(
b,
B3:B15,
m,
UNIQUE(
b
),
s,
SEQUENCE(
COUNTIF(
b,
B3
)
), HSTACK(
s,
MAP(
{1,
0}&s,
LAMBDA(
a,
LET(
i,
LEFT(
a
),
v,
FILTER(
b,
a=i&SCAN(
0,
b=B3,
LAMBDA(
a,
v,
a+v
)
)
),
TEXTJOIN(
", ",
,
IF(
-i,
REPT(
m,
ISNA(
XMATCH(
m,
v
)
)
),
UNIQUE(
REPT(
v,
XMATCH(
v,
v
)
Excel solution 4 for Determining Missing Fields, proposed by محمد حلمي:
=LET(
b,
B3:B15,
k,
SCAN(
0,
b=B3,
LAMBDA(
a,
v,
a+v
)
), REDUCE(
F2:H2,
UNIQUE(
k
),
LAMBDA(
a,
v,
LET(
y,
UNIQUE(
b
),
i,
FILTER(
b,
k=v
),
e,
XMATCH(
y,
i
),
VSTACK(
a,
HSTACK(
v,
TEXTJOIN(
", ",
,
REPT(
y,
ISNA(
e
)
)
),
TEXTJOIN(
", ",
,
REPT(
y,
IFNA(
XMATCH(
y,
i,
,
-1
)>e,
)
)
)
)
)
)
)
)
)
Excel solution 5 for Determining Missing Fields, proposed by Oscar Mendez Roca Farell:
=LET(
b,
B3:B15,
m,
MAP(
b,
LAMBDA(
b,
COUNTIF(
B3:b,
"Name"
)
)
),
REDUCE(
F2:H2,
UNIQUE(
m
),
LAMBDA(
i,
x,
LET(
e,
UNIQUE(
b
),
f,
FILTER(
b,
m=x
),
VSTACK(
i,
HSTACK(
x,
BYCOL(
IF(
MMULT(
N(
TOROW(
f
)=e
),
SEQUENCE(
ROWS(
f
)
)^0
)={0,
2},
e,
""
),
LAMBDA(
c,
TEXTJOIN(
", ",
,
c
)
)
)
)
)
)
)
)
)
Excel solution 6 for Determining Missing Fields, proposed by Julian Poeltl:
=LET(
Q,
B3:C15,
I,
TAKE(
Q,
,
1
),
U,
UNIQUE(
I
),
BR,
"Name"&DROP(
TEXTSPLIT(
TEXTJOIN(
"|",
,
I
),
,
"Name"
),
1
),
NR,
SEQUENCE(
ROWS(
BR
)
),
MF,
IFERROR(
MAP(
BR,
LAMBDA(
A,
TEXTJOIN(
", ",
,
FILTER(
U,
ISERROR(
SEARCH(
U,
A
)
)
)
)
)
),
"-"
),
DF,
IFERROR(
MAP(
BR,
LAMBDA(
A,
TEXTJOIN(
", ",
,
FILTER(
U,
ISNUMBER(
SEARCH(
U&"*"&U,
A
)
)
)
)
)
),
"-"
),
VSTACK(
HSTACK(
"Record No",
"Missing Fields",
"Duplicate Fields"
),
HSTACK(
NR,
MF,
DF
)
)
)
Excel solution 7 for Determining Missing Fields, proposed by Kris Jaganah:
=LET(a,
B3:B15,
b,
C3:C15,
c,
SCAN(
0,
a,
LAMBDA(
x,
y,
IF(
y="Name",
x+1,
x
)
)
),
d,
UNIQUE(
a
),
e,
ROWS(
d
),
f,
MAX(
c
),
g,
INT(
SEQUENCE(
f,
e,
,
1/e
)
),
h,
TOCOL(
g
),
i,
TOCOL(
g,
,
1
),
j,
XMATCH(
a,
a
),
k,
PIVOTBY(
h,
XLOOKUP(
h&i,
c&K3#,
c,
0
),
INDEX(
a,
i
),
ARRAYTOTEXT
),
l,
UNIQUE(
c
),
m,
GROUPBY(
HSTACK(
c,
j
),
a,
COUNTA,
0,
0
),
n,
MAP(l,
LAMBDA(x,
IFERROR(INDEX(a,
FILTER(CHOOSECOLS(
m,
2
),
(TAKE(
m,
,
-1
)>1)*(TAKE(
m,
,
1
)=x),
"")),
"-"))),
HSTACK(
l,
VLOOKUP(
l,
k,
2,
0
),
n
))
Excel solution 8 for Determining Missing Fields, proposed by Sunny Baggu:
=LET( _l,
{"Name"; "Family"; "Phone"; "Website"}, _s,
SEQUENCE(
ROWS(
B3:B15
)
), _c,
B3:B15 = B3, _st,
FILTER(
_s,
_c
), _en,
VSTACK(
DROP(
_st,
1
) - 1,
TAKE(
_s,
-1
)
), REDUCE( {"Record No",
"Missing Fields",
"Duplicate fields"}, SEQUENCE(
ROWS(
_st
)
), LAMBDA(
x,
y,
VSTACK(
x,
LET(
_c1,
INDEX(
_st,
y,
1
),
_c2,
INDEX(
_en,
y,
1
),
_f,
INDEX(
B3:C15,
SEQUENCE(
_c2 - _c1 + 1,
,
_c1
),
{1,
2}
),
_cri,
MAP(
_l,
LAMBDA(
a,
SUM(
N(
TAKE(
_f,
,
1
) = a
)
)
)
),
_m,
ARRAYTOTEXT(
FILTER(
_l,
_cri = 0,
"-"
)
),
_d,
ARRAYTOTEXT(
FILTER(
_l,
_cri > 1,
"-"
)
),
HSTACK(
y,
_m,
_d
)
)
)
) ))
Excel solution 9 for Determining Missing Fields, proposed by Bilal Mahmoud kh.:
=VSTACK(
{"Record No.",
"Missing Fields",
"Duplicat Fields"},
LET(
a,
TEXTSPLIT(
TEXTJOIN(
",",
,
SCAN(
"",
B3:B15,
LAMBDA(
x,
y,
IF(
y="Name",
"|Name",
y
)
)
)
),
,
"|",
TRUE
),
b,
MAP(
a,
LAMBDA(
y,
TEXTJOIN(
",",
TRUE,
MAP(
{"Name",
"Family",
"Phone",
"Website"},
LAMBDA(
x,
IF(
ISNUMBER(
FIND(
x,
y,
1
)
),
"",
x
)
)
)
)
)
),
c,
MAP(
a,
LAMBDA(
y,
TEXTJOIN(
",",
TRUE,
MAP(
{"Name",
"Family",
"Phone",
"Website"},
LAMBDA(
x,
LET(
n,
TEXTSPLIT(
y,
",",
,
TRUE
),
m,
IF(
COUNTA(
IFERROR(
FILTER(
n,
n=x
),
""
)
)>1,
x,
""
),
m
)
)
)
)
)
),
HSTACK(
SEQUENCE(
4
),
IF(
b="",
"-",
b
),
IF(
c="",
"-",
c
)
)
)
)
Excel solution 10 for Determining Missing Fields, proposed by Hussein SATOUR:
=TEXTSPLIT(
CONCAT(
LET(
i,
B3:B15,
b,
SCAN(
,
IF(
i="Name",
SEQUENCE(
COUNTA(
i
)
)
),
LAMBDA(
x,
y,
IF(
y,
y,
x
)
)
),
c,
XMATCH(
b,
UNIQUE(
b
)
),
BYROW(
UNIQUE(
c
),
LAMBDA(
z,
LET(
d,
FILTER(
i,
c=z
),
z&"/"&IFERROR(
ARRAYTOTEXT(
UNIQUE(
VSTACK(
UNIQUE(
i
),
d
),
,
1
)
),
"-"
)&"/"&IFERROR(
ARRAYTOTEXT(
UNIQUE(
FILTER(
d,
XMATCH(
d,
d
)-XMATCH(
d,
d,
,
-1
)
)
)
),
"-"
)
)
)
)
)&"|"
),
"/",
"|",
1
)
Solving the challenge of Determining Missing Fields with Python
Python solution 1 for Determining Missing Fields, proposed by Konrad Gryczan, PhD:
import pandas as pd
# Read the Excel file
path = 'CH-074 Determining missing fields.xlsx'
input = pd.read_excel(path, usecols="B:C", skiprows=1, nrows=14)
test = pd.read_excel(path, usecols="F:H", skiprows=1, nrows=4)
input['cumsum'] = (input['Info'] == "Name").cumsum()
result = (input.groupby(['cumsum', 'Info'])
.size()
.unstack(fill_value=0)
.reset_index()
.melt(id_vars=['cumsum'], var_name='Info', value_name='n')
.groupby(['cumsum', 'n'])['Info']
.apply(list)
.unstack(fill_value=[])
.reset_index())
result['Missing fields'] = result[0].apply(lambda x: "-" if not x else ", ".join(x))
result['Duplicate Fields'] = result[2].apply(lambda x: "-" if not x else ", ".join(x))
result = result[['cumsum', 'Missing fields', 'Duplicate Fields']]
result.columns = ['Record No', 'Missing fields', 'Duplicate Fields']
print(result)
print(test)
Python solution 2 for Determining Missing Fields, proposed by Abdallah Ally:
import pandas as pd
# Read the Excel file
file_path = 'CH-074 Determining missing fields.xlsx'
df = pd.read_excel(file_path, skiprows=1, usecols='B:C')
# Perform data wrangling
df['Order'] = df['Info'].eq('Name'
