K-nearest neighbors (KNN) is a simple technique for replacing missing values with the average of the nearest values. In the question table, the first five rows are complete, while the next five rows contain missing values. Replace the missing values by KNN using K=2 based on the procedure below: Consider row 6 with x=57, y=68, and z=blank. For the blank cells, calculate the distance of each of the first five rows from this row based on the filled values (x, y) using the formula: (𝑥𝑖−𝑥6)^2+(𝑦𝑖−𝑦6)^2 Resulting in distances of (5252, 1649, 765, 65, 1625). Select the two rows with the lowest distances (in this case, row 3 and row 4). Replace the value of z in row 6 with the average of the z values in the selected rows: (10+94)/2
📌 Challenge Details and Links
Challenge Number: 86
Challenge Difficulty: ⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Knn Missing Value! with Power Query
Power Query solution 1 for Knn Missing Value!, proposed by Omid Motamedisedeh:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
L1 = List.Select(Table.ToList(Source, each _), each List.NonNullCount(_) = 4),
L2 = List.Select(Table.ToList(Source, each _), each List.NonNullCount(_) < 4),
RL2 = List.TransformMany(
L2,
each {
[
a = List.Transform(
L1,
(x) => List.Accumulate({1, 2, 3}, 0, (m, n) => m + Number.Power((_{n} - x{n}) ?? 0, 2))
),
b = List.PositionOfAny(a, List.MinN(a, 2), 2),
c = List.Transform(
{0 .. 3},
(r) => if _{r} = null then (L1{b{0}}{r} + L1{b{1}}{r}) / 2 else _{r}
)
][c]
},
(a, b) => b
),
Result = Table.FromRows(List.Combine({L1, RL2}), Table.ColumnNames(Source))
in
Result
Solving the challenge of Knn Missing Value! with Excel
Excel solution 1 for Knn Missing Value!, proposed by Bo Rydobon 🇹🇭:
=LET(n,
D3:F12,
k,
TAKE(
n,
5
),
MAP(n,
LAMBDA(v,
LET(a,
D3:v,
r,
INDEX(
n,
ROWS(
a
), ),
IF(v,
v,
AVERAGE(TAKE(SORTBY(INDEX(
k,
,
COLUMNS(
a
)
),
BYROW(FILTER((k-r)^2,
r),
SUM)),
2)))))))
Excel solution 2 for Knn Missing Value!, proposed by محمد حلمي:
=MAP(D3:F12,
LAMBDA(a,
IF(a,
a,
LET(
d,
D3:F12,
q,
TAKE(
d,
XMATCH(
,
TOCOL(
d
)
)/3-1
),w,
CHOOSEROWS(
d,
ROW(
a
)-2
),s,
BYROW(FILTER((q-w)^2,
w),LAMBDA(
a,
SUM(
a
)
)),
AVERAGE(
XLOOKUP(
SMALL(
s,
{1,
2}
),
s,
INDEX(
q,
,
COLUMN(
a
)-3
)
)
)))))
Excel solution 3 for Knn Missing Value!, proposed by Oscar Mendez Roca Farell:
=--TEXTSPLIT(CONCAT(BYROW(D8:F12,
LAMBDA(r,
LET(e,
FILTER(
r,
r>0
),
b,
BYCOL(TAKE(SORTBY(FILTER(
D3:F7,
r=""
),
MMULT((e-FILTER(
D3:F7,
r>0
))^2,
TOCOL(
e
)^0)),
2),
LAMBDA(
c,
AVERAGE(
c
)
)),
ARRAYTOTEXT(
IF(
COUNT(
e
)>1,
IF(
r,
r,
b
),
HSTACK(
e,
b
)
)
))))&"|"),
",",
"|",
1,
,
"")
Excel solution 4 for Knn Missing Value!, proposed by Julian Poeltl:
=LET(T,
D3:F12,
R,
ROWS(
T
),
C,
COLUMNS(
T
),
Q,
LAMBDA(A,
D,
IF(A=0,
0,
(TAKE(
D,
5
)-A)^2)),
M,
LAMBDA(
R,
C,
LET(
CO,
IF(
C=1,
2,
1
),
CT,
IF(
C=3,
2,
3
),
B,
Q(
INDEX(
T,
R,
CO
),
CHOOSECOLS(
T,
CO
)
)+Q(
INDEX(
T,
R,
CT
),
CHOOSECOLS(
T,
CT
)
),
AVERAGE(
INDEX(
T,
XMATCH(
SMALL(
B,
SEQUENCE(
2
)
),
B
),
C
)
)
)
),
IF(
T=0,
MAP(
MAKEARRAY(
R,
C,
LAMBDA(
A,
B,
A
)
),
MAKEARRAY(
R,
C,
LAMBDA(
A,
B,
B
)
),
LAMBDA(
R,
C,
M(
R,
C
)
)
),
T
))
Excel solution 5 for Knn Missing Value!, proposed by Julian Poeltl:
=LET(X,
D3:D12,
Y,
E3:E12,
Z,
F3:F12,
Q,
LAMBDA(A,
D,
IF(A=0,
0,
(TAKE(
D,
5
)-A)^2)),
XM,
LAMBDA(
A,
LET(
B,
Q(
OFFSET(
A,
,
1
),
Y
)+Q(
OFFSET(
A,
,
2
),
Z
),
AVERAGE(
INDEX(
X,
XMATCH(
SMALL(
B,
SEQUENCE(
2
)
),
B
)
)
)
)
),
YM,
LAMBDA(
A,
LET(
B,
Q(
OFFSET(
A,
,
-1
),
X
)+Q(
OFFSET(
A,
,
1
),
Z
),
AVERAGE(
INDEX(
Y,
XMATCH(
SMALL(
B,
SEQUENCE(
2
)
),
B
)
)
)
)
),
ZM,
LAMBDA(
A,
LET(
B,
Q(
OFFSET(
A,
,
-2
),
X
)+Q(
OFFSET(
A,
,
-1
),
Y
),
AVERAGE(
INDEX(
Z,
XMATCH(
SMALL(
B,
SEQUENCE(
2
)
),
B
)
)
)
)
),
HSTACK(
VSTACK(
TAKE(
X,
5
),
MAP(
DROP(
X,
5
),
LAMBDA(
A,
IF(
A="",
XM(
A
),
A
)
)
)
),
VSTACK(
TAKE(
Y,
5
),
MAP(
DROP(
Y,
5
),
LAMBDA(
A,
IF(
A="",
YM(
A
),
A
)
)
)
),
VSTACK(
TAKE(
Z,
5
),
MAP(
DROP(
Z,
5
),
LAMBDA(
A,
IF(
A="",
ZM(
A
),
A
)
)
)
)
))
Excel solution 6 for Knn Missing Value!, proposed by Sunny Baggu:
=VSTACK(
C2:F7, HSTACK(
C8:C12, DROP(
REDUCE(
"⚽", SEQUENCE(
ROWS(
C8:C12
)
), LAMBDA(g,
h, VSTACK(
g, LET(
k,
INDEX(
D8:F12,
h, ), LET(
t,
D3:F7, r,
k, f,
r <> "", _a,
BYROW((FILTER(
t,
f
) - FILTER(
r,
f
)) ^ 2,
LAMBDA(
x,
SUM(
x
)
)), _b,
BYCOL( CHOOSEROWS(
FILTER(
t,
1 - f
),
XMATCH(
TAKE(
SORT(
_a
),
2
),
_a
)
), LAMBDA(
a,
AVERAGE(
a
)
) ), _c,
TOCOL(
VSTACK(
FILTER(
r,
f
),
_b
),
3
), _d,
TOCOL(
HSTACK(
TOROW(
IF(
f,
{1,
2,
3},
x
),
3
),
IF(
r = "",
{1,
2,
3},
x
)
),
3
), TOROW(
SORTBY(
_c,
_d
)
)
)
)
)
)
), 1
)
)
)
Excel solution 7 for Knn Missing Value!, proposed by Bilal Mahmoud kh.:
=LET(
R,
LAMBDA(
a,
b,
LET(
xi,
CHOOSECOLS(
a,
1
),
yi,
CHOOSECOLS(
a,
2
),
zi,
CHOOSECOLS(
a,
3
),
x,
CHOOSECOLS(
b,
1
),
y,
CHOOSECOLS(
b,
2
),
z,
CHOOSECOLS(
b,
3
),
OP,
LAMBDA(
n,
m,
ni,
mi,
POWER(
n-ni,
2
)+POWER(
m-mi,
2
)
),
Re,
IF(
z=0,
LET(
i,
OP(
x,
y,
xi,
yi
),
ii,
AVERAGE(
FILTER(
zi,
i<=SMALL(
i,
2
)
)
),
HSTACK(
x,
y,
ii
)
),
IF(
y=0,
LET(
i,
OP(
x,
z,
xi,
zi
),
ii,
AVERAGE(
FILTER(
yi,
i<=SMALL(
i,
2
)
)
),
HSTACK(
x,
ii,
z
)
),
LET(
i,
OP(
y,
z,
yi,
zi
),
ii,
AVERAGE(
FILTER(
xi,
i<=SMALL(
i,
2
)
)
),
HSTACK(
ii,
y,
z
)
)
)
),
Re
)
),
VSTACK(
D3:F7,
TEXTSPLIT(
TEXTJOIN(
"|",
,
BYROW(
D8:F12,
LAMBDA(
t,
TEXTJOIN(
",",
,
R(
D3:F7,
t
)
)
)
)
),
",",
"|"
)
)
)
Excel solution 8 for Knn Missing Value!, proposed by Nicolas Micot:
=MOYENNE(INDEX(F$3:F$7;
PRENDRE(TRIERPAR($C$3:$C$7;
BYROW($D$3:$F$7;
LAMBDA(l_row;
SOMME(SI($D8:$F8<>"";
(l_row-$D8:$F8)^2)))));
2)))
