The question table presents the actual progress of 3 projects at the end of each month, but some values are missing. Like in the result table, we want to fill in the missing values using linear interpolation between the known values before and after the missing period. For example, for Project A: The progress for month 8 is missing, so the average of the progress values for months 7 and 9 is used. For months 4, 5, and 6, the progress is interpolated linearly from month 3 to month 7
📌 Challenge Details and Links
Challenge Number: 62
Challenge Difficulty: ⭐⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Missing Values! Part 2 with Power Query
Power Query solution 1 for Missing Values! Part 2, proposed by Omid Motamedisedeh:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
S1 = Table.TransformColumns(Source, {{"Date", Date.Month}}),
Result = Table.FromRows(
List.Combine(
List.Transform(
{"A" .. "C"},
(r) =>
List.Accumulate(
{1 .. 12},
{},
(a, b) =>
a
& {
[
y1 = Table.Last(Table.SelectRows(S1, each _[Project] = r and _[Date] <= b)),
y2 = Table.SelectRows(S1, each _[Project] = r and _[Date] >= b){0},
z = y1[Actual Progress]
+ (b - y1[Date])
* (y2[Actual Progress] - y1[Actual Progress])
/ List.Max({y2[Date] - y1[Date], 1}),
w = {Date.EndOfMonth(#date(2023, b, 1)), r, z}
][w]
}
)
)
),
Table.ColumnNames(S1)
)
in
ResultPower Query solution 2 for Missing Values! Part 2, proposed by Aditya Kumar Darak 🇮🇳:
let
Source = Excel.CurrentWorkbook(){[Name = "data"]}[Content],
Type = Table.TransformColumnTypes(Source, {"Date", type date}),
MyFun = (start_date, end_date) =>
Date.Year(end_date)
* 12 + Date.Month(end_date) - Date.Year(start_date)
* 12 - Date.Month(start_date) + 1,
Group = Table.Group(
Type,
"Project",
{
"R",
each List.Generate(
() => [a = List.Min([Date]), b = _{[Date = a]}[Actual Progress]],
(f) => f[a] <= List.Max([Date]),
(f) => [
a = Date.EndOfMonth(Date.AddMonths(f[a], 1)),
b = _{[Date = a]}?[Actual Progress]? ?? f[b] + e,
c = Table.SelectRows(_, (x) => x[Date] >= a){0},
d = MyFun(a, c[Date]),
e = (c[Actual Progress] - f[b]) / d
],
(f) => [Date = f[a], Project = _{0}[Project], Actual Progress = f[b]]
)
}
),
Return = Table.FromRecords(List.Combine(Group[R]))
in
ReturnPower Query solution 3 for Missing Values! Part 2, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
Type = Table.TransformColumnTypes(Source,{{"Date", type date}}),
Sol = Table.Combine(Table.Group(Type, {"Project"}, {{"A", each
let
a = _,
b = List.Transform(a[Date], each Date.Month(_)),
c = {b{0}..List.Last(b)},
d = List.Transform(List.Difference(c,b), each {Date.EndOfMonth(
hashtag
#date(2023,_,1)), a[Project]{0}, null}),
e = Table.Sort(a&Table.FromRows(d, Table.ColumnNames(a)), "Date"),
f = Table.Group(e, {"Actual Progress"}, {{"A", each _}, {"B", each Table.RowCount(_)}},0),
g = Table.AddIndexColumn(f, "Idx", 0,1),
h = List.Combine(Table.AddColumn(g, "N", each if [Actual Progress] <> null then {[Actual Progress]} else List.Transform({1..[B]}, (x)=> g[Actual Progress]{[Idx]-1}+(g[Actual Progress]{[Idx]+1}-g[Actual Progress]{[Idx]-1})/([B]+1)*x))[N]),
i = Table.FromColumns(List.RemoveLastN(Table.ToColumns(e))&{h}, Table.ColumnNames(e))
in i}})[A])
in
SolPower Query solution 4 for Missing Values! Part 2, proposed by Daniel Madhadha:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Type = Table.TransformColumnTypes(
Source,
{{"Date", type date}, {"Actual Progress", Percentage.Type}}
),
ChangeDate = (start_date, end_date) =>
Date.Year(end_date)
* 12 + Date.Month(end_date) - Date.Year(start_date)
* 12 - Date.Month(start_date) + 1,
Group = Table.Group(
Type,
"Project",
{
"R",
each List.Generate(
() => [a = List.Min([Date]), b = _{[Date = a]}[Actual Progress]],
(f) => f[a] <= List.Max([Date]),
(f) => [
a = Date.EndOfMonth(Date.AddMonths(f[a], 1)),
b = _{[Date = a]}?[Actual Progress]? ?? f[b] + e,
c = Table.SelectRows(_, (x) => x[Date] >= a){0},
d = ChangeDate(a, c[Date]),
e = (c[Actual Progress] - f[b]) / d
],
(f) => [Date = f[a], Project = _{0}[Project], Actual Progress = f[b]]
)
}
),
Return = Table.FromRecords(List.Combine(Group[R]))
in
ReturnAssistedSolving the challenge of Missing Values! Part 2 with Excel
Excel solution 1 for Missing Values! Part 2, proposed by Bo Rydobon 🇹🇭:
=LET(
d,
B3:B21,
p,
C3:C21,
REDUCE(
B2:D2,
UNIQUE(
p
),
LAMBDA(
a,
v,
LET(
s,
SEQUENCE(
12
),
e,
EOMONTH(
B3,
s-1
),
VSTACK(
a,
HSTACK(
e,
IF(
s,
v
),
MAP(
e,
LAMBDA(
x,
LET(
n,
XMATCH(
v&x,
p&d,
{-1,
1}
),
TREND(
INDEX(
D3:D21,
n
),
MONTH(
INDEX(
d,
n
)
),
MONTH(
x
)
)
)
)
)
)
)
)
)
)
)Excel solution 2 for Missing Values! Part 2, proposed by 🇰🇷 Taeyong Shin:
=LET(
d,
B3:B21,
c,
C3:C21,
s,
TOCOL(
UNIQUE(
c
)&EDATE(
@+d,
SEQUENCE(
,
12,
0
)
)
),
MAP(
s&{"",
"",
""},
IFNA(
{1,
2,
3},
s
),
LAMBDA(
x,
i,
LET(
n,
IFNA(
XMATCH(
x,
c&d,
{-1,
1}
),
ROWS(
d
)
),
a,
LEFT(
x
),
dt,
NUMBERVALUE(
x,
" ",
a
),
CHOOSE(
i,
dt,
a,
TREND(
INDEX(
D3:D21,
n
),
INDEX(
d,
n
),
dt
)
)
)
)
)
)Excel solution 3 for Missing Values! Part 2, proposed by محمد حلمي:
=LET(
c,
C3:C21,
y,
TOCOL(
EDATE(
B3,
SEQUENCE(
,
12,
0
)
)&UNIQUE(
c
)
), i,
XLOOKUP(
y,
B3:B21&c,
D3:D21
),
HSTACK(
--LEFT(
y,
5
),
RIGHT(
y
), ROUND(
SCAN(
0,
SEQUENCE(
ROWS(
i
)
),
LAMBDA(
a,
v,
LET(
j,
DROP(
i,
v-1
),
IFNA(
@j,
a+@TOCOL(
j-a,
2
)/XMATCH(
0,
-ISNA(
j
)
)
)
)
)
),
2
)
)
)
////
Not same value always
=REDUCE(
B2:D2,
UNIQUE(
C3:C21
),
LAMBDA(
a,
v,
LET(
x,
C3:C21=v,
b,
FILTER(
B3:B21,
x
),
d,
FILTER(
D3:D21,
x
),
n,
EDATE(
@b,
SEQUENCE(
12
)-1
),
VSTACK(
a,
CHOOSE(
{1,
2,
3},
n,
v,
IFNA(
XLOOKUP(
n,
b,
d
),
TREND(
d,
b,
n
)
)
)
)
)
)
)Excel solution 4 for Missing Values! Part 2, proposed by محمد حلمي:
=LET(
c,
C3:C21,
y,
TOCOL(
EDATE(
B3,
SEQUENCE(
,
12,
0
)
)&UNIQUE(
c
)
), i,
XLOOKUP(
y,
B3:B21&c,
D3:D21
),
HSTACK(
--LEFT(
y,
5
),
RIGHT(
y
), SCAN(
0,
SEQUENCE(
ROWS(
i
)
),
LAMBDA(
a,
v,
LET(
j,
DROP(
i,
v-1
),
IFNA(
@j,
a+@TOCOL(
j-a,
2
)/XMATCH(
0,
-ISNA(
j
)
)
)
)
)
)
)
)Excel solution 5 for Missing Values! Part 2, proposed by Julian Poeltl:
=LET(T,
B2:D21,
TT,
DROP(
T,
1
),
D,
TAKE(
TT,
,
1
),
SD,
MIN(
D
),
ED,
MAX(
D
),
P,
CHOOSECOLS(
TT,
2
),
UP,
UNIQUE(
P
),
AP,
DROP(
TT,
,
2
),
Dur,
MONTH(
ED
)-MONTH(
SD
)+1,
Se,
EOMONTH(
DATE(
YEAR(
SD
),
SEQUENCE(
Dur
),
1
),
0
),
R,
MAP(UP,
LAMBDA(A,
LET(BV,
XLOOKUP(
A&Se,
P&D,
AP,
,
-1
),
EV,
XLOOKUP(
A&Se,
P&D,
AP,
,
1
),
BM,
MONTH(
XLOOKUP(
A&Se,
P&D,
D,
,
-1
)
),
EM,
MONTH(
XLOOKUP(
A&Se,
P&D,
D,
,
1
)
),
IP,
IF(BV=EV,
BV,
BV+(EV-BV)/(EM-BM)*(MONTH(
Se
)-BM)),
TEXTJOIN(
",",
,
VSTACK(
Se,
REPT(
A&",",
Dur
),
IP
)
)))),
Z,
SUBSTITUTE(
TEXTJOIN(
",",
,
R
),
",,",
","
),
ST,
VSTACK(
TAKE(
T,
1
),
SORT(
WRAPROWS(
TOCOL(
WRAPCOLS(
TEXTSPLIT(
Z,
,
","
),
Dur
)
),
3
),
2
)
),
IFERROR(
ST*1,
ST
))Excel solution 6 for Missing Values! Part 2, proposed by Kris Jaganah:
=LET(a,
B3:B21,
b,
C3:C21,
c,
D3:D21,
d,
VSTACK(
DROP(
a,
1
),
TAKE(
a,
1
)
),
e,
VSTACK(
DROP(
b,
1
),
TAKE(
b,
1
)
),
f,
VSTACK(
DROP(
c,
1
),
TAKE(
c,
1
)
),
g,
MONTH(
a
),
h,
MONTH(
d
)-g,
i,
IF(b=e,
(f-c)/h,
c),
j,
TOCOL(
EOMONTH(
TAKE(
a,
1
),
SEQUENCE(
,
12,
0
)
)&UNIQUE(
b
)
),
k,
--LEFT(
j,
5
),
l,
RIGHT(
j
),
m,
XLOOKUP(
j,
a&b,
c,
0
),
n,
VSTACK(
0,
DROP(
m,
-1
)
),
o,
MAP(k,
l,
m,
LAMBDA(x,
y,
z,
IF(z=0,
TAKE(FILTER(i,
(d>=x)*(e=y)),
1),
0))),
p,
IF(
o>0,
n+o,
0
),
VSTACK(
{"Date",
"Project",
"Actual Progress"},
HSTACK(
k,
l,
SCAN(
,
p,
LAMBDA(
x,
y,
IF(
y>0,
x+y,
y
)
)
)+m
)
))Excel solution 7 for Missing Values! Part 2, proposed by Sunny Baggu:
=LET(
m,
SEQUENCE(
12
), _u,
UNIQUE(
C3:C21
), _a,
TOCOL(
IF(
SEQUENCE(
,
ROWS(
_u
)
),
EOMONTH(
DATE(
YEAR(
B3
),
m,
1
),
0
),
),
,
1
), _b,
TOCOL(
IF(
m,
TOROW(
_u
)
),
,
1
), _c,
XLOOKUP(
_a & _b,
B3:B21 & C3:C21,
D3:D21
), _s,
SEQUENCE(
ROWS(
_c
)
), _e1,
LAMBDA(
arr,
SCAN(
0,
arr,
LAMBDA(
a,
v,
IF(
ISERROR(
v
),
a,
v
)
)
)
), _fd,
_e1(
_c
), _fu,
SORTBY(_e1(SORTBY(
_c,
_s,
-1
)),
_s,
-1), _ll,
XMATCH(
_b & _fd,
_b & _fd
), _ul,
XMATCH(
_b & _fd,
_b & _fd,
1,
-1
) + 1, _r,
TOCOL(
IF(
SEQUENCE(
,
ROWS(
_u
)
),
m
),
,
1
), HSTACK( _a, _b, MAP( _fd, _fu, _ll, _ul, _s, LAMBDA(
r,
s,
t,
u,
v,
FORECAST.LINEAR(
v,
VSTACK(
r,
s
),
VSTACK(
t,
u
)
)
)
) )
)Solving the challenge of Missing Values! Part 2 with Python
Python solution 1 for Missing Values! Part 2, proposed by Konrad Gryczan, PhD:
import pandas as pd
from datetime import timedelta
from scipy.interpolate import interpolate
input = pd.read_excel("CH-062 Missing Values.xlsx", usecols="B:D", skiprows=1, nrows=19)
test = pd.read_excel("CH-062 Missing Values.xlsx", usecols="H:J", skiprows=1)
test.columns = test.columns.str.replace('.1', '')
input["Date"] = pd.to_datetime(input["Date"]) + timedelta(days=1)
all_dates = pd.date_range(start=input["Date"].min(), end=input["Date"].max(), freq='MS')
all_dates = pd.DataFrame(all_dates, columns=["Date"])
all_dates["Date"] = pd.to_datetime(all_dates["Date"]) - timedelta(days=1)
input["Date"] = pd.to_datetime(input["Date"]) - timedelta(days=1)
all_projects = pd.DataFrame(input["Project"].unique(), columns=["Project"])
all_dates["key"] = 0
all_projects["key"] = 0
all_dates = all_dates.merge(all_projects, on="key").drop(columns=["key"]).sort_values(["Project","Date"]).reset_index().drop(columns="index")
all_dates = all_dates.merge(input, on=["Project","Date"], how="left")
all_dates["Actual Progress"] = all_dates.groupby("Project")["Actual Progress"].transform(lambda x: x.interpolate())
print(all_dates["Actual Progress"].round(4).equals(test["Actual Progress"].round(4)))Solving the challenge of Missing Values! Part 2 with R
R solution 1 for Missing Values! Part 2, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
library(zoo)
library(padr)
input = read_excel("files/CH-062 Missing Values.xlsx", range = "B2:D21")
test = read_excel("files/CH-062 Missing Values.xlsx", range = "H2:J38")
result = input %>%
mutate(Date = Date + days(1)) %>%
group_by(Project) %>%
pad() %>%
mutate(`Actual Progress` = na.approx(`Actual Progress`)) %>%
ungroup() %>%
mutate(Date = Date - days(1))
all.equal(test,result)
#> [1] TRUER solution 2 for Missing Values! Part 2, proposed by Anil Kumar Goyal:
ashtag
#rstats
data <- read_excel("OM Challanges/CH-062 Missing Values.xlsx", range = "B2:D21")
data %>%
mutate(Date = as.Date(Date)) %>%
group_by(Project) %>%
complete(Date = seq.Date(min(Date)+1, max(Date)+1, by = "months")-1) %>%
mutate(`Actual Progress` = scales::percent(as.vector(imputeTS::na_interpolation(as.ts(`Actual Progress`)))))