Extract part no. and dates from the problem table into result table as shown. Sort on part no. and date. (Dates are in MDY format)
📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 199
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Extract Part and Date with Power Query
Power Query solution 1 for Extract Part and Date, proposed by Zoran Milokanović:
let
Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content],
F = (t, p) =>
Web.Page(
"<script> var t = """
& t
& """; var p = new RegExp('"
& p
& "', 'g'); document.write(t.match(p).join(';')); </script>"
)[Data]{0}[Children]{0}[Children]{1}[Text]{0},
E = (l, b) => List.Select(l, each Text.Contains(_, "/") = b),
H = {"Part No.", "Date"},
S = Table.Sort(
Table.FromRows(
List.TransformMany(
Table.TransformRows(
Table.ReplaceValue(Source, "//", "/", Replacer.ReplaceText, {"String"}),
each Text.Split(F([String], "bd{1,2}/d{1,2}/d{2,4}b|bd{3,}b"), ";")
),
each E(_, true),
(_, o) => {Number.From(E(_, false){0}), Date.From(o, "en-US")}
),
H
),
H
)
in
S
Power Query solution 2 for Extract Part and Date, proposed by Kris Jaganah:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Ans = Table.TransformColumns(
Source,
{
"String",
each [
a = Text.Split(Text.Replace(_, "//", "/"), " "),
b = List.RemoveItems(
List.Transform(a, each try DateTime.From(_, "en-US") otherwise ""),
{""}
),
c = List.Sum(List.RemoveNulls(List.Transform(a, each try Number.From(_) otherwise null)))
][[c], [b]]
}
),
Xpand = Table.ExpandRecordColumn(Ans, "String", {"c", "b"}, {"Part No.", "Date"}),
XpDate = Table.ExpandListColumn(Xpand, "Date"),
Sort = Table.Sort(XpDate, {{"Part No.", 0}, {"Date", 0}})
in
Sort
Power Query solution 3 for Extract Part and Date, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Sol = Table.Sort(
Table.Combine(
Table.AddColumn(
Source,
"A",
each
let
a = Text.SplitAny([String], " ,"),
b = List.RemoveNulls(List.Transform(a, each try Number.From(_) otherwise null)),
c = List.Transform(
List.Select(a, each Text.Contains(_, "/")),
each Date.From(Text.Replace(_, "//", "/"))
),
d = Table.FillDown(Table.FromRows(List.Zip({b, c}), {"Part No.", "Date"}), {"Part No."})
in
d
)[A]
),
{{"Part No.", 0}, {"Date", 0}}
)
in
Sol
Power Query solution 4 for Extract Part and Date, proposed by Eric Laforce:
let
Source = Excel.CurrentWorkbook(){[Name = "tData199"]}[Content],
CN = {"Part No.", "Date"},
Split = List.Transform(
Source[String],
each
let
L = Text.SplitAny(Text.Replace(_, "//", "/"), " ,"),
pNum = List.Select(L, each try Number.From(_) <> null otherwise false){0},
Dates = List.Select(L, each try Date.From(_) <> null otherwise false)
in
Table.FromRows(List.Transform(Dates, each {pNum, _}), CN)
),
T = Table.TransformColumnTypes(Table.Combine(Split), {"Date", type date}),
Sort = Table.Sort(T, List.Zip({CN, {0, 0}}))
in
Sort
Power Query solution 5 for Extract Part and Date, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
S1 = Table.ReplaceValue(S, "//", "/", Replacer.ReplaceText, {"String"}),
A = Table.AddColumn(S1, "T", each Text.Split([String], " ")),
B = Table.ExpandListColumn(A, "T"),
C = Table.DuplicateColumn(B, "T", "T1"),
D = Table.TransformColumnTypes(C, {{"T", type date}, {"T1", Int64.Type}}),
E = Table.ReplaceErrorValues(D, {{"T", null}, {"T1", null}}),
F = Table.Group(
E,
{"String"},
{{"Tbl", each _, type table [String = text, T = nullable date, T1 = nullable number]}}
),
G = Table.AddColumn(
F,
"Tb",
each Table.Distinct(Table.FillUp(Table.FillDown([Tbl], {"T", "T1"}), {"T", "T1"}))
),
H = Table.SelectColumns(G, {"Tb"}),
I = Table.ExpandTableColumn(H, "Tb", {"String", "T", "T1"}, {"String", "T", "T1"}),
J = Table.RenameColumns(I, {{"T", "Date"}, {"T1", "PartNo"}}),
K = Table.SelectColumns(J, {"PartNo", "Date"}),
L = Table.TransformColumnTypes(K, {{"Date", type date}}),
M = Table.Sort(L, {{"PartNo", Order.Ascending}, {"Date", Order.Ascending}})
in
M
Power Query solution 6 for Extract Part and Date, proposed by Yaroslav Drohomyretskyi:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Replace = Table.ReplaceValue(Source, "//", "/", Replacer.ReplaceText, {"String"}),
PartNo = Table.ExpandListColumn(
Table.AddColumn(
Replace,
"Part No.",
each List.Select(
Text.Split([String], " "),
each try Number.FromText(_) is number otherwise null
)
),
"Part No."
),
Date = Table.TransformColumnTypes(
Table.ExpandListColumn(
Table.AddColumn(
PartNo,
"Date",
each List.Select(Text.Split([String], " "), each try Date.FromText(_) is date otherwise null)
),
"Date"
),
{{"Date", type date}}
),
Sort = Table.Sort(Date, {{"Part No.", Order.Ascending}, {"Date", Order.Ascending}}),
Remove = Table.RemoveColumns(Sort, {"String"})
in
Remove
Power Query solution 7 for Extract Part and Date, proposed by Ahmed Ariem:
let
Source = Excel.CurrentWorkbook(){[Name = "tbl"]}[Content],
promt = Table.PromoteHeaders(Source, [PromoteAllScalars = true]),
AddColumn = Table.AddColumn(
promt,
"Custom",
each [
a = Text.Select([String], {"0" .. "9", "/", " "}),
b = Text.Split(a, " "),
c = List.Select(b, (x) => x <> ""),
d = List.Select(c, (x) => not Text.Contains(x, "/")),
e = List.Transform(
List.Select(c, (x) => Text.Contains(x, "/")),
(x) => Text.Replace(x, "//", "/")
),
f = Table.FromList(List.Zip({e, List.Repeat(d, List.Count(e))}), (x) => x)
][f]
),
#"Expanded Custom" = Table.ExpandTableColumn(
AddColumn,
"Custom",
{"Column1", "Column2"},
{"Column1", "Column2"}
),
#"Changed Type with Locale" = Table.TransformColumnTypes(
#"Expanded Custom",
{{"Column1", type date}},
"en-US"
),
#"Changed Type" = Table.TransformColumnTypes(
#"Changed Type with Locale",
{{"Column2", type number}}
),
#"Sorted Rows" = Table.Sort(
#"Changed Type",
{{"Column2", Order.Ascending}, {"Column1", Order.Ascending}}
)
in
#"Sorted Rows"
Solving the challenge of Extract Part and Date with Excel
Excel solution 1 for Extract Part and Date, proposed by Bo Rydobon 🇹🇭:
=LET(
a,
A2:A5,
SORT(
--TEXTSPLIT(
CONCAT(
REGEXREPLACE(
a,
"(d+/d+)/+(d+)|(.)",
"${3:+: "®EXEXTRACT(
a,
"d{3}"
)&"-20$2/$1}"
)
),
"-",
" ",
1
)
)
)
Excel solution 2 for Extract Part and Date, proposed by Rick Rothstein:
=SORT(
0+TEXTSPLIT(
TEXTJOIN(
"|",
,
MAP(
A2:A5,
LAMBDA(
x,
LET(
t,
TEXTSPLIT(
SUBSTITUTE(
x,
"//",
"/"
),
{" ",
","}
),
i,
TOROW(
IFERROR(
0+t,
1/0
),
3
),
TEXTJOIN(
"|",
,
MIN(
i
)&" "&FILTER(
i,
i>4000
)
)
)
)
)
),
" ",
"|"
)
)
Excel solution 3 for Extract Part and Date, proposed by محمد حلمي:
=SORT(
0+TEXTSPLIT(
TEXTJOIN(
"|",
,
MAP(
A2:A5,
LAMBDA(
x,
LET(
i,
TOCOL(
--
TEXTSPLIT(
SUBSTITUTE(
x,
"//",
"/"
),
{" ",
","}
),
3
),
TEXTJOIN(
"|",
,
MIN(
i
)&" "&FILTER(
i,
i>4000
)
)
)
)
)
),
" ",
"|"
)
)
Excel solution 4 for Extract Part and Date, proposed by محمد حلمي:
=SORT(
DROP(
REDUCE(
0,
A2:A5,
LAMBDA(
A,
V,
LET(
I,
TOCOL(
--TEXTSPLIT(
SUBSTITUTE(
V,
"//",
"/"
),
" ",
","
),
2
),
VSTACK(
A,
IF(
{1,
0},
FILTER(
I,
I<40000
),
FILTER(
I,
I>40000
)
)
)
)
)
),
1
),
{1,
2}
)
Excel solution 5 for Extract Part and Date, proposed by Kris Jaganah:
=VSTACK({"Part No.",
"Date"},
SORT(DROP(REDUCE("",
A2:A5,
LAMBDA(x,
y,
VSTACK(x,
LET(a,
REGEXEXTRACT(
y,
"[^A-z&, ]+",
1
),
b,
TOCOL(
IFS(
LEN(
a
)>3,
a
),
3
),
c,
INDEX(TOCOL(a/(LEN(
a
)=3),
3),
SEQUENCE(
ROWS(
b
),
,
,
0
)),
d,
--TEXTSPLIT(
ARRAYTOTEXT(
b
),
"/",
", ",
1
),
HSTACK(
c,
DATE(
20&TAKE(
d,
,
-1
),
TAKE(
d,
,
1
),
CHOOSECOLS(
d,
2
)
)
))))),
1),
{1,
2}))
Excel solution 6 for Extract Part and Date, proposed by Julian Poeltl:
=LET(
R,
WRAPROWS(
TEXTSPLIT(
TEXTJOIN(
",",
,
MAP(
A2:A5,
LAMBDA(
S,
LET(
SP,
TEXTSPLIT(
S,
,
{" ",
","}
)*1,
F,
SORT(
FILTER(
SP,
ISNUMBER(
SP
)
),
,
1
),
TEXTJOIN(
",",
,
INDEX(
F,
1
)&","&TOCOL(
DROP(
F,
1
)
)
)
)
)
)
),
","
),
2
),
VSTACK(
HSTACK(
"Part No.",
"Date"
),
SORT(
R*1
)
)
)
Excel solution 7 for Extract Part and Date, proposed by Oscar Mendez Roca Farell:
=DROP(
REDUCE(
"",
SUBSTITUTE(
A2:A5,
"//",
"/"
),
LAMBDA(
i,
x,
LET(
t,
TEXTSPLIT(
x,
"/",
ROW(
1:10
)-1,
1
),
n,
SORT(
--TEXTSPLIT(
x&"|",
"|",
t,
1
)
),
SORT(
VSTACK(
i,
IFNA(
HSTACK(
@n,
DROP(
n,
1
)
),
@n
)
)
)
)
)
),
-1
)
Excel solution 8 for Extract Part and Date, proposed by Sunny Baggu:
=SORT(
DROP(
REDUCE(
"",
A2:A5,
LAMBDA(
a,
v,
VSTACK(
a,
LET(
_ts,
& TEXTSPLIT(
SUBSTITUTE(
v,
"//",
"/"
),
,
{"part",
" ",
".",
", "},
1
),
_f,
FILTER(
_ts,
1 - ISERR(
_ts + 0
)
),
_c,
SEARCH(
"/",
_f
),
_p,
FILTER(
_f,
ISERR(
_c
)
),
_d,
FILTER(
_f,
1 - ISERR(
_c
)
),
HSTACK(
IF(
--_d,
_p
),
TEXT(
_d,
"m/d/yyyy"
)
)
)
)
)
),
1
),
{1,
2},
{1,
1}
)
Excel solution 9 for Extract Part and Date, proposed by Abdallah Ally:
=VSTACK({"Part No.",
"Date"},
SORT(DROP(REDUCE("",
A2:A5,
LAMBDA(x,
y,
LET(a,
SUBSTITUTE(
y,
"//",
"/"
),
b,
--REGEXEXTRACT(
a,
" (d+)(?!/)"
),
c,
TOCOL(
REGEXEXTRACT(
a,
"(d+/d+//?d+)",
1
)
),
VSTACK(x,
HSTACK(EXPAND(
b,
COUNTA(
c
),
,
b
),
--(TEXTBEFORE(
TEXTAFTER(
c,
"/"
),
"/"
)&"/"&LEFT(
c,
FIND(
"/",
c
)
)&TEXTAFTER(
c,
"/",
2,
,
1
))))))),
1),
{1,
2},
{1,
1}))
Excel solution 10 for Extract Part and Date, proposed by Md. Zohurul Islam:
=LET(
z,
A2:A5,
hdr,
HSTACK(
"Part no.",
"Date"
),
w,
{"no.",
"no",
"part",
"part number"},
u,
MAP(
z,
LAMBDA(
x,
LET(
a,
TEXTSPLIT(
SUBSTITUTE(
x,
",",
""
),
" "
),
b,
SUBSTITUTE(
FILTER(
a,
IFERROR(
SEARCH(
"/",
a
),
0
)>0
),
"//",
"/"
),
c,
ARRAYTOTEXT(
MAP(
b,
LAMBDA(
y,
TEXT(
TEXTJOIN(
"/",
,
CHOOSECOLS(
TEXTSPLIT(
y,
"/"
)+0,
2,
1,
3
)
)+0,
"dd/mm/yyy"
)
)
)
),
d,
FILTER(
a,
ISNUMBER(
ABS(
TEXTSPLIT(
a,
w
)
)
)
),
e,
FILTER(
d,
ISERROR(
SEARCH(
"/",
d
)
)
),
f,
e&", "&c,
f
)
)
),
v,
DROP(
REDUCE(
"",
u,
LAMBDA(
x,
y,
LET(
a,
TEXTSPLIT(
y,
", "
),
b,
ABS(
TAKE(
a,
,
1
)
),
c,
IFNA(
HSTACK(
b,
ABS(
TOCOL(
DROP(
a,
,
1
)
)
)
),
b
),
VSTACK(
x,
c
)
)
)
),
1
),
VSTACK(
hdr,
SORT(
v,
{1,
2}
)
)
)
Excel solution 11 for Extract Part and Date, proposed by Songglod P.:
=LET(
str,
TEXTJOIN(
";",
TRUE,
A2:A5
),
t,
--TEXTSPLIT(
SUBSTITUTE(
str,
"//",
"/"
),
{",",
" "},
";",
TRUE
),
arr,
BYROW(
t,
LAMBDA(
x,
LET(
txt,
SORT(
FILTER(
x,
ISNUMBER(
x
)
),
,
,
TRUE
),
ARRAYTOTEXT(
DROP(
TAKE(
txt,
,
1
)&"|"&TOCOL(
txt
),
1
)
)
)
)
),
SORT(
--TEXTSPLIT(
ARRAYTOTEXT(
arr
),
"|",
", "
)
)
)
Solving the challenge of Extract Part and Date with Python
Python solution 1 for Extract Part and Date, proposed by Konrad Gryczan, PhD:
import pandas as pd
import re
path = "PQ_Challenge_199.xlsx"
input = pd.read_excel(path, usecols="A", nrows = 4)
test = pd.read_excel(path, usecols="C:D", nrows = 8)
pattern_no = r"d{3}"
pattern_date = r"d{1,2}/+d{1,2}/+d{2}"
result = input.copy()
result['Part No.'] = result['String'].str.findall(pattern_no)
result['Date'] = result['String'].str.findall(pattern_date)
result = result.explode('Part No.').explode('Date')
result['Date'] = result['Date'].str.replace("//", "/")
result = result.drop(columns=['String'])
result['Part No.'] = pd.to_numeric(result['Part No.'])
result['Date'] = pd.to_datetime(result['Date'], format="%m/%d/%y")
result = result.sort_values(['Part No.', 'Date']).reset_index(drop=True)
print(result.equals(test)) # True
Solving the challenge of Extract Part and Date with Python in Excel
Python in Excel solution 1 for Extract Part and Date, proposed by Abdallah Ally:
import re
# Select a data range
df = xl("A1:A5", headers=True)
# Perform data wrangling
df['Part No.'] = df['String'].map(
lambda x: re.findall(r' (d+)(?!/)', x)[0]
)
df['Date'] = df['String'].map(
lambda x: [y.replace('//', '/') for y in re.findall(r'(d+/d+//?d+)', x)]
)
df = df.explode(column='Date')
df = df.sort_values(by=['Part No.', 'Date'], ignore_index=True)
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
df = df[['Part No.', 'Date']]
df
Solving the challenge of Extract Part and Date with R
R solution 1 for Extract Part and Date, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "Power Query/PQ_Challenge_199.xlsx"
input = read_excel(path, range = "A1:A5")
test = read_excel(path, range = "C1:D8")
pattern_no = "\d{3}"
pattern_date = "\d{1,2}/+\d{1,2}/+\d{2}"
result = input %>%
mutate(`Part No.` = str_extract_all(String, pattern_no),
Date = str_extract_all(String, pattern_date)) %>%
unnest(Date, `Part No.`) %>%
mutate(Date = str_replace_all(Date, "//", "/")) %>%
select(-String) %>%
mutate(`Part No.` = as.numeric(`Part No.`),
Date = as.POSIXct(Date, format = "%m/%d/%y", tz = "UTC")) %>%
arrange(`Part No.`, Date)
identical(result, test)
# [1] TRUE
R solution 2 for Extract Part and Date, proposed by Anil Kumar Goyal:
library(readxl)
library(tidyverse)
library(rebus)
df <- read_excel("PQ/PQ_Challenge_199.xlsx", range = cell_cols("A"))
d_rx <- MONTH_IN %R% NOT_WRD %R% DAY_IN %R% one_or_more(NOT_WRD) %R% CENTURY
df %>%
transmute(Part = map_chr(String, ~str_match(., "part\D+(\d+)")[,2]),
Date = str_extract_all(String, d_rx)) %>%
unnest_longer(Date) %>%
# Optional
mutate(Date = mdy(Date)) %>%
arrange(Part, Date)
&&
