Extract all the Dates from the text provided in the question table in different formats. Be careful with the use of the number for other purpose. Note: The text is generated by ChatGPT.
📌 Challenge Details and Links
Challenge Number: 78
Challenge Difficulty: ⭐⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Extract From Text! Part 2 with Power Query
Power Query solution 1 for Extract From Text! Part 2, proposed by Zoran Milokanović:
let
Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content][Text]{0},
F = (t, p) =>
Web.Page(
"<script> var t = """
& t
& """; var p = new RegExp('"
& p
& "', 'g'); document.write(t.match(p).join('; ')); </script>"
)[Data]{0}[Children]{0}[Children]{1}[Text]{0},
S = Text.Split(
F(
Source,
"bd{1,2}[-/.]d{1,2}[-/.]d{4}|d{4}[-/.]d{2}[-/.]d{2}|(?:January|February|March|April|May|June|July|August|September|October|November|December) d{1,2}, d{4}|w+ d{1,2}(?:st|nd|rd|th), d{4}|w+ d{1,2}(?:st|nd|rd|th) to w+ d{1,2}(?:st|nd|rd|th), d{4}"
),
"; "
)
in
S
Power Query solution 2 for Extract From Text! Part 2, proposed by Brian Julius:
lete solution, but asked me if I wanted to expand the Regex to include other potentially likely date delimiters (I said "sure, thanks"...).
Rodan's solution:
let
Source = DataRaw,
RScript = R.Execute("library(stringr)#(lf)df <- dataset#(lf)target <- ""\b(\d{1,2}[-/.]\d{1,2}[-/.]\d{4}|\d{4}[-/.]\d{2}[-/.]\d{2}|(?:January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}|\w+ \d{1,2}(?:st|nd|rd|th) to \w+ \d{1,2}(?:st|nd|rd|th), \d{4})\b""#(lf)df$Result <- sapply(str_extract_all(df$Text, target ), function(x) paste(x, collapse = ""%""))#(lf)df",[dataset=Source]){[Name="df"]}[Value],
SplitToRows = Table.ExpandListColumn(Table.TransformColumns(RScript, {{"Result", Splitter.SplitTextByDelimiter("%", QuoteStyle.Csv)}}), "Result"),
Clean = Table.RemoveColumns(SplitToRows,{"Text"})
in
Clean
Abu Bakar N. Alvi - per our earlier conversation...
Power Query solution 3 for Extract From Text! Part 2, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
Sol = Table.Combine(Table.AddColumn(Source, "A", each
let
a = Splitter.SplitTextByAnyDelimiter({"on ", "from ", "by ", "is ", "for "})([Text]),
b = List.Select(a, each List.AnyTrue(List.Transform({"0".."9"}, (x)=> Text.Contains(_,x) ))),
c = List.Transform(b, each Splitter.SplitTextByDelimiter(" at")(Text.TrimEnd(_, {"A".."z"," ",",", "."})){0}),
d = List.Transform({1..12}, each Date.MonthName(
hashtag
#date(2024,_,1))),
e = List.Select(c, each List.AnyTrue(List.Transform(d&{"/","-"}, (x)=> Text.Contains(_,x)))),
f = Table.FromColumns({e}, {"Result"})
in f)[A])
in
Sol
Siento el código un tanto forzado, pero funciona para el reto.
Solving the challenge of Extract From Text! Part 2 with Excel
Excel solution 1 for Extract From Text! Part 2, proposed by Bo Rydobon 🇹🇭:
=LET(
y,
TEXTSPLIT(
REGEXREPLACE(
B3,
"[d/-]{4,}",
"$0|"
),
,
"|"
),
d,
TEXTAFTER(
y,
" ",
-1
), TOCOL(
IFNA(
REGEXREPLACE(
y,
".*?("&TEXTJOIN(
"|",
,
TEXT(
SEQUENCE(
12
)*29,
"mmmm"
)
)&")",
"$1",
1
),
IFS(
LEN(
--d
)
Excel solution 2 for Extract From Text! Part 2, proposed by محمد حلمي:
=LET( d,
TEXTSPLIT(
B3,
,
{".",
", f",
" a",
"on ",
"by ",
"om ",
"is ",
"or "}
), e,
FILTER(
d,
IFERROR(
FIND(
2024,
d
),
)
), SEQUENCE(
ROWS(
e
)
)&". "&e
)
Excel solution 3 for Extract From Text! Part 2, proposed by Oscar Mendez Roca Farell:
=LET(
t,
TEXT(
--REPT(
ROW(
1:12
),
{1,
1}
)&2024,
"mmmm"
)&{" ",
"_"} ,
r ,
REDUCE(
SUBSTITUTE(
SUBSTITUTE(
B3,
", 2024",
"_2024 "
),
" to ",
"_to_"
),
ROW(
1:12
),
LAMBDA(
i,
x,
SUBSTITUTE(
i,
INDEX(
t,
x,
1
),
INDEX(
t,
x,
2
)
)
)
),
s,
SUBSTITUTE(
SUBSTITUTE(
TEXTSPLIT(
r,
,
" "
),
"_",
" "
),
" 2024",
", 2024"
),
FILTER(
s,
ISNUMBER(
FIND(
2024,
s
)
)
)
)
Excel solution 4 for Extract From Text! Part 2, proposed by Julian Poeltl:
=LET(
T,
B3,
S,
TEXTSPLIT(
T,
"."
),
Y,
SEARCH(
2024,
S
),
SP,
IFERROR(
SEARCH(
" ",
S,
Y
),
Y+3
),
L,
LEFT(
S,
SP
),
D,
IFERROR(
TEXTAFTER(
L,
{" on ",
" by ",
" is ",
" from "}
),
TEXTAFTER(
L,
" for "
)
),
F,
TOCOL(
D,
3
),
TF,
TRIM(
F
),
SEQUENCE(
ROWS(
F
)
)&". "&TRIM(
IF(
RIGHT(
TF,
1
)=",",
LEFT(
TF,
LEN(
TF
)-1
),
TF
)
)
)
Excel solution 5 for Extract From Text! Part 2, proposed by JvdV –:
=TOCOL(
REGEXEXTRACT(
B3,
"((dd/dd/|(bw+s+dd?(?:st|nd|rd|th)?)(s+(?3))?, )d{4}|d{4}-dd-dd)",
1
)
)
Excel solution 6 for Extract From Text! Part 2, proposed by Sunny Baggu:
=LET(
t, B3,
l, SEQUENCE(LEN(t)),
_m, TEXT(DATE(2024, SEQUENCE(12), 1), "mmmm"),
_a, TOCOL(SEARCH(_m, t, l), 3),
_b, TOCOL(UNIQUE(SEARCH(2024, t, l)), 3) + LEN(2024),
_c, XLOOKUP(_a, _b, _b, , 1),
_d, MID(t, _a, _c - _a),
_e, FILTER(_b, ISNA(XMATCH(_b, _c))) - 4,
_f, TOCOL(UNIQUE(SEARCH(" ", t, l)), 3),
_g, XLOOKUP(_e, _f, _f, , -1),
_h, XLOOKUP(_e, _f, _f, , 1),
_i, MAP(_e, _g, LAMBDA(a, b, MID(t, MIN(a, b) + 1, ABS(a - b) + 3))),
_j, MAP(_e, _h, LAMBDA(a, b, MID(t, MIN(a, b), ABS(a - b)))),
r, VSTACK(_d, _i, _j),
DROP(SORT(FILTER(HSTACK(VSTACK(_a, _g, _h), r), LEN(r) > 5)), , 1)
)
Excel solution 7 for Extract From Text! Part 2, proposed by Bilal Mahmoud kh.:
=LET(a,
MAP({"on ";"by ";"from ";"is ";"for "},
LAMBDA(x,
TEXTAFTER(A3,
x,
IF((x="is "),
2,
IF(
x="for ",
3,
1
))))),
IF(
TEXTBEFORE(
a,
"2024"
)="",
TEXTBEFORE(
a,
" "
),
TEXTBEFORE(
a,
"2024"
)&"2024"
))
Excel solution 8 for Extract From Text! Part 2, proposed by El Badlis Mohd Marzudin:
=LET(
m,
TEXTJOIN(
"|",
,
UNIQUE(
TEXT(
SEQUENCE(
365
),
"MMMM"
)
)
),
r,
"b(d{2}/d{2}/d{4}|(?:"& m &") d{1,2}(?:st|nd|rd|th)?, d{4}|(?:"& m &") d{1,2}(?:st|nd|rd|th)? to 1 d{1,2}(?:st|nd|rd|th)?, d{4}|d{4}-d{2}-d{2})b",
a,
TOCOL(
REGEXEXTRACT(
B3,
r,
1
)
),
SEQUENCE(
ROWS(
a
)
)&". "&a
)
Excel solution 9 for Extract From Text! Part 2, proposed by Hussein SATOUR:
=LET(
M,
"(January|February|March|April|May|June|July|August|September|October|November|December)",
R,
REGEXEXTRACT(
B3,
"d{1,2}/d{1,2}/d{4}|" & M & "s+d{1,2}(st|nd|rd|th)?,s+d{4}|" & M & "s+d{1,2}(st|nd|rd|th)?s+tos+" & M & "s+d{1,2}(st|nd|rd|th)?,s+d{4}|d{4}-d{1,2}-d{1,2}",
1
),
SEQUENCE(
COUNTA(
R
)
)&". "&TOCOL(
R
)
)
Solving the challenge of Extract From Text! Part 2 with Python
Python solution 1 for Extract From Text! Part 2, proposed by Konrad Gryczan, PhD:
import pandas as pd
import re
path = "CH-078 Extract from Text 2.xlsx"
input = pd.read_excel(path, usecols="B", header=None, skiprows=2, nrows=1).iloc[0, 0]
test = pd.read_excel(path, usecols="B", skiprows=5, nrows=6)
test["Email Address"] = test["Email Address"].str[3:]
test = test.sort_values(by="Email Address").reset_index(drop=True)
patterns = [
r"d{4}-d{2}-d{2}",
r"d{2}/d{2}/d{4}",
r"bw+ d{1,2}[a-z]*?(?: to w+ d{1,2}[a-z]*)?, d{4}b"
]
result = pd.DataFrame()
for pattern in patterns:
result = result.append(pd.DataFrame(re.findall(pattern, input), columns=["Email Address"]))
result = result.sort_values(by="Email Address").reset_index(drop=True)
print(result.equals(test)) # True
Solving the challenge of Extract From Text! Part 2 with R
R solution 1 for Extract From Text! Part 2, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "files/CH-078 Extract from Text 2.xlsx"
input = read_xlsx(path, range = "B3", col_names = F) %>%
pull()
test = read_xlsx(path, range = "B6:B11") %>%
mutate(`Email Address` = str_sub(`Email Address`, 4))
patterns = c(
"\d{4}-\d{2}-\d{2}",
"\d{2}\/\d{2}\/\d{4}",
"\b\w+ \d{1,2}[a-z]*?(?: to \w+ \d{1,2}[a-z]*)?, \d{4}\b"
)
result = input %>%
str_extract_all(str_c(patterns, collapse = "|")) %>%
map(~ .x[.x != ""])
result = tibble(`Email Address` = result[[1]])
identical(result, test)
# [1] TRUE
