Extract the years from the given data. The year should be valid in Excel. For example – Excel doesn’t support years below 1900, hence 1899 is not a valid year in Excel. If there is a range of years extracted, convert the range to all years in between. For Example 2018-2021=> 2018, 2019, 2020, 2021
📌 Challenge Details and Links
ExcelBI Excel Challenge Number: 702
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Extract Valid Year Ranges with Power Query
Power Query solution 1 for Extract Valid Year Ranges, proposed by Seokho MOON:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Res = Table.AddColumn(Source, "Answer Expected", Fun)[[#"Answer Expected"]],
Fun = each [
A = {"a" .. "z"} & {"A" .. "Z"} & {" "},
B = Text.Remove([Data], A),
C = if Text.Length(Text.AfterDelimiter(B, "-")) = 2 then Text.Start(B, 2) else "",
D = Text.Replace(Text.Replace(B, "-", ".." & C), "&", ","),
E = Expression.Evaluate("{" & D & "}"),
F = List.Select(E, each _ >= 1900 and _ <= 9999),
G = List.Transform(F, each Text.From(_)),
H = Text.Combine(G, ", ")
][H]
in
Res
Power Query solution 2 for Extract Valid Year Ranges, proposed by Maciej Kopczyński:
let
source = Excel.CurrentWorkbook(){[Name = "tblStart"]}[Content],
A = Table.AddIndexColumn(
Table.TransformColumns(
source,
{{"Data", each Text.Trim(Text.Remove(Text.Lower(_), {"a" .. "z"}))}}
),
"Sort",
1,
1
),
B = Table.TransformColumns(
Table.SelectRows(A, each Text.Contains([Data], "-")),
{
{
"Data",
each
if Text.Length(Text.Trim(Text.Split(_, "-"){1})) = 2 then
List.Numbers(
Number.From(Text.Split(_, "-"){0}),
Number.From(Text.Split(_, "-"){1})
- Number.From(Text.End(Text.Split(_, "-"){0}, 2))
+ 1
)
else
List.Numbers(
Number.From(Text.Split(_, "-"){0}),
Number.From(Text.Split(_, "-"){1}) - Number.From(Text.Split(_, "-"){0}) + 1
)
}
}
),
C = Table.TransformColumns(
B,
{"Data", each Text.Combine(List.Transform(_, Text.From), ", "), type text}
),
D = Table.TransformColumns(
Table.SelectRows(A, each not Text.Contains([Data], "-")),
{
{
"Data",
each Text.Combine(
List.Select(
List.Transform(Text.SplitAny(_, "&,"), each Text.Trim(_)),
each Number.From(_) >= 1900 and Number.From(_) <= 3000
),
", "
)
}
}
),
E = Table.Sort(D & C, {{"Sort", Order.Ascending}})[[Data]]
in
E
Power Query solution 3 for Extract Valid Year Ranges, proposed by Aleksandar Kovacevic:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Fx = each [
a = Text.Trim(Text.Remove(Text.Remove([Data], {"A" .. "Z"}), {"a" .. "z"})),
b =
if Text.Contains(a, "-") and Text.Length(a) = 7 then
Text.Insert(a, 5, Text.Start(a, 2))
else
a,
c = Text.SplitAny(Text.Replace(b, "-", ".."), ",&"),
d = List.Transform(c, each Expression.Evaluate("{" & _ & "}")),
e = Text.Combine(
List.Transform(List.Select(List.Combine(d), each _ > 1900 and _ < 9999), Text.From),
", "
)
][e],
Res = Table.AddColumn(Source, "Answer Expected", Fx)[[Answer Expected]]
in
Res
Solving the challenge of Extract Valid Year Ranges with Excel
Excel solution 1 for Extract Valid Year Ranges, proposed by Bo Rydobon 🇹🇭:
=MAP(A2:A9,LAMBDA(a,IFNA(ARRAYTOTEXT(MAP(REGEXEXTRACT(a,"(19|20)dd(?!d)(-d+)?",1),LAMBDA(b,ARRAYTOTEXT(SEQUENCE(MOD(RIGHT(b,2)-LEFT(b,2),100)+1,,LEFT(b,4)))))),"")))
Excel solution 2 for Extract Valid Year Ranges, proposed by 🇰🇷 Taeyong Shin:
=MAP(
A2:A9,
LAMBDA(
x,
LET(
r,
REGEXEXTRACT(
x,
"(?:(?:19|20)d{2}|(?
Excel solution 3 for Extract Valid Year Ranges, proposed by Kris Jaganah:
=MAP(A2:A9,
LAMBDA(x,
LET(a,
REGEXEXTRACT(
x,
"[0-9-]+",
1
),
b,
--TEXTSPLIT(
a,
"-"
),
c,
--TEXTAFTER(
@a,
"-",
,
,
,
0
),
d,
@(LEFT(
b,
4-LEN(
c
)
)&c),
TEXTJOIN(", ",
,
IFS(c,
SEQUENCE(
d-@b+1,
,
@b
),
(b>1900)*(b<2100),
a,
1,
"")))))
Excel solution 4 for Extract Valid Year Ranges, proposed by Duy Tùng:
=MAP(TRIM(
REGEXREPLACE(
A2:A9,
"[A-z]",
)
),
LAMBDA(x,
LET(a,
TEXTSPLIT(
x,
{", ",
"-",
" & "}
),
b,
SUM(
RIGHT(
a,
2
)*{-1,
1}
),
ARRAYTOTEXT(IF(COUNT(
FIND(
"-",
x
)
),
SEQUENCE(
IF(
b>0,
b,
SUM(
a*{-1,
1}
)
)+1,
,
@a
),
FILTER(a,
(LEN(
a
)<5)*(--a>1900),
""))))))
Excel solution 5 for Extract Valid Year Ranges, proposed by Sunny Baggu:
=MAP(
A2:A9,
LAMBDA(t,
LET(
_a,
TEXTSPLIT(
UPPER(
t
),
,
VSTACK(
"&",
" ",
", ",
CHAR(
SEQUENCE(
26,
,
65
)
)
),
1
),
_b,
NOT(
ISNUMBER(
SEARCH(
"-",
_a
)
)
) * (LEN(
_a
) > 4),
_c,
FILTER(
_a,
_b = 0,
""
),
_ts,
TEXTSPLIT(
INDEX(
_c,
1,
1
),
,
"-"
) + 0,
_ta,
INDEX(
_ts,
1,
1
),
_tb,
INDEX(
_ts,
2,
1
),
_tc,
IF(
LEN(
_tb
) = 2,
2000 + _tb,
_tb
),
_r,
IFERROR(
_tc - _ta + 1,
0
),
_d,
IFERROR(
IF(
MID(
_c,
5,
1
) = "-",
SEQUENCE(
_r,
,
_ta
),
--_c
),
""
),
TEXTJOIN(
", ",
,
FILTER(
_d,
_d > 1900,
""
)
)
)
)
)
Excel solution 6 for Extract Valid Year Ranges, proposed by Md. Zohurul Islam:
=MAP(A2:A9,
LAMBDA(x,
LET(
a,
TRIM(
REGEXREPLACE(
x,
"[aA-zZ]",
)
),
b,
TEXTAFTER(
a,
"-"
),
c,
LEN(
b
),
d,
IFERROR(
IFS(
c<4,
TEXTBEFORE(
a,
"-"
)&"-"&LEFT(
a,
2
)&b
),
a
),
s,
IFERROR(
SEARCH(
"-",
a
),
0
),
e,
IF(
s=0,
0,
SUM(
TEXTSPLIT(
d,
"-"
)*{-1,
1}
)+1
),
f,
TEXTBEFORE(
d,
"-"
),
g,
IFERROR(
ARRAYTOTEXT(
SEQUENCE(
,
e,
0
)+f
),
d
),
h,
--TEXTSPLIT(
g,
{" & ",
", "}
),
j,
ARRAYTOTEXT(FILTER(h,
(h>1899)*(LEN(
h
)=4),
"")),
j)))
Excel solution 7 for Extract Valid Year Ranges, proposed by Hamidi Hamid:
=LET(x,
DROP(
TEXTSPLIT(
CONCAT(
"/"&A2:A9
),
{", ",
"-",
" "},
"/"
),
1
),
u,
MAP(
x,
LAMBDA(
a,
BYROW(
IFERROR(
MID(
a,
SEQUENCE(
,
30
),
1
)*1,
""
),
CONCAT
)
)
),
g,
IFERROR(IF((LEN(
u
)>4)+(LEFT(
u,
2
)*1<19),
"",
u)*1,
0),
v,
IFERROR(
SEARCH(
"-",
A2:A9
)*1,
0
),
k,
IF(
g=0,
0,
TEXT(
"01/01/"&g,
"yyyy"
)*1
),
r,
IF(
k=0,
"",
k
),
z,
IF(
v=0,
BYROW(
r,
LAMBDA(
a,
TEXTJOIN(
", ",
,
a
)
)
),
""
),
pk,
BYROW(
BYROW(
k,
LAMBDA(
a,
TEXTJOIN(
", ",
,
SEQUENCE(
,
MAX(
a
)-LARGE(
a,
2
)+1,
LARGE(
a,
2
),
1
)
)
)
),
CONCAT
),
q,
IF(
z<>"",
z,
pk
),
IF(
LEN(
q
)=1,
"",
q
))
Excel solution 8 for Extract Valid Year Ranges, proposed by Asheesh Pahwa:
=LET(alp,VSTACK(CHAR(SEQUENCE(26,,97)),CHAR(SEQUENCE(26,,65))),m,MAP(A2:A9,LAMBDA(v,CONCAT(TEXTSPLIT(v,alp," ",1)))),
_m,MAP(m,LAMBDA(a,LET(t,--TOROW(TEXTSPLIT(a,{",","&"})),
IFERROR(ARRAYTOTEXT(FILTER(t,(LEN(t)<5)*(t>1900),"")),a)))),
MAP(_m,LAMBDA(a,LET(f,ISNUMBER(FIND("-",a)),
m,MID(a,1,4),_m,MID(a,6,4),I,IF(LEN(_m)<4,20&_m,_m),
IF(f,ARRAYTOTEXT(SEQUENCE(I-m+1,,m)),a)))))
Excel solution 9 for Extract Valid Year Ranges, proposed by Guillermo Arroyo:
=MAP(
A2:A9,
LAMBDA(
m,
IFNA(
TEXTJOIN(
", ",
,
LET(
r,
REGEXEXTRACT(
m,
"(?
Excel solution 10 for Extract Valid Year Ranges, proposed by Fredson Alves Pinho:
=MAP(
A2:A9,
LAMBDA(
x,
LET(
a,
REGEXEXTRACT(
x,
"(?
Excel solution 11 for Extract Valid Year Ranges, proposed by Craig Runciman:
=LET(
a,
REGEXREPLACE(
TRIM(
REGEXREPLACE(
A2:A9,
"[^0-9-, ]|18d{2}|d{5,},*",
""
)
),
"(?<=d) ",
", "
),
BYROW(
a,
LAMBDA(
b,
IF(
ISERROR(
FIND(
"-",
@b
)
),
@b,
@TEXTJOIN(
", ",
,
SEQUENCE(
MOD(
--TEXTAFTER(
@b,
& "-"
)+1000,
2000
)-MOD(
--TEXTBEFORE(
@b,
"-"
)+1000,
2000
)+1,
,
--TEXTBEFORE(
@b,
"-"
)
)
)
)
)
)
)
Solving the challenge of Extract Valid Year Ranges with Python
Python solution 1 for Extract Valid Year Ranges, proposed by Konrad Gryczan, PhD:
import pandas as pd
import re
import numpy as np
path = "702 Extract Year.xlsx"
input = pd.read_excel(path, usecols="A", nrows=9, names=["Data"])
test = pd.read_excel(path, usecols="C", nrows=9, names=["Answer Expected"])
def extract_years(data):
years = re.findall(r"d{2,}", data)
years = [int(year) for year in years]
years = [year + 2000 if year < 100 else year for year in years]
years = [year if 1900 < year < 10000 else None for year in years]
return [year for year in years if year is not None]
def has_range(data):
return bool(re.search(r"d{2,}-d{2,}", data))
processed = []
for _, row in input.iterrows():
data = row["Data"]
years = extract_years(data)
if years:
if has_range(data):
years = list(range(min(years), max(years) + 1))
processed.append({"Data": data, "years": ", ".join(map(str, years))})
else:
processed.append({"Data": data, "years": None})
result = pd.DataFrame(processed)
res = input.merge(result, on="Data", how="left")
print(res['years'].equals(test['Answer Expected']))
Python solution 2 for Extract Valid Year Ranges, proposed by Alejandro Campos:
", ".join(map(str, sorted({y for start, end in re.findall(r'(?
Python solution 3 for Extract Valid Year Ranges, proposed by Ernesto Vega Castillo:
import pandas as pd
import re
from datetime import datetime
current_year = datetime.now().year
ruta = r"C:excelchallenge702.xlsx"
df = pd.read_excel(ruta, header=None)
df_rango = df.loc[1:8, 0] # Ajusta índices según el tamaño de tus datos
# print(df_rango)
def extract_years(df_rango):
def get_years(line):
matches = re.findall(r'(d{4})(?:-(d{2,4}))?', line)
years = set()
for start, end in matches:
end = start[:2] + end if end and len(end) == 2 else end or start
years.update(range(int(start), int(end) + 1))
return sorted(y for y in years if 1900 <= y <= current_year)
return pd.DataFrame({
"Answer Expected": [", ".join(map(str, get_years(line))) for line in df_rango]
})
result = extract_years(df_rango)
print(result)
Saludos!
Solving the challenge of Extract Valid Year Ranges with Python in Excel
Python in Excel solution 1 for Extract Valid Year Ranges, proposed by Alejandro Campos:
import re
def extract_years_per_line(data):
return pd.DataFrame({
"Extracted Years": [
", ".join(map(str, sorted({y for start, end in re.findall(r'(d{4})(?:-(d{2,4}))?', line)
for y in range(int(start), (int(start[:2] + end) if end and len(end) == 2 else int(end or start)) + 1)
if 1900 <= y <= 9999})))
if (years := {y for start, end in re.findall(r'(d{4})(?:-(d{2,4}))?', line)
for y in range(int(start), (int(start[:2] + end) if end and len(end) == 2 else int(end or start)) + 1)
if 1900 <= y <= 9999}) else ""
for line in data]})
df_result = extract_years_per_line(xl("A2:A9")[0])
df_result
Solving the challenge of Extract Valid Year Ranges with R
R solution 1 for Extract Valid Year Ranges, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "Excel/702 Extract Year.xlsx"
input = read_excel(path, range = "A1:A9")
test = read_excel(path, range = "C1:C9")
result = input %>%
mutate(years = str_extract_all(Data, "\d{2,}")) %>%
unnest(years) %>%
mutate(years = as.numeric(years)) %>%
mutate(years = ifelse(years < 100, years + 2000, years)) %>%
mutate(
years = ifelse(years > 1900 & years < 10000, years, NA),
has_range = ifelse(str_detect(Data, "\d{2,}-\d{2,}"), TRUE, FALSE)
) %>%
na.omit() %>%
group_by(Data) %>%
summarise(
years = if (any(has_range)) {
seq(min(years, na.rm = TRUE), max(years, na.rm = TRUE)) %>%
paste(collapse = ", ")
} else {
paste(years, collapse = ", ")
}
)
res = input %>%
left_join(result, by = "Data")
all.equal(res$years, test$`Answer Expected`)
# TRUE
&&
