Home » Extract Valid Year Ranges

Extract Valid Year Ranges

Extract the years from the given data. The year should be valid in Excel. For example – Excel doesn’t support years below 1900, hence 1899 is not a valid year in Excel. If there is a range of years extracted, convert the range to all years in between. For Example 2018-2021=> 2018, 2019, 2020, 2021

📌 Challenge Details and Links
ExcelBI Excel Challenge Number: 702
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Extract Valid Year Ranges with Power Query

Power Query solution 1 for Extract Valid Year Ranges, proposed by Seokho MOON:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Res = Table.AddColumn(Source, "Answer Expected", Fun)[[#"Answer Expected"]], 
  Fun = each [
    A = {"a" .. "z"} & {"A" .. "Z"} & {" "}, 
    B = Text.Remove([Data], A), 
    C = if Text.Length(Text.AfterDelimiter(B, "-")) = 2 then Text.Start(B, 2) else "", 
    D = Text.Replace(Text.Replace(B, "-", ".." & C), "&", ","), 
    E = Expression.Evaluate("{" & D & "}"), 
    F = List.Select(E, each _ >= 1900 and _ <= 9999), 
    G = List.Transform(F, each Text.From(_)), 
    H = Text.Combine(G, ", ")
  ][H]
in
  Res
Power Query solution 2 for Extract Valid Year Ranges, proposed by Maciej Kopczyński:
let
  source = Excel.CurrentWorkbook(){[Name = "tblStart"]}[Content], 
  A = Table.AddIndexColumn(
    Table.TransformColumns(
      source, 
      {{"Data", each Text.Trim(Text.Remove(Text.Lower(_), {"a" .. "z"}))}}
    ), 
    "Sort", 
    1, 
    1
  ), 
  B = Table.TransformColumns(
    Table.SelectRows(A, each Text.Contains([Data], "-")), 
    {
      {
        "Data", 
        each 
          if Text.Length(Text.Trim(Text.Split(_, "-"){1})) = 2 then
            List.Numbers(
              Number.From(Text.Split(_, "-"){0}), 
              Number.From(Text.Split(_, "-"){1})
                - Number.From(Text.End(Text.Split(_, "-"){0}, 2))
                + 1
            )
          else
            List.Numbers(
              Number.From(Text.Split(_, "-"){0}), 
              Number.From(Text.Split(_, "-"){1}) - Number.From(Text.Split(_, "-"){0}) + 1
            )
      }
    }
  ), 
  C = Table.TransformColumns(
    B, 
    {"Data", each Text.Combine(List.Transform(_, Text.From), ", "), type text}
  ), 
  D = Table.TransformColumns(
    Table.SelectRows(A, each not Text.Contains([Data], "-")), 
    {
      {
        "Data", 
        each Text.Combine(
          List.Select(
            List.Transform(Text.SplitAny(_, "&,"), each Text.Trim(_)), 
            each Number.From(_) >= 1900 and Number.From(_) <= 3000
          ), 
          ", "
        )
      }
    }
  ), 
  E = Table.Sort(D & C, {{"Sort", Order.Ascending}})[[Data]]
in
  E
Power Query solution 3 for Extract Valid Year Ranges, proposed by Aleksandar Kovacevic:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Fx = each [
    a = Text.Trim(Text.Remove(Text.Remove([Data], {"A" .. "Z"}), {"a" .. "z"})), 
    b = 
      if Text.Contains(a, "-") and Text.Length(a) = 7 then
        Text.Insert(a, 5, Text.Start(a, 2))
      else
        a, 
    c = Text.SplitAny(Text.Replace(b, "-", ".."), ",&"), 
    d = List.Transform(c, each Expression.Evaluate("{" & _ & "}")), 
    e = Text.Combine(
      List.Transform(List.Select(List.Combine(d), each _ > 1900 and _ < 9999), Text.From), 
      ", "
    )
  ][e], 
  Res = Table.AddColumn(Source, "Answer Expected", Fx)[[Answer Expected]]
in
  Res

Solving the challenge of Extract Valid Year Ranges with Excel

Excel solution 1 for Extract Valid Year Ranges, proposed by Bo Rydobon 🇹🇭:
=MAP(A2:A9,LAMBDA(a,IFNA(ARRAYTOTEXT(MAP(REGEXEXTRACT(a,"(19|20)dd(?!d)(-d+)?",1),LAMBDA(b,ARRAYTOTEXT(SEQUENCE(MOD(RIGHT(b,2)-LEFT(b,2),100)+1,,LEFT(b,4)))))),"")))
Excel solution 2 for Extract Valid Year Ranges, proposed by 🇰🇷 Taeyong Shin:
=MAP(
    A2:A9,
    LAMBDA(
        x,
        LET(
            r,
            REGEXEXTRACT(
                x,
                "(?:(?:19|20)d{2}|(?
Excel solution 3 for Extract Valid Year Ranges, proposed by Kris Jaganah:
=MAP(A2:A9,
    LAMBDA(x,
    LET(a,
    REGEXEXTRACT(
        x,
        "[0-9-]+",
        1
    ),
    b,
    --TEXTSPLIT(
        a,
        "-"
    ),
    c,
    --TEXTAFTER(
        @a,
        "-",
        ,
        ,
        ,
        0
    ),
    d,
    @(LEFT(
        b,
        4-LEN(
            c
        )
    )&c),
    TEXTJOIN(", ",
    ,
    IFS(c,
    SEQUENCE(
        d-@b+1,
        ,
        @b
    ),
    (b>1900)*(b<2100),
    a,
    1,
    "")))))
Excel solution 4 for Extract Valid Year Ranges, proposed by Duy Tùng:
=MAP(TRIM(
    REGEXREPLACE(
        A2:A9,
        "[A-z]",
        
    )
),
    LAMBDA(x,
    LET(a,
    TEXTSPLIT(
        x,
        {", ",
        "-",
        " & "}
    ),
    b,
    SUM(
        RIGHT(
            a,
            2
        )*{-1,
        1}
    ),
    ARRAYTOTEXT(IF(COUNT(
        FIND(
            "-",
            x
        )
    ),
    SEQUENCE(
        IF(
            b>0,
            b,
            SUM(
                a*{-1,
                1}
            )
        )+1,
        ,
        @a
    ),
    FILTER(a,
    (LEN(
        a
    )<5)*(--a>1900),
    ""))))))
Excel solution 5 for Extract Valid Year Ranges, proposed by Sunny Baggu:
=MAP(
 A2:A9,
    
 LAMBDA(t,
    
 LET(
 _a,
     TEXTSPLIT(
         
          UPPER(
              t
          ),
         
          ,
         
          VSTACK(
              "&",
               " ",
               ", ",
               CHAR(
                   SEQUENCE(
                       26,
                        ,
                        65
                   )
               )
          ),
         
          1
          
     ),
    
 _b,
     NOT(
         ISNUMBER(
             SEARCH(
                 "-",
                  _a
             )
         )
     ) * (LEN(
         _a
     ) > 4),
    
 _c,
     FILTER(
         _a,
          _b = 0,
          ""
     ),
    
 _ts,
     TEXTSPLIT(
         INDEX(
             _c,
              1,
              1
         ),
          ,
          "-"
     ) + 0,
    
 _ta,
     INDEX(
         _ts,
          1,
          1
     ),
    
 _tb,
     INDEX(
         _ts,
          2,
          1
     ),
    
 _tc,
     IF(
         LEN(
             _tb
         ) = 2,
          2000 + _tb,
          _tb
     ),
    
 _r,
     IFERROR(
         _tc - _ta + 1,
          0
     ),
    
 _d,
     IFERROR(
         IF(
             MID(
                 _c,
                  5,
                  1
             ) = "-",
              SEQUENCE(
                  _r,
                   ,
                   _ta
              ),
              --_c
         ),
          ""
     ),
    
 TEXTJOIN(
     ", ",
      ,
      FILTER(
          _d,
           _d > 1900,
           ""
      )
 )
 )
 )
)
Excel solution 6 for Extract Valid Year Ranges, proposed by Md. Zohurul Islam:
=MAP(A2:A9,
    LAMBDA(x,
    LET(
a,
    TRIM(
        REGEXREPLACE(
            x,
            "[aA-zZ]",
            
        )
    ),
    
b,
    TEXTAFTER(
        a,
        "-"
    ),
    
c,
    LEN(
        b
    ),
    
d,
    IFERROR(
        IFS(
            c<4,
            TEXTBEFORE(
                a,
                "-"
            )&"-"&LEFT(
                a,
                2
            )&b
        ),
        a
    ),
    
s,
    IFERROR(
        SEARCH(
            "-",
            a
        ),
        0
    ),
    
e,
    IF(
        s=0,
        0,
        SUM(
            TEXTSPLIT(
                d,
                "-"
            )*{-1,
            1}
        )+1
    ),
    
f,
    TEXTBEFORE(
        d,
        "-"
    ),
    
g,
    IFERROR(
        ARRAYTOTEXT(
            SEQUENCE(
                ,
                e,
                0
            )+f
        ),
        d
    ),
    
h,
    --TEXTSPLIT(
        g,
        {" & ",
        ", "}
    ),
    
j,
    ARRAYTOTEXT(FILTER(h,
    (h>1899)*(LEN(
        h
    )=4),
    "")),
    
j)))
Excel solution 7 for Extract Valid Year Ranges, proposed by Hamidi Hamid:
=LET(x,
    DROP(
        TEXTSPLIT(
            CONCAT(
                "/"&A2:A9
            ),
            {", ",
            "-",
            " "},
            "/"
        ),
        1
    ),
    u,
    MAP(
        x,
        LAMBDA(
            a,
            BYROW(
                IFERROR(
                    MID(
                        a,
                        SEQUENCE(
                            ,
                            30
                        ),
                        1
                    )*1,
                    ""
                ),
                CONCAT
            )
        )
    ),
    g,
    IFERROR(IF((LEN(
        u
    )>4)+(LEFT(
        u,
        2
    )*1<19),
    "",
    u)*1,
    0),
    v,
    IFERROR(
        SEARCH(
            "-",
            A2:A9
        )*1,
        0
    ),
    k,
    IF(
        g=0,
        0,
        TEXT(
            "01/01/"&g,
            "yyyy"
        )*1
    ),
    r,
    IF(
        k=0,
        "",
        k
    ),
    z,
    IF(
        v=0,
        BYROW(
            r,
            LAMBDA(
                a,
                TEXTJOIN(
                    ", ",
                    ,
                    a
                )
            )
        ),
        ""
    ),
    pk,
    BYROW(
        BYROW(
            k,
            LAMBDA(
                a,
                TEXTJOIN(
                    ", ",
                    ,
                    SEQUENCE(
                        ,
                        MAX(
                            a
                        )-LARGE(
                            a,
                            2
                        )+1,
                        LARGE(
                            a,
                            2
                        ),
                        1
                    )
                )
            )
        ),
        CONCAT
    ),
    q,
    IF(
        z<>"",
        z,
        pk
    ),
    IF(
        LEN(
            q
        )=1,
        "",
        q
    ))
Excel solution 8 for Extract Valid Year Ranges, proposed by Asheesh Pahwa:
=LET(alp,VSTACK(CHAR(SEQUENCE(26,,97)),CHAR(SEQUENCE(26,,65))),m,MAP(A2:A9,LAMBDA(v,CONCAT(TEXTSPLIT(v,alp," ",1)))),
_m,MAP(m,LAMBDA(a,LET(t,--TOROW(TEXTSPLIT(a,{",","&"})),
IFERROR(ARRAYTOTEXT(FILTER(t,(LEN(t)<5)*(t>1900),"")),a)))),
MAP(_m,LAMBDA(a,LET(f,ISNUMBER(FIND("-",a)),
m,MID(a,1,4),_m,MID(a,6,4),I,IF(LEN(_m)<4,20&_m,_m),
IF(f,ARRAYTOTEXT(SEQUENCE(I-m+1,,m)),a)))))
Excel solution 9 for Extract Valid Year Ranges, proposed by Guillermo Arroyo:
=MAP(
    A2:A9,
    LAMBDA(
        m,
        IFNA(
            TEXTJOIN(
                ", ",
                ,
                LET(
                    r,
                    REGEXEXTRACT(
                        m,
                        "(?
Excel solution 10 for Extract Valid Year Ranges, proposed by Fredson Alves Pinho:
=MAP(
    A2:A9,
    LAMBDA(
        x,
        LET(
            a,
            REGEXEXTRACT(
                x,
                "(?
Excel solution 11 for Extract Valid Year Ranges, proposed by Craig Runciman:
=LET(
    a,
    REGEXREPLACE(
        TRIM(
            REGEXREPLACE(
                A2:A9,
                "[^0-9-, ]|18d{2}|d{5,},*",
                ""
            )
        ),
        "(?<=d) ",
        ", "
    ),
    BYROW(
        a,
        LAMBDA(
            b,
            IF(
                ISERROR(
                    FIND(
                        "-",
                        @b
                    )
                ),
                @b,
                @TEXTJOIN(
                    ", ",
                    ,
                    SEQUENCE(
                        MOD(
                            --TEXTAFTER(
                                @b,
             &                   "-"
                            )+1000,
                            2000
                        )-MOD(
                            --TEXTBEFORE(
                                @b,
                                "-"
                            )+1000,
                            2000
                        )+1,
                        ,
                        --TEXTBEFORE(
                            @b,
                            "-"
                        )
                    )
                )
            )
        )
    )
)

Solving the challenge of Extract Valid Year Ranges with Python

Python solution 1 for Extract Valid Year Ranges, proposed by Konrad Gryczan, PhD:
import pandas as pd
import re
import numpy as np
path = "702 Extract Year.xlsx"
input = pd.read_excel(path, usecols="A", nrows=9, names=["Data"])
test = pd.read_excel(path, usecols="C", nrows=9, names=["Answer Expected"])
def extract_years(data):
 years = re.findall(r"d{2,}", data)
 years = [int(year) for year in years]
 years = [year + 2000 if year < 100 else year for year in years]
 years = [year if 1900 < year < 10000 else None for year in years]
 return [year for year in years if year is not None]
def has_range(data):
 return bool(re.search(r"d{2,}-d{2,}", data))
processed = []
for _, row in input.iterrows():
 data = row["Data"]
 years = extract_years(data)
 if years:
 if has_range(data):
 years = list(range(min(years), max(years) + 1))
 processed.append({"Data": data, "years": ", ".join(map(str, years))})
 else:
 processed.append({"Data": data, "years": None})
result = pd.DataFrame(processed)
res = input.merge(result, on="Data", how="left")
print(res['years'].equals(test['Answer Expected'])) 
                    
                  
Python solution 2 for Extract Valid Year Ranges, proposed by Alejandro Campos:
", ".join(map(str, sorted({y for start, end in re.findall(r'(?
Python solution 3 for Extract Valid Year Ranges, proposed by Ernesto Vega Castillo:
import pandas as pd
import re
from datetime import datetime
current_year = datetime.now().year
ruta = r"C:excelchallenge702.xlsx"
df = pd.read_excel(ruta, header=None)
df_rango = df.loc[1:8, 0] # Ajusta índices según el tamaño de tus datos
# print(df_rango)
def extract_years(df_rango):
 def get_years(line):
 matches = re.findall(r'(d{4})(?:-(d{2,4}))?', line)
 years = set()
 for start, end in matches:
 end = start[:2] + end if end and len(end) == 2 else end or start
 years.update(range(int(start), int(end) + 1))
 return sorted(y for y in years if 1900 <= y <= current_year)
 return pd.DataFrame({
 "Answer Expected": [", ".join(map(str, get_years(line))) for line in df_rango]
 })
result = extract_years(df_rango)
print(result)
Saludos!
                    
                  

Solving the challenge of Extract Valid Year Ranges with Python in Excel

Python in Excel solution 1 for Extract Valid Year Ranges, proposed by Alejandro Campos:
import re
def extract_years_per_line(data):
 return pd.DataFrame({
 "Extracted Years": [
 ", ".join(map(str, sorted({y for start, end in re.findall(r'(d{4})(?:-(d{2,4}))?', line)
 for y in range(int(start), (int(start[:2] + end) if end and len(end) == 2 else int(end or start)) + 1)
 if 1900 <= y <= 9999})))
 if (years := {y for start, end in re.findall(r'(d{4})(?:-(d{2,4}))?', line)
 for y in range(int(start), (int(start[:2] + end) if end and len(end) == 2 else int(end or start)) + 1)
 if 1900 <= y <= 9999}) else ""
 for line in data]})
df_result = extract_years_per_line(xl("A2:A9")[0])
df_result
                    
                  

Solving the challenge of Extract Valid Year Ranges with R

R solution 1 for Extract Valid Year Ranges, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "Excel/702 Extract Year.xlsx"
input = read_excel(path, range = "A1:A9")
test = read_excel(path, range = "C1:C9")
result = input %>%
 mutate(years = str_extract_all(Data, "\d{2,}")) %>%
 unnest(years) %>%
 mutate(years = as.numeric(years)) %>%
 mutate(years = ifelse(years < 100, years + 2000, years)) %>%
 mutate(
 years = ifelse(years > 1900 & years < 10000, years, NA),
 has_range = ifelse(str_detect(Data, "\d{2,}-\d{2,}"), TRUE, FALSE)
 ) %>%
 na.omit() %>%
 group_by(Data) %>%
 summarise(
 years = if (any(has_range)) {
 seq(min(years, na.rm = TRUE), max(years, na.rm = TRUE)) %>%
 paste(collapse = ", ")
 } else {
 paste(years, collapse = ", ")
 }
 )
res = input %>%
 left_join(result, by = "Data")
all.equal(res$years, test$`Answer Expected`)
# TRUE
                    
                  

&&

Leave a Reply