Home » Data Extraction

Data Extraction

Extract the Date and Amount from the details Dynamic array function allowed, but Extra marks for Legacy solutions or PowerQuery Solutions.

📌 Challenge Details and Links
Challenge Number: 45
Challenge Difficulty: ⭐
📥Link to the solutions on LinkedIn

Solving the challenge of Data Extraction with Power Query

Power Query solution 1 for Data Extraction, proposed by Kris Jaganah:
let
  A = Excel.CurrentWorkbook(){[Name = "Table54"]}[Content], 
  B = Table.AddColumn(
    A, 
    "Date", 
    each Date.From(List.Last(Text.Split(Text.BeforeDelimiter([Revenues Details], ","), " ")))
  ), 
  C = Table.AddColumn(
    B, 
    "Amount", 
    each List.RemoveNulls(
      List.Transform(Text.Split([Revenues Details], " "), each try Number.From(_) otherwise null)
    ){0}?
  )
in
  C
Power Query solution 2 for Data Extraction, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table5"]}[Content], 
  Sol = Table.Combine(
    Table.AddColumn(
      Source, 
      "A", 
      each 
        let
          a = Text.SplitAny([Revenues Details], " ,."), 
          b = List.Transform(
            {Date.From, Number.From}, 
            (x) => List.RemoveNulls(List.Transform(a, each try x(_) otherwise null))
          ), 
          c = Table.FromColumns(b, {"Date", "Amount"})
        in
          c
    )[A]
  )
in
  Sol
Power Query solution 3 for Data Extraction, proposed by Abdallah Ally:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Transform = Table.TransformColumns(
    Source, 
    {"Revenues Details", each Text.Remove(Text.Lower(_), {"a" .. "z", " "})}
  ), 
  Split = Table.SplitColumn(
    Transform, 
    "Revenues Details", 
    each Text.Split(_, ","), 
    {"Date", "Amount"}
  ), 
  Result = Table.TransformColumnTypes(Split, {{"Date", type date}, {"Amount", Int64.Type}})
in
  Result
Power Query solution 4 for Data Extraction, proposed by Ramiro Ayala Chávez:
let
  S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Fx = (x) =>
    let
      a = Text.SplitAny(x, " ,"), 
      b = List.Transform(a, each try Date.From(_) otherwise null), 
      c = List.Transform(a, each try Number.From(_) otherwise null), 
      d = List.RemoveNulls(b & c), 
      e = if List.Count(d) = 2 then d else d & {null}, 
      f = Table.FromRows({e}, {"Date", "Amount"})
    in
      f, 
  g = Table.AddColumn(S, "A", each Fx([Revenues Details]))[[A]], 
  Sol = Table.ExpandTableColumn(g, "A", {"Date", "Amount"})
in
  Sol
Power Query solution 5 for Data Extraction, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  #"Added Custom" = Table.AddColumn(
    Source, 
    "T1", 
    each Text.Select([Revenues Details], {"0" .. "9", "/", ","})
  ), 
  #"Removed Other Columns" = Table.SelectColumns(#"Added Custom", {"T1"}), 
  #"Split Column by Delimiter" = Table.SplitColumn(
    #"Removed Other Columns", 
    "T1", 
    Splitter.SplitTextByDelimiter(",", QuoteStyle.Csv), 
    {"Date", "Amount"}
  )
in
  #"Split Column by Delimiter"
Power Query solution 6 for Data Extraction, proposed by Ahmed Ariem:
let
  f = (x) =>
    Table.FromRows({Text.Split(Text.Select(x, {"0" .. "9", "/", ","}), ",")}, {"Date", "Amount"}), 
  Source = Excel.CurrentWorkbook(){[Name = "Table5"]}[Content], 
  trans = Table.Combine(Table.TransformColumns(Source, {"Revenues Details", f})[Revenues Details])
in
  trans
Power Query solution 7 for Data Extraction, proposed by Md. Shah Alam, Microsoft Certified Trainer:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table5"]}[Content], 
  #"Lowercased Text" = Table.TransformColumns(Source, {{"Revenues Details", Text.Lower, type text}}), 
  Date = Table.AddColumn(
    #"Lowercased Text", 
    "Date", 
    each Text.BetweenDelimiters([Revenues Details], "on ", ","), 
    type date
  ), 
  #"Changed Type" = Table.TransformColumnTypes(Date, {{"Date", type date}}), 
  Amount = Table.AddColumn(
    #"Changed Type", 
    "Amount", 
    each Text.Select(Text.AfterDelimiter([Revenues Details], ", "), {"0" .. "9"})
  )[[Date], [Amount]]
in
  Amount

Solving the challenge of Data Extraction with Excel

Excel solution 1 for Data Extraction, proposed by Bo Rydobon 🇹🇭:
=IFNA(
   --REGEXEXTRACT(
       B3:B6,
       {"d+/[d/]+",
       "(^|)d+[ $]"}),
   "")
Excel solution 2 for Data Extraction, proposed by Rick Rothstein:
=IFNA(
   DROP(
       REDUCE(
           "",
           B3:B6,
           LAMBDA(
               a,
               x,
               VSTACK(
                   a,
                   TEXTSPLIT(
                       x,
                       TEXTSPLIT(
                           x,
                           {0,
                           1,
                           2,
                           3,
                           4,
                           5,
                           6,
                           7,
                           8,
                           9,
                           "/"},
                           ,
                           1),
                       ,
                       1)))),
       1),
   "")
Excel solution 3 for Data Extraction, proposed by 🇰🇷 Taeyong Shin:
=NUMBERVALUE(
   REGEXREPLACE(
       B3:B6,
       "([d/]{8,12})|(d+)|.",
       "$"&{1,
       2}))
Excel solution 4 for Data Extraction, proposed by Kris Jaganah:
=IFNA(
   REDUCE(
       {"Date",
       "Amount"},
       Table5[Revenues Details],
       LAMBDA(
           x,
           y,
           VSTACK(
               x,
               REGEXEXTRACT(
                   y,
                   "b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])/(d{4})b|b[0-9]+",
                   1)))),
   "")
Excel solution 5 for Data Extraction, proposed by Julian Poeltl:
=REDUCE(
   HSTACK(
       "Date",
       "Amount"),
   B3:B6,
   LAMBDA(
       A,
       B,
       IFNA(
           VSTACK(
               A,
               LET(
                   S,
                   --TEXTSPLIT(
                       B,
                       {" ",
                       ","}),
                   FILTER(
                       S,
                       ISNUMBER(
                           S)))),
           "")))
Excel solution 6 for Data Extraction, proposed by Hussein SATOUR:
=DROP(
   REDUCE(
       "",
       B3:B6,
       LAMBDA(
           x,
           y,
           IFNA(
               VSTACK(
                   x,
                   LET(
                       a,
                       --TEXTSPLIT(
                           y,
                           {" ",
                           ","}),
                       FILTER(
                           a,
                           NOT(
                               ISERR(
                                   a))))),
               ""))),
   1)
Excel solution 7 for Data Extraction, proposed by Oscar Mendez Roca Farell:
=IFERROR(AGGREGATE(15,
    6,
    VALUE(MID(SUBSTITUTE(
        SUBSTITUTE(
            $B3,
             ",",
             ""),
         " ",
         REPT(
             " ",
              50)),
    1+50*(ROW(
        $1:$10)-1),
    50)),
    COLUMNS(
        $G3:G$3)),
    "")

M65:
=TOROW(
   VALUE(
       TEXTSPLIT(
           B3,
            {",",
            " "})),
    2)

Dynamic Array:
=LET(
   m,
    -MID(
        MAP(
            B3:B6,
             LAMBDA(
                 a,
                  ARRAYTOTEXT(
                      TOCOL(
                          --TEXTSPLIT(
                              a,
                               {",",
                               " "}),
                           2)))),
         {1,
         8},
         {5,
         6}),
    IFERROR(
        -m,
         ""))
Excel solution 8 for Data Extraction, proposed by Sunny Baggu:
=LET(
   
    t,
    Table5[Revenues Details],
   
    _d,
    TEXTAFTER(
        TEXTBEFORE(
            t,
             ", "),
         " ",
         -1),
   
    _v,
    MAP(
        
         t,
        
         LAMBDA(
             a,
             
              LET(
                  
                   _a,
                   TEXTSPLIT(
                       a,
                        ,
                        " "),
                  
                   FILTER(
                       _a,
                        ISNUMBER(
                            --_a),
                        "")
                   )
              )
         ),
   
    HSTACK(
        _d,
         _v)
   )
Excel solution 9 for Data Extraction, proposed by Sunny Baggu:
=IFNA(
   
    REDUCE(
        
         {"Date",
         "Amount"},
        
         Table5[Revenues Details],
        
         LAMBDA(
             a,
              v,
             
              VSTACK(
                  
                   a,
                  
                   TEXTSPLIT(
                       
                        UPPER(
                            v),
                       
                        VSTACK(
                            " ",
                             ", ",
                             ".",
                             CHAR(
                                 SEQUENCE(
                                     26,
                                      ,
                                      65))),
                       
                        ,
                       
                        1
                        )
                   )
              )
         ),
   
    ""
   )
Excel solution 10 for Data Extraction, proposed by Hamidi Hamid:
=LET(
   x,
   BYROW(
       DROP(
           IFERROR(
               REDUCE(
                   0,
                   B3:B6,
                   LAMBDA(
                       a,
                       b,
                       VSTACK(
                           a,
                           TEXTSPLIT(
                               b,
                               " ",
                               ,
                               1))))*1,
               ""),
           1),
       LAMBDA(
           a,
           IFERROR(
               LOOKUP(
                   9^9,
                   a),
               ""))),
   w,
   DROP(
       REDUCE(
           0,
           B3:B6,
           LAMBDA(
               a,
               b,
               VSTACK(
                   a,
                   TEXTAFTER(
                       TEXTBEFORE(
                           b,
                           ", ",
                           1),
                       " ",
                       -1)))),
       1),
   HSTACK(
       TEXT(
           w,
           "mm/dd/yyyy"),
       x))

or format(mm/jj/aaaa")

=LET(x,BYROW(DROP(IFERROR(REDUCE(0,B3:B6,LAMBDA(a,b,VSTACK(a,TEXTSPLIT(b," ",,1))))*1,""),1),LAMBDA(a,IFERROR(LOOKUP(9^9,a),""))),w,DROP(REDUCE(0,B3:B6,LAMBDA(a,b,VSTACK(a,TEXTAFTER(TEXTBEFORE(b,",
    ",1)," ",-1)))),1),HSTACK(TEXT(w,"mm/jj/aaaa"),x))
Excel solution 11 for Data Extraction, proposed by Asheesh Pahwa:
=DROP(
   REDUCE(
       "",
       B3:B6,
       LAMBDA(
           x,
           y,
           IFNA(
               VSTACK(
                   x,
                   LET(
                       t,
                       TEXTSPLIT(
                           y,
                           {" ",
                           ","}),
                       FILTER(
                           t,
                           ISNUMBER(
                               --t)))),
               ""))),
   1)
Excel solution 12 for Data Extraction, proposed by Asheesh Pahwa:
=LET(
   r,
   B3:B6,
   DROP(
       REDUCE(
           "",
           r,
           LAMBDA(
               x,
               y,
               
               IFNA(
                   VSTACK(
                       x,
                       LET(
                           t,
                           TOROW(
                               TRIM(
                                   TEXTSPLIT(
                                       LOWER(
                                           y),
                                       CHAR(
                                           SEQUENCE(
                                               26,
                                               ,
                                               97)),
                                       ",",
                                       1)),
                               3),
                           FILTER(
                               t,
                               NOT(
                                   ISERR(
                                       --t))))),
                   ""))),
       1))
Excel solution 13 for Data Extraction, proposed by Meganathan Elumalai:
=TRANSPOSE(
   FILTERXML(
       ""&SUBSTITUTE(
           SUBSTITUTE(
               B3,
               ",",
               ),
           " ",
           "")&"",
       "//B[translate(.,'0123456789','')!=.]"))
Excel solution 14 for Data Extraction, proposed by JvdV –:
=--REGEXEXTRACT(
   B3:B6&0,
   {"[d/]+",
   ",.*?Kd+"})

Or,
    just for fun with FILTERXML():

=FILTERXML(
   ""&SUBSTITUTE(
       SUBSTITUTE(
           TRIM(
               B3:B6)&" 0",
           ",",
           ),
       " ",
       "")&"",
   "//s[translate(.,'0123456789','')='"&{"//']",
   "']"})
Excel solution 15 for Data Extraction, proposed by Eddy Wijaya:
=REDUCE(
   D2:E2,
   B3:B6,
   LAMBDA(
       a,
       v,
       VSTACK(
           a,
           LET(
               
               dat,
               TEXTAFTER(
                   TEXTBEFORE(
                       v,
                       ","),
                   "on ",
                   ,
                   1),
               
               am_a,
               TEXTAFTER(
                   v,
                   ", "),
               
               am_b,
               IFERROR(
                   TEXTJOIN(
                       "",
                       ,
                       TOCOL(
                           --MID(
                               am_a,
                               SEQUENCE(
                                   LEN(
                                       am_a)),
                               1),
                           2)),
                   ""),
               
               HSTACK(
                   dat,
                   am_b)))))
Excel solution 16 for Data Extraction, proposed by Mey Tithveasna:
=DROP(
   REDUCE(
       0,
       B3:B6,
       LAMBDA(
           _a,
           _b,
           IFNA(
               VSTACK(
                   _a,
                   TOROW(
                       --TEXTSPLIT(
                           _b,
                           {",",
                           " "}),
                       2)),
               ""))),
   1)
Excel solution 17 for Data Extraction, proposed by Hazem Hassan:
=LET(
   
    a,
    REGEXREPLACE(
        
         B3:B6,
        
         "[a-zA-Z- .]",
        
         ""
         ),
   
    HSTACK(
        
         TEXTBEFORE(
             a,
              ","),
        
         TEXTAFTER(
             a,
              ",")
         )
   )

#2

=IFNA(
   DROP(
       REDUCE(
           "",
           B3:B6,
           LAMBDA(
               x,
               y,
               VSTACK(
                   x,
                   TEXTSPLIT(
                       y,
                       HSTACK(
                           {",",
                           ".",
                           " "},
                           CHAR(
                               SEQUENCE(
                                   ,
                                   58,
                                   65))),
                       ,
                       1)))),
       1),
   0)

#3
=IFNA(
   DROP(
       REDUCE(
           "",
           B3:B6,
           LAMBDA(
               x,
               y,
               VSTACK(
                   x,
                   TOROW(
                       --TEXTSPLIT(
                           y,
                           {" ";","}),
                       3)))),
       1),
   0)
Excel solution 18 for Data Extraction, proposed by Md. Shah Alam, Microsoft Certified Trainer:
=TEXT(
   --MID(
       Table5[@[Revenues Details]],
       SEARCH(
           "on",
           Table5[@[Revenues Details]])+3,
       8),
   "mm-dd-yy")

Amount: =TEXTJOIN(
   "",
   TRUE,
   IFERROR(
       --MID(
           TEXTAFTER(
               Table5[@[Revenues Details]],
               ", "),
           SEQUENCE(
               LEN(
                   Table5[@[Revenues Details]])),
           1),
       ""))
Excel solution 19 for Data Extraction, proposed by Bhaskar Joshi:
=DROP(
   REDUCE(
       "",
       B3:B6,
       LAMBDA(
           initial,
           current,
           IFERROR(
               VSTACK(
                   initial,
                   LET(
                       _data,
                       TEXTSPLIT(
                           current,
                           ,
                           {" ",
                           ","},
                           TRUE),
                       _mid,
                       ISNUMBER(
                           --_data),
                       TRANSPOSE(
                           FILTER(
                               _data,
                               _mid)))),
               ""))),
   1)
Excel solution 20 for Data Extraction, proposed by Bhaskar Joshi:
=LET(
   _data,
   TEXTSPLIT(
       B3,
       ,
       {" ",
       ","},
       TRUE),
   _mid,
   ISNUMBER(
       --_data),
   TRANSPOSE(
       FILTER(
           _data,
           _mid)))
Excel solution 21 for Data Extraction, proposed by Petya Koleva:
=TEXTAFTER(
   TEXTBEFORE(
       B3:B6,
       ","),
   "n ")
=TAKE(
   SORT(
       IFERROR(
           TRANSPOSE(
               TEXTSPLIT(
                   B3,
                   " "))*1,
           "")),
   1)
Excel solution 22 for Data Extraction, proposed by abdelaziz allam:
=--LET(
   all,
   MAP(
       Table5[Revenues Details],
       LAMBDA(
           a,
           TEXTJOIN(
               ",",
               TRUE,
               IFERROR(
                   --TEXTSPLIT(
                       a,
                       {" ",
                       ","}),
                   "")))),
   HSTACK(
       TEXTSPLIT(
           all,
           ","),
       IFNA(
           TEXTAFTER(
               all,
               ","),
           0)))
Excel solution 23 for Data Extraction, proposed by Songglod Petchamras:
=REDUCE(
   {"Date",
   "Amount"},
   Table5[Revenues Details],
   LAMBDA(
       a,
       v,
       LET(
           t,
           --TEXTSPLIT(
               v,
               {", ",
               " "}),
           IFNA(
               VSTACK(
                   a,
                   FILTER(
                       t,
                       ISNUMBER(
                           t))),
               ""))))

Solving the challenge of Data Extraction with Python

Python solution 1 for Data Extraction, proposed by Konrad Gryczan, PhD:
import pandas as pd
import re
path = "files/Excel Challenge September 29th.xlsx"
input = pd.read_excel(path, usecols="B", skiprows=1, nrows=5)
test = pd.read_excel(path, usecols="D:E", skiprows=1, nrows=5)
date_patt = r'd{1,2}/d{1,2}/d{4}'
amount_patt = r'd+(?=s|$)'
input['Date'] = input['Revenues Details'].apply(lambda x: pd.to_datetime(re.search(date_patt, x).group(), format='%m/%d/%Y') if re.search(date_patt, x) else None)
input['Amount'] = input['Revenues Details'].apply(lambda x: re.search(amount_patt, x).group().strip() if re.search(amount_patt, x) else None)
input['Amount'] = input['Amount'].astype('float64')
result = input.iloc[:, 1:]
print(result.equals(test)) # True

Solving the challenge of Data Extraction with R

R solution 1 for Data Extraction, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
library(rebus.datetimes)
path = "files/Excel Challenge September 29th.xlsx"
input = read_excel(path, range = "B2:B6")
test = read_excel(path, range = "D2:E6")
date_patt = digit(1, 2) %R% "/" %R% digit(1, 2) %R% "/" %R% digit(4,4)
amount_patt = SPACE %R% one_or_more(DIGIT) %R% or(SPACE, END)
result = input %>%
 mutate(Date = str_extract(`Revenues Details`, date_patt) %>% mdy() %>% as.POSIXct(),
 Amount = str_extract(`Revenues Details`, amount_patt) %>% as.numeric()) %>%
 select(-`Revenues Details`)
all.equal(result, test, check.attributes = FALSE)
#> [1] TRUE

Leave a Reply