Extract all the emails from the text provided in the question table. Be careful with the use of the “@” symbol, as it may be used for purposes other than email addresses, such as referring to departments like @EmployeeRecognition. Note: The text is generated by ChatGPT.
📌 Challenge Details and Links
Challenge Number: 71
Challenge Difficulty: ⭐⭐⭐
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Extract From Text! Part 1 with Power Query
Power Query solution 1 for Extract From Text! Part 1, proposed by Omid Motamedisedeh:
let
S = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
RegExp=(A)=>"<script>document.write("&A&".match(RegExp(""[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"", 'g')) +'
');</script>",
Re = Text.Split(Web.Page(RegExp(""""&Text.Combine(S[Text])&"""")){0}[Data]{0}[Children]{1}[Children]{0}[Text],",")
in
Re
Power Query solution 2 for Extract From Text! Part 1, proposed by Zoran Milokanović:
let
Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content],
S = List.Select(
List.TransformMany(Source[Text], each Text.Split(_, " "), (i, _) => Text.TrimEnd(_, ".")),
each Text.PositionOf(_, "@") > 0
)
in
S
Power Query solution 3 for Extract From Text! Part 1, proposed by Brian Julius:
let
Source = Excel.Workbook(File.Contents("C:UsersbrjulDownloadsCH-071 Extract from Text.xlsx"), true, true),
Nav = Table.SelectRows( Table.SelectColumns( Table.PromoteHeaders( Source{[Item="Sheet1",Kind="Sheet"]}[Data]), "Text"), each [Text] <> null ),
RScript = R.Execute("library(stringr)#(lf)email_regex <- ""[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}""#(lf)df <- dataset#(lf)#(lf)df$emails <- sapply(df$Text, function(x) {#(lf) str_extract_all(x, email_regex)[[1]]#(lf)})#(lf)#(lf)df$emails <- sapply(df$emails, function(x) {#(lf) paste(x, collapse = "", "")#(lf)})#(lf)#(lf)print(df)",[dataset=Nav]),
Clean = Table.SelectRows( Table.SelectColumns( RScript{[Name="df"]}[Value], "emails"), each Text.Length( [emails] ) > 0 )
in
Clean
Power Query solution 4 for Extract From Text! Part 1, proposed by Rafael González B.:
let
Source = Excel.CurrentWorkbook(){0}[Content],
Add = List.Combine(Table.AddColumn(Source, "Email", each
let
a = Text.Split([Text], " "),
b = List.Select(a, (x) => Text.Contains(x, "@") and not Text.StartsWith(x, "@"))
in
b
)[Email]),
Result = Table.ReplaceValue(Table.FromColumns({Add},
{"Email Address"}),".com.",".com",
Replacer.ReplaceText,{"Email Address"})
in
Result
🧙♂️🧙♂️🧙♂️
Power Query solution 5 for Extract From Text! Part 1, proposed by Rafael González B.:
let
Source = Excel.CurrentWorkbook(){0}[Content],
Add = List.Combine(
Table.AddColumn(
Source,
"Email",
each
let
a = Text.Split([Text], " "),
b = List.Select(a, (x) => Text.Contains(x, "@") and not Text.StartsWith(x, "@")),
c = List.Transform(b, each Text.TrimEnd(_, {"."}))
in
c
)[Email]
),
Result = Table.FromColumns({Add}, {"Email Address"})
in
Result
Power Query solution 6 for Extract From Text! Part 1, proposed by Ramiro Ayala Chávez:
let
S = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
L = List.Transform,
a = L(S[Text], each Text.Split(_," ")),
b = List.Combine(L(a, each List.Select(_, each Text.Contains(_,"@") and not Text.StartsWith(_,"@")))),
c = L(b, each if Text.EndsWith(_,".") then Text.RemoveRange(_,Text.Length(_)-1) else _),
Sol = Table.FromColumns({c},{"Email Address"})
in
Sol
Power Query solution 7 for Extract From Text! Part 1, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Sol = List.RemoveNulls(
Table.AddColumn(
Source,
"A",
each
let
a = List.Transform(
Text.Split([Text], " "),
each Text.TrimEnd(Text.TrimStart(_, "@"), ".")
),
b = List.Select(a, each Text.Contains(_, "@")){0}?
in
b
)[A]
)
in
Sol
Power Query solution 8 for Extract From Text! Part 1, proposed by Alexis Olson:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Words = Text.Split(Text.Combine(Source[Text], " "), " "),
Emails = List.Select(Words, each Text.Contains(_, "@") and not Text.StartsWith(_, "@")),
Result = List.Transform(Emails, each Text.Trim(_, "."))
in
Result
Power Query solution 9 for Extract From Text! Part 1, proposed by Yaroslav Drohomyretskyi:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Result = Table.SelectRows(
Table.ExpandListColumn(
Table.TransformColumns(
Source,
{
{
"Text",
Splitter.SplitTextByAnyDelimiter({" ", ". ", ", "}, QuoteStyle.Csv),
let
itemType = (type nullable text) meta [Serialized.Text = true]
in
type {itemType}
}
}
),
"Text"
),
each Text.Contains([Text], "@") and not Text.StartsWith([Text], "@")
)
in
Result
Power Query solution 10 for Extract From Text! Part 1, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
#"Added Custom" = Table.AddColumn(Source, "Email", each Text.Split([Text], " ")),
#"Expanded S" = Table.ExpandListColumn(#"Added Custom", "Email"),
#"Added Custom1" = Table.AddColumn(
#"Expanded S",
"T",
each Text.Contains([Email], "@") and Text.Start([Email], 1) <> "@"
),
#"Filtered Rows" = Table.SelectRows(#"Added Custom1", each ([T] = true)),
#"Removed Other Columns" = Table.SelectColumns(#"Filtered Rows", {"Email"})
in
#"Removed Other Columns"
Power Query solution 11 for Extract From Text! Part 1, proposed by Arnaud Duvernois:
let
Source = Excel.CurrentWorkbook(){[Name = "Tableau2"]}[Content],
Select = List.Select(
List.TransformMany(Source[Colonne1], (s) => Text.Split(s, " "), (s, y) => y),
each Text.Contains(_, "@") and Text.Contains(_, ".")
)
in
Select
Solving the challenge of Extract From Text! Part 1 with Excel
Excel solution 1 for Extract From Text! Part 1, proposed by Bo Rydobon 🇹🇭:
=TOCOL(
REGEXREPLACE(
B3:B19,
".*?b(w+@w+.w+)b.*",
"$1"
),
3
)
Excel solution 2 for Extract From Text! Part 1, proposed by 🇰🇷 Taeyong Shin:
=REGEXEXTRACT(
ARRAYTOTEXT(
B3:B19
),
"w+@w+.[A-Za-z]+",
1
)
No limit to the number of characters
=LET(
t,
B3:B19,
p,
"w+@w+.[A-Za-z]+",
REGEXEXTRACT(
FILTER(
t,
REGEXTEST(
t,
p
)
),
p
)
)
=TOCOL(
REGEXEXTRACT(
B3:B19,
"w+@w+.[A-Za-z]+"
),
2
)
Excel solution 3 for Extract From Text! Part 1, proposed by محمد حلمي:
=TOCOL(
TEXTAFTER(
TEXTBEFORE(
B3:B20,
".com"
),
" ",
-1
),
2
)
Excel solution 4 for Extract From Text! Part 1, proposed by Oscar Mendez Roca Farell:
=LET(
t,
SUBSTITUTE(
SUBSTITUTE(
B3:B19,
"com.",
"com"
),
" ",
REPT(
" ",
50
)
),
TOCOL(
TRIM(
MID(
t,
FIND(
"@c",
t
)-15,
50
)
),
2
)
)
Excel solution 5 for Extract From Text! Part 1, proposed by Julian Poeltl:
=LET(
T,
TEXTSPLIT(
TEXTJOIN(
" ",
,
B3:B19
),
,
" "
),
FILTER(
T,
ISNUMBER(
SEARCH(
"@*.",
T
)
)
)
)
Excel solution 6 for Extract From Text! Part 1, proposed by Julian Poeltl:
=LET(
T,
TEXTJOIN(
" ",
,
B3:B19
),
SP,
TEXTSPLIT(
T,
,
" "
),
FILTER(
SP,
ISNUMBER(
SEARCH(
"@*.",
SP
)
)
)
)
Excel solution 7 for Extract From Text! Part 1, proposed by Kris Jaganah:
=REGEXEXTRACT(
CONCAT(
B4:B19&" "
),
"[w.-]+@[w.-]+.w+",
1
)
Excel solution 8 for Extract From Text! Part 1, proposed by Yaroslav Drohomyretskyi:
=TOCOL(REGEXEXTRACT(TEXTJOIN(" ",,B3:B19),"[w.-]+@[w.-]+.w+",1),3)
Excel solution 9 for Extract From Text! Part 1, proposed by Sunny Baggu:
=LET( a,
"@company.com", TOCOL( TEXTAFTER(
TEXTBEFORE(
B3:B19,
a
),
" ",
-1
) &
a, 3 ))
Excel solution 10 for Extract From Text! Part 1, proposed by Sunny Baggu:
=LET( a,
TEXTSPLIT(
TEXTJOIN(
" ",
,
B3:B19
),
,
" "
), FILTER(
a,
ISNUMBER(
SEARCH(
".com",
a
)
)
))
Excel solution 11 for Extract From Text! Part 1, proposed by Bilal Mahmoud kh.:
=LET(
a,
MAP(
TEXTSPLIT(
TEXTJOIN(
" ",
,
B3:B19
),
,
" "
),
LAMBDA(
x,
IF(
LEN(
TEXTBEFORE(
x,
"@",
,
,
,
""
)
)>0,
IF(
MID(
x,
LEN(
x
),
1
)=".",
LEFT(
x,
LEN(
x
)-1
),
x
),
""
)
)
),
b,
FILTER(
a,
a <>""
),
b
)
Excel solution 12 for Extract From Text! Part 1, proposed by Hussein SATOUR:
=REGEXEXTRACT(TEXTJOIN(" ",,B3:B19),"[A-Za-z.]+@[A-Za-z]+.com",1)
Excel solution 13 for Extract From Text! Part 1, proposed by Pieter de B.:
=TOCOL(
MAP(
B3:B19,
LAMBDA(
x,
LET(
y,
TEXTSPLIT(
x,
" "
),
TOCOL(
IFS(
1-ISERR(
FIND(
".",
y
)+FIND(
"@",
y
)
),
y
),
2
)
)
)
),
2
)
Or if multiple could be found per line:
=TOCOL(
REDUCE(
"Email Address",
B3:B19,
LAMBDA(
a,
b,
LET(
x,
TEXTSPLIT(
b,
" "
),
VSTACK(
a,
IFS(
1-ISERR(
FIND(
".",
x
)+FIND(
"@",
x
)
),
x
)
)
)
)
),
2
)
Excel solution 14 for Extract From Text! Part 1, proposed by Rick Rothstein:
=LET(m,
MAP(B3:B19,
LAMBDA(b,
LET(t,
TEXTSPLIT(
b,
" "
),
CONCAT(IFNA(IF((LEN(
TEXTBEFORE(
t,
"@"
)
)>0)*(LEN(
TEXTAFTER(
t,
"@"
)
)>0),
t,
""),
""))))),
FILTER(
m,
LEN(
m
)
))
Excel solution 15 for Extract From Text! Part 1, proposed by Thang Van:
=LET(a,MAP(B3:B19,LAMBDA(_each,IFERROR(REGEXEXTRACT(_each,"S+@S+"),""))),FILTER(a,a<>""))
Solving the challenge of Extract From Text! Part 1 with Python
Python solution 1 for Extract From Text! Part 1, proposed by Konrad Gryczan, PhD:
import pandas as pd
path = "CH-071 Extract from Text.xlsx"
input = pd.read_excel(path, usecols = "B", skiprows = 1)
test = pd.read_excel(path, usecols = "D", skiprows = 1, nrows = 9)
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
input["Email Address"] = input["Text"].str.findall(email_regex)
output = input[input["Email Address"].str.len() > 0]
output["Email Address"] = output["Email Address"].str[0]
output = output.drop(columns="Text").reset_index(drop=True)
print(output.equals(test)) # True
Python solution 2 for Extract From Text! Part 1, proposed by Abdallah Ally:
import pandas as pd
import re
# Read the Excel file
file_path = 'CH-071 Extract from Text.xlsx'
df = pd.read_excel(file_path, usecols='B', skiprows=1)
# Perform data wrangling
pattern = r'b([.w]+@[.w]+.[a-zA-Z]+)b'
# Capture all emails, in case of multiple, join by ", "
emails = [', '.join(y) for x in df['Text'] if (y:=re.findall(pattern, x))]
# Split by ', " where multiple emails were captured
emails = [x for y in emails for x in y.split(', ')]
# Ensure unique emails while retaining the original capture order
emails = [x for i, x in enumerate(emails) if emails[ : i + 1].count(x) == 1]
df = pd.DataFrame(data=emails, columns=['Email Address'])
# Display the final dataset
df
Solving the challenge of Extract From Text! Part 1 with R
R solution 1 for Extract From Text! Part 1, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "files/CH-071 Extract from Text.xlsx"
input = read_excel(path, range = "B2:B19")
test = read_excel(path, range = "D2:D11")
email_regex <- "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
result = input %>%
mutate(`Email Address` = str_extract(Text, email_regex)) %>%
na.omit() %>%
select(`Email Address`)
all.equal(result, test, check.attributes = FALSE)
# [1] TRUE
