(Excel formulas also welcome) Clean the data and generate the result table as shown. (If code can’t be contained in one comment, please divide code into 2 parts and post into 2 comments)
📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 8
Challenge Difficulty: ⭐️⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Clean Data and Restructure Table with Power Query
Power Query solution 1 for Clean Data and Restructure Table, proposed by Luan Rodrigues:
let
Fonte = Excel.CurrentWorkbook(){[Name="Tabela1"]}[Content],
Fomr0 = Table.TransformColumns(Fonte,{{"Data", Text.Proper, type text}}),
Divide_Col1 = Table.ExpandListColumn(Table.TransformColumns(Fomr0, {{"Data", Splitter.SplitTextByDelimiter(",", QuoteStyle.None), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
Divide_Col2 = Table.ExpandListColumn(Table.TransformColumns(Divide_Col1, {{"Data", Splitter.SplitTextByDelimiter("Year", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
Divide_Col3 = Table.ExpandListColumn(Table.TransformColumns(Divide_Col2, {{"Data", Splitter.SplitTextByDelimiter("Revenue", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
Divide_Col4 = Table.ExpandListColumn(Table.TransformColumns(Divide_Col3, {{"Data", Splitter.SplitTextByDelimiter("Cost", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
Form = Table.SelectRows(
Table.TransformColumns( Divide_Col4,{{"Data", Text.Trim, type text}}),each ([Data] <> "")),
Power Query solution 2 for Clean Data and Restructure Table, proposed by Luan Rodrigues:
Part 2
Comp = Table.AddColumn(Form, "Company", each if Text.Select([Data],{"a".."z","A".."Z"}) = "" then null else Text.Select([Data],{"a".."z","A".."Z"})),
Preenc_Abaixo = Table.FillDown(Comp,{"Company"}),
Num = Table.SelectRows(Table.AddColumn(Preenc_Abaixo, "Dados", each Text.Select([Data],{"0".."9"})), each ([Dados] <> "")),
Indice = Table.AddIndexColumn(Num, "Índice.1", 1, 1, Int64.Type),
Mod = Table.AddColumn(Indice, "Personalizar.2", each Number.Mod([Índice.1],3)),
Cond = Table.AddColumn(Mod, "Unir", each if [Personalizar.2] = 1 then [Índice.1] else null),
Preenc_Abaixo1 = Table.FillDown(Cond,{"Unir"})[[Company],[Dados],[Unir]],
Group = Table.Group(Preenc_Abaixo1, {"Unir"}, {{"Tabela", each _, type table [Personalizar=text, Dados=text, Unir=number]}}),
List = Table.AddColumn(Group, "Listar", each [Tabela][Dados]),
Ext = Table.TransformColumns(List, {"Listar", each Text.Combine(List.Transform(_, Text.From), ";"), type text}),
Tab_Exp = Table.Distinct(Table.ExpandTableColumn(Ext, "Tabela", {"Company"}, {"Company"})),
Tabela = Table.SplitColumn(Tab_Exp, "Listar", Splitter.SplitTextByDelimiter(";", QuoteStyle.Csv), {"Year", "Revenue", "Cost"})[[Company],[Year],[Revenue],[Cost]]
in
Tabela
Power Query solution 3 for Clean Data and Restructure Table, proposed by Victor Wang:
let
Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
#"Uppercased Text" = Table.TransformColumns(Source,{{"Data", Text.Upper, type text}}),
#"Added Custom" = Table.AddColumn(#"Uppercased Text", "Company", each Text.BeforeDelimiter( [Data], "YEAR" ) ),
#"Added Custom1" = Table.AddColumn(#"Added Custom", "OtherColumns", each Text.Replace( Text.Select( Text.AfterDelimiter( [Data], [Company] ), {"A".."Z", "0".."9"} ), "YEAR", "^YEAR" ) ),
Power Query solution 4 for Clean Data and Restructure Table, proposed by Venkata Rajesh:
let
Source = Data,
#"Added Custom" = Table.ExpandListColumn(
Table.AddColumn(
Table.AddColumn(
Table.AddColumn(
Source,
"Custom",
each Text.Select(Text.Proper([Data]), {"A" .. "Z", "a" .. "z", "0" .. "9"})
),
"Company",
each Text.BeforeDelimiter([Custom], "Year"),
type text
),
"Custom.1",
each Splitter.SplitTextByPositions(Text.PositionOf([Custom], "Year", Occurrence.All))(
[Custom]
)
),
"Custom.1"
),
#"Added Custom2" = Table.AddColumn(
#"Added Custom",
" Custom.2",
each Splitter.SplitTextByPositions(
List.Sort(
{
Text.PositionOf([Custom.1], "Year"),
Text.PositionOf([Custom.1], "Revenue"),
Text.PositionOf([Custom.1], "Cost")
}
)
)([Custom.1])
),
#"Added Custom3" = Table.SelectColumns(
Table.TransformColumnTypes(
Table.ExpandRecordColumn(
Table.AddColumn(
#"Added Custom2",
"Custom.2",
each Record.FromList(
List.Transform([#" Custom.2"], each Text.Select(_, {"0" .. "9"})),
List.Transform([#" Custom.2"], each Text.Select(_, {"A" .. "z"}))
)
),
"Custom.2",
{"Year", "Revenue", "Cost"},
{"Year", "Revenue", "Cost"}
),
{{"Year", Int64.Type}, {"Revenue", Int64.Type}, {"Cost", Int64.Type}}
),
{"Company", "Year", "Revenue", "Cost"}
)
in
#"Added Custom3"Power Query solution 5 for Clean Data and Restructure Table, proposed by Artur Pilipczuk:
let
Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
#"Uppercased Text" = Table.TransformColumns(Source,{{"Data", Text.Upper, type text}}),
#"Split Column by Delimiter" = Table.SplitColumn(#"Uppercased Text", "Data", Splitter.SplitTextByEachDelimiter({" "}, QuoteStyle.None, false), {"Company", "RawData"}),
#"Replaced Value" = Table.ReplaceValue(#"Split Column by Delimiter",",",", ",Replacer.ReplaceText,{"RawData"}),
Custom1 = Table.TransformColumns( #"Replaced Value",{{"RawData",each
List.Accumulate({"YEAR","REVENUE","COST"},
_,
(x,y)=> Text.Replace(x,y,"^" & y & "|")
)
}}),
Custom2 = Table.TransformColumns( Custom1,{{"RawData",each
List.Accumulate({":"," ",",","-"},
_,
(x,y)=> Text.Replace(x,y,"")
)
}}),
Power Query solution 6 for Clean Data and Restructure Table, proposed by Alexandru Badiu:
let
Source = DataSource,
NoSign = Table.AddColumn(Source, "Custom", each Text.Combine(
List.Transform(
Text.ToList([Data]),
each
if not List.Contains({"a" .. "z", "A" .. "Z", " ", "0" .. "9"}, _) then
Text.Replace(_, _, " ")
else
_
)
)),
CapitalizedEachWord = Table.TransformColumns(NoSign,{{"Custom", Text.Proper, type text}}),
Company = Table.AddColumn(CapitalizedEachWord, "Company", each Text.Select(Text.BeforeDelimiter(Text.Proper([Data]), " "), {"A".."z","0".."9"}), type text),
RenameToIBM = Table.ReplaceValue(Company,"Ibm","IBM",Replacer.ReplaceText,{"Company"}),
WithoutCompanyName = Table.TransformColumns(RenameToIBM, {{"Custom", each Text.AfterDelimiter(_, " "), type text}}),
Flags = Table.TransformColumns( WithoutCompanyName,{{"Custom",each
List.Accumulate({"Year","Revenue","Cost"},
_,
(state, current)=> Text.Replace(state,current, "¤" & current & "|")
)
}}),
SplitCol = Table.ExpandListColumn(Table.TransformColumns(Flags, {{"Custom", Splitter.SplitTextByDelimiter("¤", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Custom"),
Solving the challenge of Clean Data and Restructure Table with Excel
Excel solution 1 for Clean Data and Restructure Table, proposed by محمد حلمي:
=LET(
n,PROPER(B2:B5),
i,TRIM(SUBSTITUTE(SUBSTITUTE(
REDUCE(n,{":",",","-"},LAMBDA(a,d,
SUBSTITUTE(a,d," "))),"r2","r 2"),"e2","e 2")),DROP(
REDUCE("",i,LAMBDA(z,x,VSTACK(z,LET(
s,TEXTBEFORE(x,{":"," ","-"}),
u,DROP(SORT(WRAPCOLS(
TEXTSPLIT(TEXTAFTER(x," ")," "),2),,-1,1),1),
e,HSTACK(s,WRAPCOLS(u,COUNTA(u)/3)),
IFNA(e,s))))),1))Excel solution 2 for Clean Data and Restructure Table, proposed by محمد حلمي:
=VSTACK({"Company","Year","Revenue","Cost"};
SUBSTITUTE(SUBSTITUTE(SUBSTITUTE(
HSTACK(
PROPER(TEXTSPLIT(TRIM(CONCAT(BYROW(B2:B5;LAMBDA(a;TAKE(REPT(" "&TEXTBEFORE(a;" ");SEQUENCE(COUNT(UNIQUE(SEARCH("year";a;SEQUENCE(LEN(a)))))));-1)))));;" "));
TEXTSPLIT(TRIM(CONCAT(" "&BYROW($B$2:$B$5;LAMBDA(rr;CONCAT(" "&TEXTBEFORE(TEXTAFTER(PROPER(rr);"Year";SEQUENCE((LEN(rr)-LEN(SUBSTITUTE(PROPER(rr);"Year";"")))/4);1);","))))));;" ");
DROP(TEXTSPLIT(TRIM(CONCAT(" "&BYROW(B2:B5;LAMBDA(ii;CONCAT(TEXTBEFORE(TEXTAFTER(PROPER(ii);"Revenue";SEQUENCE((LEN(ii)-LEN(SUBSTITUTE(PROPER(ii);"Year";"")))/4);1;1);"Cost";;;1))))));;" ");1);
TEXTSPLIT(TRIM(CONCAT(BYROW(B2:B5;LAMBDA(ff;CONCAT(" "&TEXTBEFORE(SUBSTITUTE(TEXTAFTER(PROPER(ff);"Cost";SEQUENCE((LEN(ff)-LEN(SUBSTITUTE(PROPER(ff);"Year";"")))/4);1);": ";"");" ";;;1))))));;" ")
);":";"");"-";"");",";""))Excel solution 3 for Clean Data and Restructure Table, proposed by Aditya Kumar Darak 🇮🇳:
= VSTACK(
{"Company","Year","Revenue","Cost"},
DROP(
REDUCE(
"",
SEQUENCE(ROWS(data)),
LAMBDA(
a,
b,
VSTACK(
a,
LET(
_n1,
PROPER(TEXTBEFORE(INDEX(data, b), {" ",":","-",","})),
IFNA(
HSTACK(
_n1,
--MAP(
TEXTSPLIT(
TEXTAFTER(INDEX(data, b), "year", , 1),
{"revenue","cost"},
"year",
,
1),
LAMBDA(
a,
CONCAT(
IF(ISNUMBER(--MID(a, SEQUENCE(LEN(a)), 1)),
MID(a, SEQUENCE(LEN(a)), 1),
""))))),
_n1))))),
1))Excel solution 4 for Clean Data and Restructure Table, proposed by Hussein SATOUR:
=LET(Data, B3:B6, Del, ":,-",
F, LAMBDA(ME,S,C, IF(C = "", S, ME(ME, SUBSTITUTE(S, LEFT(C), " "), MID(C, 2, LEN(C) - 1) ))),
a, PROPER(TRIM(F(F, Data, Del))), b, (LEN(a)-LEN(SUBSTITUTE(a, "Year", "")))/4,
c, LAMBDA(x, SEQUENCE(INDEX(b,x,1))&INDEX(a,x)), d, LAMBDA(ME,y, IF(y=1, c(1), VSTACK(ME(ME, y-1), c(y)))),
e, d(d, COUNTA(Data)), g, LEFT(e), h, MID(TEXTBEFORE(e, " "),2,100),
i, SUBSTITUTE(SUBSTITUTE(TEXTBEFORE(TEXTAFTER(e, "Year", g), "Year", 1,,1), "Revenue", "/"), "Cost", "/"),
TRIM(HSTACK(h, TEXTBEFORE(i, "/"), TEXTAFTER(TEXTBEFORE(i, "/", 2), "/"), TEXTAFTER(i, "/",-1))))Excel solution 5 for Clean Data and Restructure Table, proposed by Sergei Baklan:
=LET( h, {"Company","Year","Revenue","Cost"},
ABC, LAMBDA(chr,
IFERROR(
BITOR(chr, 48),
IFERROR(CHAR(BITXOR(CODE(chr), 32)) = chr, 0)
) ),
noSymbols, LAMBDA(me,str,
IF( str="", "",
IF( ABC( LEFT(str)), LEFT(str), "" ) &
me( me, RIGHT(str, LEN(str) -1) ) )),
keepABC, LAMBDA(str, noSymbols( noSymbols, str ) ),
getName, LAMBDA(str,
LET(
s, TEXTSPLIT(str, h,,,1 ),
n, ( COLUMNS(s) -1 )/3,
value, PROPER( @TAKE( s,,1) ),
EXPAND( value, n, , value)
)),
getValue, LAMBDA(str,field,
TOCOL( CHOOSECOLS( TEXTSPLIT( str, field, h,,1 ), 2), 2 )
),
VSTACK( h,
DROP(
REDUCE( "", data,
LAMBDA(a,b, VSTACK( a, LET( v, keepABC(b),
HSTACK(
getName( v),
getValue( v, INDEX( h, 2) ),
getValue( v, INDEX( h, 3) ),
getValue( v, INDEX( h, 4) )
) ) ) ) ),
1 )
) )Excel solution 6 for Clean Data and Restructure Table, proposed by Sergei Baklan:
=LET( h, {"Year","Revenue","Cost"},
ABC, LAMBDA( chr,
IFERROR(
BITOR(chr, 48),
IFERROR(CHAR(BITXOR(CODE(chr), 32)) = chr, 0)
) ),
noSymbols, LAMBDA(me,str,
IF( str="", "",
IF( ABC( LEFT(str)), LEFT(str), "" ) &
me( me, RIGHT(str, LEN(str) -1) ) )),
keepABC, LAMBDA(str, noSymbols( noSymbols, str ) ),
getName, LAMBDA(str,
LET(
s, TEXTSPLIT(str, h,,,1 ),
n, ( COLUMNS(s) -1 )/3,
value, @TAKE( s,,1),
IF( SEQUENCE(n), PROPER( keepABC(value )) )
)),
getValue, LAMBDA(str,field,
LET(
s, TEXTSPLIT(str, field,,,1 ),
n, COLUMNS(s) - 1,
values, DROP( s,,1),
TOCOL( BYCOL( values,
LAMBDA(v, keepABC( @TEXTBEFORE(v,h,,1,,v) ) )) )
)),
VSTACK( HSTACK( "Company", h),
DROP(
REDUCE( "", data,
LAMBDA(a,v, VSTACK( a,
HSTACK(
getName( v),
getValue( v, INDEX( h, 1) ),
getValue( v, INDEX( h, 2) ),
getValue( v, INDEX( h, 3) )
) ) ) ),
1 )
) )Excel solution 7 for Clean Data and Restructure Table, proposed by red craven:
=LET(x,REGEXREPLACE(REGEXREPLACE(PROPER(B3:B6),"W",""),"([a-z0-9])([A-Z])","$1 $2"),
DROP(REDUCE("",x,LAMBDA(m,n,VSTACK(m,
LET(y,TEXTSPLIT(n," "),
z,--REGEXREPLACE(SORT(WRAPROWS(DROP(y,,1),3),,-1,1),"[A-Za-z]",""),
HSTACK(IF(TAKE(z,,1),TAKE(y,,1)),z))))),1))