Home » Clean Data and Restructure Table

Clean Data and Restructure Table

(Excel formulas also welcome) Clean the data and generate the result table as shown. (If code can’t be contained in one comment, please divide code into 2 parts and post into 2 comments)

📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 8
Challenge Difficulty: ⭐️⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Clean Data and Restructure Table with Power Query

Power Query solution 1 for Clean Data and Restructure Table, proposed by Luan Rodrigues:
let
 Fonte = Excel.CurrentWorkbook(){[Name="Tabela1"]}[Content],
 Fomr0 = Table.TransformColumns(Fonte,{{"Data", Text.Proper, type text}}),
 Divide_Col1 = Table.ExpandListColumn(Table.TransformColumns(Fomr0, {{"Data", Splitter.SplitTextByDelimiter(",", QuoteStyle.None), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
 Divide_Col2 = Table.ExpandListColumn(Table.TransformColumns(Divide_Col1, {{"Data", Splitter.SplitTextByDelimiter("Year", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
 Divide_Col3 = Table.ExpandListColumn(Table.TransformColumns(Divide_Col2, {{"Data", Splitter.SplitTextByDelimiter("Revenue", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
 Divide_Col4 = Table.ExpandListColumn(Table.TransformColumns(Divide_Col3, {{"Data", Splitter.SplitTextByDelimiter("Cost", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Data"),
 Form = Table.SelectRows(
Table.TransformColumns( Divide_Col4,{{"Data", Text.Trim, type text}}),each ([Data] <> "")),


                    
                  
          
Power Query solution 2 for Clean Data and Restructure Table, proposed by Luan Rodrigues:
Part 2 
Comp = Table.AddColumn(Form, "Company", each if Text.Select([Data],{"a".."z","A".."Z"}) = "" then null else Text.Select([Data],{"a".."z","A".."Z"})),
 Preenc_Abaixo = Table.FillDown(Comp,{"Company"}),
 Num = Table.SelectRows(Table.AddColumn(Preenc_Abaixo, "Dados", each Text.Select([Data],{"0".."9"})), each ([Dados] <> "")),
 Indice = Table.AddIndexColumn(Num, "Índice.1", 1, 1, Int64.Type),
 Mod = Table.AddColumn(Indice, "Personalizar.2", each Number.Mod([Índice.1],3)),
 Cond = Table.AddColumn(Mod, "Unir", each if [Personalizar.2] = 1 then [Índice.1] else null),
 Preenc_Abaixo1 = Table.FillDown(Cond,{"Unir"})[[Company],[Dados],[Unir]],
 Group = Table.Group(Preenc_Abaixo1, {"Unir"}, {{"Tabela", each _, type table [Personalizar=text, Dados=text, Unir=number]}}),
 List = Table.AddColumn(Group, "Listar", each [Tabela][Dados]),
 Ext = Table.TransformColumns(List, {"Listar", each Text.Combine(List.Transform(_, Text.From), ";"), type text}),
 Tab_Exp = Table.Distinct(Table.ExpandTableColumn(Ext, "Tabela", {"Company"}, {"Company"})),
 Tabela = Table.SplitColumn(Tab_Exp, "Listar", Splitter.SplitTextByDelimiter(";", QuoteStyle.Csv), {"Year", "Revenue", "Cost"})[[Company],[Year],[Revenue],[Cost]]
in
 Tabela
                    
                  
Power Query solution 3 for Clean Data and Restructure Table, proposed by Victor Wang:
let
 Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
 #"Uppercased Text" = Table.TransformColumns(Source,{{"Data", Text.Upper, type text}}),
 #"Added Custom" = Table.AddColumn(#"Uppercased Text", "Company", each Text.BeforeDelimiter( [Data], "YEAR" ) ),
 #"Added Custom1" = Table.AddColumn(#"Added Custom", "OtherColumns", each Text.Replace( Text.Select( Text.AfterDelimiter( [Data], [Company] ), {"A".."Z", "0".."9"} ), "YEAR", "^YEAR" ) ),


                    
                  
          
Power Query solution 4 for Clean Data and Restructure Table, proposed by Venkata Rajesh:
let
  Source = Data, 
  #"Added Custom" = Table.ExpandListColumn(
    Table.AddColumn(
      Table.AddColumn(
        Table.AddColumn(
          Source, 
          "Custom", 
          each Text.Select(Text.Proper([Data]), {"A" .. "Z", "a" .. "z", "0" .. "9"})
        ), 
        "Company", 
        each Text.BeforeDelimiter([Custom], "Year"), 
        type text
      ), 
      "Custom.1", 
      each Splitter.SplitTextByPositions(Text.PositionOf([Custom], "Year", Occurrence.All))(
        [Custom]
      )
    ), 
    "Custom.1"
  ), 
  #"Added Custom2" = Table.AddColumn(
    #"Added Custom", 
    " Custom.2", 
    each Splitter.SplitTextByPositions(
      List.Sort(
        {
          Text.PositionOf([Custom.1], "Year"), 
          Text.PositionOf([Custom.1], "Revenue"), 
          Text.PositionOf([Custom.1], "Cost")
        }
      )
    )([Custom.1])
  ), 
  #"Added Custom3" = Table.SelectColumns(
    Table.TransformColumnTypes(
      Table.ExpandRecordColumn(
        Table.AddColumn(
          #"Added Custom2", 
          "Custom.2", 
          each Record.FromList(
            List.Transform([#" Custom.2"], each Text.Select(_, {"0" .. "9"})), 
            List.Transform([#" Custom.2"], each Text.Select(_, {"A" .. "z"}))
          )
        ), 
        "Custom.2", 
        {"Year", "Revenue", "Cost"}, 
        {"Year", "Revenue", "Cost"}
      ), 
      {{"Year", Int64.Type}, {"Revenue", Int64.Type}, {"Cost", Int64.Type}}
    ), 
    {"Company", "Year", "Revenue", "Cost"}
  )
in
  #"Added Custom3"
Power Query solution 5 for Clean Data and Restructure Table, proposed by Artur Pilipczuk:
let
 Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
 #"Uppercased Text" = Table.TransformColumns(Source,{{"Data", Text.Upper, type text}}),
 #"Split Column by Delimiter" = Table.SplitColumn(#"Uppercased Text", "Data", Splitter.SplitTextByEachDelimiter({" "}, QuoteStyle.None, false), {"Company", "RawData"}),
 #"Replaced Value" = Table.ReplaceValue(#"Split Column by Delimiter",",",", ",Replacer.ReplaceText,{"RawData"}),
 Custom1 = Table.TransformColumns( #"Replaced Value",{{"RawData",each 
List.Accumulate({"YEAR","REVENUE","COST"},
_, 
(x,y)=> Text.Replace(x,y,"^" & y & "|")
)
}}),
 Custom2 = Table.TransformColumns( Custom1,{{"RawData",each 
List.Accumulate({":"," ",",","-"},
_, 
(x,y)=> Text.Replace(x,y,"")
)
}}),


                    
                  
          
Power Query solution 6 for Clean Data and Restructure Table, proposed by Alexandru Badiu:
let
 Source = DataSource,
 NoSign = Table.AddColumn(Source, "Custom", each Text.Combine(
 List.Transform(
 Text.ToList([Data]), 
 each 
 if not List.Contains({"a" .. "z", "A" .. "Z", " ", "0" .. "9"}, _) then
 Text.Replace(_, _, " ")
 else
 _
 )
 )),
 CapitalizedEachWord = Table.TransformColumns(NoSign,{{"Custom", Text.Proper, type text}}),
 Company = Table.AddColumn(CapitalizedEachWord, "Company", each Text.Select(Text.BeforeDelimiter(Text.Proper([Data]), " "), {"A".."z","0".."9"}), type text),
 RenameToIBM = Table.ReplaceValue(Company,"Ibm","IBM",Replacer.ReplaceText,{"Company"}),
 WithoutCompanyName = Table.TransformColumns(RenameToIBM, {{"Custom", each Text.AfterDelimiter(_, " "), type text}}),
 Flags = Table.TransformColumns( WithoutCompanyName,{{"Custom",each 
List.Accumulate({"Year","Revenue","Cost"},
_, 
(state, current)=> Text.Replace(state,current, "¤" & current & "|")
)
}}),
 SplitCol = Table.ExpandListColumn(Table.TransformColumns(Flags, {{"Custom", Splitter.SplitTextByDelimiter("¤", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Custom"),


                    
                  
          

Solving the challenge of Clean Data and Restructure Table with Excel

Excel solution 1 for Clean Data and Restructure Table, proposed by محمد حلمي:
=LET(
n,PROPER(B2:B5),
i,TRIM(SUBSTITUTE(SUBSTITUTE(
REDUCE(n,{":",",","-"},LAMBDA(a,d,
SUBSTITUTE(a,d," "))),"r2","r 2"),"e2","e 2")),DROP(
REDUCE("",i,LAMBDA(z,x,VSTACK(z,LET(
s,TEXTBEFORE(x,{":"," ","-"}),
u,DROP(SORT(WRAPCOLS(
TEXTSPLIT(TEXTAFTER(x," ")," "),2),,-1,1),1),
e,HSTACK(s,WRAPCOLS(u,COUNTA(u)/3)),
IFNA(e,s))))),1))
Excel solution 2 for Clean Data and Restructure Table, proposed by محمد حلمي:
=VSTACK({"Company","Year","Revenue","Cost"};
SUBSTITUTE(SUBSTITUTE(SUBSTITUTE(
HSTACK(

PROPER(TEXTSPLIT(TRIM(CONCAT(BYROW(B2:B5;LAMBDA(a;TAKE(REPT(" "&TEXTBEFORE(a;" ");SEQUENCE(COUNT(UNIQUE(SEARCH("year";a;SEQUENCE(LEN(a)))))));-1)))));;" "));

TEXTSPLIT(TRIM(CONCAT(" "&BYROW($B$2:$B$5;LAMBDA(rr;CONCAT(" "&TEXTBEFORE(TEXTAFTER(PROPER(rr);"Year";SEQUENCE((LEN(rr)-LEN(SUBSTITUTE(PROPER(rr);"Year";"")))/4);1);","))))));;" ");

DROP(TEXTSPLIT(TRIM(CONCAT(" "&BYROW(B2:B5;LAMBDA(ii;CONCAT(TEXTBEFORE(TEXTAFTER(PROPER(ii);"Revenue";SEQUENCE((LEN(ii)-LEN(SUBSTITUTE(PROPER(ii);"Year";"")))/4);1;1);"Cost";;;1))))));;" ");1);

TEXTSPLIT(TRIM(CONCAT(BYROW(B2:B5;LAMBDA(ff;CONCAT(" "&TEXTBEFORE(SUBSTITUTE(TEXTAFTER(PROPER(ff);"Cost";SEQUENCE((LEN(ff)-LEN(SUBSTITUTE(PROPER(ff);"Year";"")))/4);1);": ";"");" ";;;1))))));;" ")
);":";"");"-";"");",";""))
Excel solution 3 for Clean Data and Restructure Table, proposed by Aditya Kumar Darak 🇮🇳:
= VSTACK(
 {"Company","Year","Revenue","Cost"},
 DROP(
 REDUCE(
 "",
 SEQUENCE(ROWS(data)),
 LAMBDA(
 a,
 b,
 VSTACK(
 a,
 LET(
 _n1,
 PROPER(TEXTBEFORE(INDEX(data, b), {" ",":","-",","})),
 IFNA(
 HSTACK(
 _n1,
 --MAP(
 TEXTSPLIT(
 TEXTAFTER(INDEX(data, b), "year", , 1),
 {"revenue","cost"},
 "year",
 ,
 1),
 LAMBDA(
 a,
 CONCAT(
 IF(ISNUMBER(--MID(a, SEQUENCE(LEN(a)), 1)),
 MID(a, SEQUENCE(LEN(a)), 1),
 ""))))),
 _n1))))),
 1))
Excel solution 4 for Clean Data and Restructure Table, proposed by Hussein SATOUR:
=LET(Data, B3:B6, Del, ":,-",
F, LAMBDA(ME,S,C, IF(C = "", S, ME(ME, SUBSTITUTE(S, LEFT(C), " "), MID(C, 2, LEN(C) - 1) ))),
a, PROPER(TRIM(F(F, Data, Del))), b, (LEN(a)-LEN(SUBSTITUTE(a, "Year", "")))/4,
c, LAMBDA(x, SEQUENCE(INDEX(b,x,1))&INDEX(a,x)), d, LAMBDA(ME,y, IF(y=1, c(1), VSTACK(ME(ME, y-1), c(y)))),
e, d(d, COUNTA(Data)), g, LEFT(e), h, MID(TEXTBEFORE(e, " "),2,100),
i, SUBSTITUTE(SUBSTITUTE(TEXTBEFORE(TEXTAFTER(e, "Year", g), "Year", 1,,1), "Revenue", "/"), "Cost", "/"),
TRIM(HSTACK(h, TEXTBEFORE(i, "/"), TEXTAFTER(TEXTBEFORE(i, "/", 2), "/"), TEXTAFTER(i, "/",-1))))
Excel solution 5 for Clean Data and Restructure Table, proposed by Sergei Baklan:
=LET( h, {"Company","Year","Revenue","Cost"},

 ABC, LAMBDA(chr,
 IFERROR(
 BITOR(chr, 48),
 IFERROR(CHAR(BITXOR(CODE(chr), 32)) = chr, 0)
 ) ),

 noSymbols, LAMBDA(me,str,
 IF( str="", "",
 IF( ABC( LEFT(str)), LEFT(str), "" ) &
 me( meRIGHT(str, LEN(str) -1) ) )),

 keepABC, LAMBDA(str, noSymbols( noSymbols, str ) ),

 getName, LAMBDA(str,
 LET(
 s, TEXTSPLIT(str, h,,,1 ),
 n, ( COLUMNS(s) -1 )/3,
 value, PROPER( @TAKE( s,,1) ),
 EXPAND( value, n, , value)
 )),

 getValue, LAMBDA(str,field,
 TOCOL( CHOOSECOLS( TEXTSPLIT( str, field, h,,1 ), 2), 2 )
),

 VSTACK( h,
 DROP(
 REDUCE( "", data,
 LAMBDA(a,b, VSTACK( a, LET( v, keepABC(b),
 HSTACK(
 getName( v),
 getValue( v, INDEX( h, 2) ),
 getValue( v, INDEX( h, 3) ),
 getValue( v, INDEX( h, 4) )
 ) ) ) ) ),
 1 )
) )
Excel solution 6 for Clean Data and Restructure Table, proposed by Sergei Baklan:
=LET( h, {"Year","Revenue","Cost"},

 ABC, LAMBDA( chr,
 IFERROR(
 BITOR(chr, 48),
 IFERROR(CHAR(BITXOR(CODE(chr), 32)) = chr, 0)
 ) ),

 noSymbols, LAMBDA(me,str,
 IF( str="", "",
 IF( ABC( LEFT(str)), LEFT(str), "" ) &
 me( meRIGHT(str, LEN(str) -1) ) )),

 keepABC, LAMBDA(str, noSymbols( noSymbols, str ) ),

 getName, LAMBDA(str,
 LET(
 s, TEXTSPLIT(str, h,,,1 ),
 n, ( COLUMNS(s) -1 )/3,
 value, @TAKE( s,,1),
 IF( SEQUENCE(n), PROPER( keepABC(value )) )
 )),

 getValue, LAMBDA(str,field,
 LET(
 s, TEXTSPLIT(str, field,,,1 ),
 n, COLUMNS(s) - 1,
 values, DROP( s,,1),
 TOCOL( BYCOL( values,
 LAMBDA(v, keepABC( @TEXTBEFORE(v,h,,1,,v) ) )) )
 )),

 VSTACK( HSTACK( "Company", h),
 DROP(
 REDUCE( "", data,
 LAMBDA(a,v, VSTACK( a,
 HSTACK(
 getName( v),
 getValue( v, INDEX( h, 1) ),
 getValue( v, INDEX( h, 2) ),
 getValue( v, INDEX( h, 3) )
 ) ) ) ),
 1 )
) )
Excel solution 7 for Clean Data and Restructure Table, proposed by red craven:
=LET(x,REGEXREPLACE(REGEXREPLACE(PROPER(B3:B6),"W",""),"([a-z0-9])([A-Z])","$1 $2"),
DROP(REDUCE("",x,LAMBDA(m,n,VSTACK(m,
LET(y,TEXTSPLIT(n," "),
z,--REGEXREPLACE(SORT(WRAPROWS(DROP(y,,1),3),,-1,1),"[A-Za-z]",""),
HSTACK(IF(TAKE(z,,1),TAKE(y,,1)),z))))),1))

Leave a Reply