Home » Clean Salary and Promotion Data

Clean Salary and Promotion Data

Clean the given data and align them properly in departments and sort on Dept ID & Names. Christian-65600: 65600 is salary If it is accompanied with the date, then that is promotion date. Ex. Carolyn-51900-11/12/2023

📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 226
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Clean Salary and Promotion Data with Power Query

Power Query solution 1 for Clean Salary and Promotion Data, proposed by Zoran Milokanović:
let
  Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content], 
  H = {"Dept ID", "Emp Names", "Promotion Date", "Salary"}, 
  S = Table.Sort(
    Table.FromList(
      Table.ToRows(
        Table.UnpivotOtherColumns(
          Table.FillDown(Source, {H{0}}), 
          {H{0}, "Highest Paid Employee"}, 
          "A", 
          "V"
        )
      ), 
      each 
        let
          t = Text.Split(_{3}, "-")
        in
          {_{0}, t{0}, Date.From(t{2}?, "en-US"), Number.From(t{1})}, 
      H
    ), 
    {H{0}, H{1}}
  )
in
  S
Power Query solution 2 for Clean Salary and Promotion Data, proposed by Kris Jaganah:
let
  A = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  B = Table.FillDown(A, {"Dept ID"}), 
  C = (y) =>
    [
      a = Table.SelectRows(B, (x) => x[Dept ID] = y), 
      b = Table.UnpivotOtherColumns(a, {"Dept ID", "Highest Paid Employee"}, "A", "V"), 
      c = Table.SplitColumn(
        b, 
        "V", 
        each Text.Split(_, "-"), 
        {"Emp Names", "Salary", "Promotion Date"}
      ), 
      d = Table.TransformColumns(
        c, 
        {{"Promotion Date", each Date.FromText(_, [Format = "M/d/yyyy"])}, {"Salary", Number.From}}
      ), 
      e = Table.Sort(d, {"Emp Names", 0})
    ][e], 
  D = Table.Combine(
    List.Transform(
      List.Distinct(B[Dept ID]), 
      each C(_)[[Dept ID], [Emp Names], [Promotion Date], [Salary]]
    )
  )
in
  D
Power Query solution 3 for Clean Salary and Promotion Data, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Group = Table.Group(
    Source, 
    "Dept ID", 
    {
      {
        "A", 
        each 
          let
            a = _, 
            b = List.Skip(Table.ToColumns(a), 2), 
            c = List.RemoveNulls(List.Combine(b)), 
            d = List.Transform(
              c, 
              each 
                let
                  d1 = Text.Split(_, "-"), 
                  d2 = 
                    if List.Count(d1) = 3 then
                      {d1{0}} & {Date.From(List.Last(d1))} & {d1{1}}
                    else
                      {d1{0}} & {null} & {d1{1}}
                in
                  d2
            ), 
            e = Table.Sort(
              Table.FromRows(d, {"Emp Names", "Promotion Date", "Salary"}), 
              "Emp Names"
            )
          in
            e
      }
    }, 
    0, 
    (a, b) => Number.From(b <> null)
  ), 
  Sol = Table.ExpandTableColumn(Group, "A", Table.ColumnNames(Group[A]{0}))
in
  Sol
Power Query solution 4 for Clean Salary and Promotion Data, proposed by Luan Rodrigues:
let
  Fonte = Tabela1, 
  grp = Table.Group(
    Fonte, 
    "Dept ID", 
    {
      {
        "tab", 
        each 
          let
            a = List.RemoveNulls(List.Skip(List.Combine(Table.ToColumns(_)))), 
            b = Table.Combine(List.Transform(a, (x) => Table.FromRows({Text.Split(x, "-")}))), 
            c = Table.RenameColumns(
              b, 
              List.Zip({Table.ColumnNames(b), {"Emp Names", "Salary", "Promotion Date"}})
            )[[Emp Names], [Promotion Date], [Salary]]
          in
            c
      }
    }, 
    0, 
    (a, b) => Number.From(b <> null)
  ), 
  exp = Table.ExpandTableColumn(grp, "tab", Table.ColumnNames(grp[tab]{0})), 
  srt = Table.Sort(exp, {"Dept ID", "Emp Names"})
in
  srt
Power Query solution 5 for Clean Salary and Promotion Data, proposed by Hussein SATOUR:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  FillDown = Table.FillDown(Source, {"Dept ID"}), 
  UnpivotOther = Table.UnpivotOtherColumns(FillDown, {"Dept ID"}, "Attribute", "Value"), 
  RemovCols = Table.RemoveColumns(UnpivotOther, {"Attribute"}), 
  SplitValue = Table.SplitColumn(
    RemovCols, 
    "Value", 
    Splitter.SplitTextByDelimiter("-"), 
    {"Emp Name", "Salary", "Promotion"}
  ), 
  SortRows = Table.Buffer(
    Table.Sort(SplitValue, {{"Dept ID", 0}, {"Emp  
  Name", 0}, {"Promotion", 1}})
  ), 
  RemoveDuplicates = Table.Distinct(SortRows, {"Emp Name", "Salary"}), 
  ChangeTypes = Table.TransformColumnTypes(
    RemoveDuplicates, 
    {{"Promotion", type date}, {"Salary", Int64.Type}}
  )
in
  ChangeTypes
Power Query solution 6 for Clean Salary and Promotion Data, proposed by Abdallah Ally:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  FillDown = Table.FillDown(Source, {"Dept ID"}), 
  Transform = List.Transform(
    Table.ToRows(FillDown), 
    each [#"Dept ID" = _{0}, Data = List.Select(List.Skip(_, 2), each _ <> null)]
  ), 
  Expand = Table.ExpandListColumn(Table.FromRecords(Transform), "Data"), 
  Split = Table.SplitColumn(
    Expand, 
    "Data", 
    each Text.Split(_, "-"), 
    {"Emp Names", "Salary", "Promotion Date"}
  ), 
  Sort = Table.Sort(Split, {{"Dept ID", 0}, {"Emp Names", 0}}), 
  Select = Table.SelectColumns(Sort, {"Dept ID", "Emp Names", "Promotion Date", "Salary"}), 
  Result = Table.TransformColumnTypes(Select, {"Promotion Date", type date})
in
  Result
Power Query solution 7 for Clean Salary and Promotion Data, proposed by Eric Laforce:
let
  CN = {"Emp Names", "Promotion Date", "Salary"}, 
  Source = Excel.CurrentWorkbook(){[Name = "tData226"]}[Content], 
  FillDown = Table.FillDown(Source, {"Dept ID"}), 
  Group = Table.Group(
    FillDown, 
    {"Dept ID"}, 
    {
      "G", 
      each 
        let
          _Values = List.RemoveNulls([Promoted Employees] & [Not Promoted Employees]), 
          _T = Table.FromColumns({_Values}, {"ESP"}), 
          _Split = Table.SplitColumn(
            _T, 
            "ESP", 
            Splitter.SplitTextByDelimiter("-"), 
            {CN{0}, CN{2}, CN{1}}
          ), 
          _Sort = Table.Sort(_Split, CN{0})
        in
          _Sort
    }
  ), 
  Expand = Table.ExpandTableColumn(Group, "G", CN), 
  ChgType = Table.TransformColumnTypes(Expand, {{CN{1}, type date}, {CN{2}, Int64.Type}})
in
  ChgType
Power Query solution 8 for Clean Salary and Promotion Data, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
  S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  A = Table.FillDown(S, {"Dept ID"}), 
  B = Table.AddColumn(A, "C", each List.RemoveNulls(List.Skip(Record.ToList(_), 2))), 
  C = Table.SelectColumns(B, {"Dept ID", "C"}), 
  D = Table.ExpandListColumn(C, "C"), 
  E = Table.SplitColumn(
    D, 
    "C", 
    Splitter.SplitTextByDelimiter("-", QuoteStyle.Csv), 
    {"Emp Names", "Salary", "Promotion Date"}
  ), 
  F = Table.Sort(E, {{"Dept ID", Order.Ascending}, {"Emp Names", Order.Ascending}}), 
  G = Table.ReorderColumns(F, {"Dept ID", "Emp Names", "Promotion Date", "Salary"})
in
  G
Power Query solution 9 for Clean Salary and Promotion Data, proposed by Alexandre Garcia:
let
  a = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  b = {"Dept ID", "Emp Names", "Salary", "Promotion Date"}, 
  c = Table.FillDown(a, {b{0}}), 
  d = List.TransformMany(
    Table.ToRows(c), 
    (x) =>
      List.Transform(
        List.RemoveNulls(List.Skip(x, 2)), 
        (x) =>
          let
            a = Text.Split(x, "-")
          in
            Record.FromList(a, List.FirstN(List.Skip(b), List.Count(a)))
      ), 
    (x, y) => [Dept ID = x{0}] & y
  ), 
  Sol = Table.Sort(
    Table.SelectColumns(Table.FromRecords(d, b, MissingField.UseNull), List.Sort(b)), 
    {{b{0}, 0}, {b{1}, 0}}
  )
in
  Sol
Power Query solution 10 for Clean Salary and Promotion Data, proposed by Francesco Bianchi 🇮🇹:
let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  FilledDown = Table.FillDown(Source, {"Dept ID"}), 
  GroupedRows = Table.Group(
    FilledDown, 
    {"Dept ID"}, 
    {
      {
        "All", 
        each Table.Distinct(
          Table.Buffer(
            Table.Sort(
              Table.FromColumns(
                Record.ToList(
                  Record.RemoveFields(
                    [
                      a = List.Transform(
                        List.RemoveNulls(List.Combine(List.Skip(Table.ToColumns(_)))), 
                        each Text.Split(_, "-")
                      ), 
                      Emp Names = List.Transform(a, each _{0}), 
                      Promotion Date = List.Transform(
                        a, 
                        each try Date.FromText(_{2}, "en-US") otherwise null
                      ), 
                      Salary = List.Transform(a, each Number.From(_{1}))
                    ], 
                    {"a"}
                  )
                )
              ), 
              {{"Column1", Order.Ascending}, {"Column2", Order.Descending}}
            )
          ), 
          {"Column1"}
        )
      }
    }
  ), 
  Sol = Table.ExpandTableColumn(
    GroupedRows, 
    "All", 
    {"Column1", "Column2", "Column3"}, 
    {"Emp Names", "Promotion Date", "Salary"}
  )
in
  Sol
Power Query solution 11 for Clean Salary and Promotion Data, proposed by Gertjan Davies:
let
  Source = Problem, 
  Fill = Table.FillDown(Source, {"Dept ID"}), 
  Grouping = Table.Group(
    Fill, 
    {"Dept ID"}, 
    {
      {
        "Details", 
        each [[Promoted Employees], [Not Promoted Employees]], 
        type table [Promoted Employees = nullable text, Not Promoted Employees = nullable text]
      }
    }
  ), 
  Employees = Table.AddColumn(
    Grouping, 
    "Employees", 
    each List.RemoveNulls(List.Combine(Table.ToColumns([Details])))
  ), 
  Extract = Table.ExpandListColumn(Employees, "Employees"), 
  Split = Table.SplitColumn(
    Extract, 
    "Employees", 
    Splitter.SplitTextByDelimiter("-", QuoteStyle.Csv), 
    {"Emp Name", "Salary", "Promotion Date"}
  ), 
  ChangedType = Table.TransformColumnTypes(
    Split, 
    {{"Emp Name", type text}, {"Salary", Int64.Type}, {"Promotion Date", type date}}, 
    "en-US"
  ), 
  Sort = Table.Sort(ChangedType, {{"Dept ID", Order.Ascending}, {"Emp Name", Order.Ascending}})[
    [Dept ID], 
    [Emp Name], 
    [Promotion Date], 
    [Salary]
  ]
in
  Sort
Power Query solution 12 for Clean Salary and Promotion Data, proposed by Sanket Doijode:
let
  Source = Table.TransformColumns(
    Table.FillDown(
      Table.RemoveColumns(
        Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
        "Highest Paid Employee"
      ), 
      {"Dept ID"}
    ), 
    {{"Not Promoted Employees", each _ & "-null"}}
  ), 
  Result = Table.ReorderColumns(
    Table.ExpandTableColumn(
      Table.Group(
        Source, 
        {"Dept ID"}, 
        {
          {
            "Count", 
            each Table.FromList(
              List.Sort(
                List.RemoveNulls(List.Combine(Table.ToColumns(Table.RemoveColumns(_, "DeptID")))), 
                Order.Ascending
              ), 
              Splitter.SplitTextByDelimiter("-"), 
              {"Emp Names", "Salary", "Promotion    Date"}
            )
          }
        }
      ), 
      "Count", 
      {"Emp Names", "Salary", "Promotion Date"}
    ), 
    {"Dept ID", "Emp Names", "Promotion Date", "Salary"}
  )
in
  Result
Power Query solution 13 for Clean Salary and Promotion Data, proposed by Sahan Jayasuriya:
let
  Source = Excel.CurrentWorkbook(){[Name = "Data"]}[Content], 
  FilledDown = Table.FillDown(Source, {"Dept ID"}), 
  GroupedRows = Table.Group(FilledDown, {"Dept ID"}, {{"All", each _}}), 
  TableTransform = Table.TransformColumns(
    GroupedRows, 
    {
      {
        "All", 
        each [
          a = List.RemoveFirstN(Table.ToColumns(_), 1), 
          b = List.Combine(List.Transform(a, (x) => List.RemoveNulls(x))), 
          c = List.Select(
            b, 
            (x) =>
              Text.Length(x)
                = List.Max(
                  List.Transform(
                    List.Select(b, (k) => Text.Contains(k, Text.BeforeDelimiter(x, "-"))), 
                    (p) => Text.Length(p)
                  )
                )
          ), 
          d = List.Distinct(c), 
          e = List.Transform(d, (x) => Text.Split(x, "-")), 
          f = List.Transform(e, (x) => if List.Count(x) < 3 then x & {null} else x), 
          g = Table.FromRows(f, {"Emp Name", "Salary", "Promotion Date"})
        ][g]
      }
    }
  ), 
  ExpandedTable = Table.ExpandTableColumn(
    TableTransform, 
    "All", 
    {"Emp Name", "Promotion Date", "Salary"}
  ), 
  ChangedType = Table.TransformColumnTypes(
    ExpandedTable, 
    {
      {"Dept ID", Int64.Type}, 
      {"Emp Name", type text}, 
      {"Promotion Date", type date}, 
      {"Salary", Int64.Type}
    }
  ), 
  SortedRows = Table.Sort(
    ChangedType, 
    {{"Dept ID", Order.Ascending}, {"Emp Name", Order.Ascending}}
  )
in
  SortedRows

Solving the challenge of Clean Salary and Promotion Data with Excel

Excel solution 1 for Clean Salary and Promotion Data, proposed by Bo Rydobon 🇹🇭:
=LET(
    c,
    C2:D13,
    d,
    HSTACK(
        SCAN(
            ,
            TOCOL(
                IFS(
                    c>0,
                    A2:A13
                ),
                3
            ),
            MAX
        ),
        TEXTSPLIT(
            CONCAT(
                c&"|"
            ),
            "-",
            "|",
            1,
            ,
            ""
        )
    ),
    SORTBY(
        SORT(
            IFERROR(
                --d,
                d
            ),
            {1,
            2}
        ),
        {1,
        2,
        4,
        3}
    )
)
Excel solution 2 for Clean Salary and Promotion Data, proposed by Bo Rydobon 🇹🇭:
=LET(
    c,
    B2:D13,
    d,
    TEXTSPLIT(
        CONCAT(
            TOCOL(
                c,
                3
            )&"|"
        ),
        "-",
        "|",
        1,
        ,
        0
    ),
    g,
    GROUPBY(
        HSTACK(
            TOCOL(
                IFS(
                    c>0,
                    SCAN(
                        ,
                        A2:A13,
                        LAMBDA(
                            a,
                            v,
                            IF(
                                v,
                                v,
                                a
                            )
                        )
                    )
                ),
                3
            ),
            TAKE(
                d,
                ,
                1
            )
        ),
        --CHOOSECOLS(
            d,
            3,
            2
        ),
        MAX,
        ,
        0
    ),
    IF(
        g>0,
        g,
        ""
    )
)
Excel solution 3 for Clean Salary and Promotion Data, proposed by 🇰🇷 Taeyong Shin:
=LET(
    d,
    C2:D13,
    r,
    REGEXREPLACE(
        SCAN(
            ,
            TOCOL(
                IFS(
                    d>0,
                    A2:A13
                ),
                2
            ),
            MAX
        )&TOCOL(
            d,
            1
       & ),
        "(d+)(pL+)-((?1))-?([/d]+)?",
        "$"&{1,
        2,
        4,
        3}
    ),
    SORT(
        IFERROR(
            --r,
            r
        ),
        {1,
        2}
    )
)
Excel solution 4 for Clean Salary and Promotion Data, proposed by Kris Jaganah:
=LET(
    a,
    SCAN(
        0,
        A2:A13,
        LAMBDA(
            x,
            y,
            IF(
                y,
                y,
                x
            )
        )
    ),
    b,
    TEXTSPLIT(
        CONCAT(
            D2:D13&","
        ),
        "-",
        ",",
        1
    ),
    c,
    TEXTSPLIT(
        CONCAT(
            C2:C13&","
        ),
        "-",
        ","
    ),
    d,
    VSTACK(
        a&"-"&TAKE(
            b,
            ,
            1
        ),
        DROP(
            a&"-"&TAKE(
                c,
                ,
                1
            ),
            -1
        )
    ),
    e,
    FILTER(
        d,
        TEXTAFTER(
            d,
            "-"
        )<>""
    ),
    f,
    --TEXTBEFORE(
        e,
        "-"
    ),
    g,
    TEXTAFTER(
        e,
        "-"
    ),
    h,
    IFNA(
        TEXT(
            VLOOKUP(
                g,
                c,
                3,
                0
            ),
            "m/dd/yyy"
        ),
        ""
    ),
    i,
    --IFNA(
        VLOOKUP(
            g,
            b,
            2,
            0
        ),
        VLOOKUP(
            g,
            c,
            2,
            0
        )
    ),
    VSTACK(
        {"Dept ID",
        "Emp Names",
        "Promotion Date",
        "Salary"},
        SORT(
            HSTACK(
                f,
                g,
                h,
                i
            ),
            {1,
            2}
        )
    )
)
Excel solution 5 for Clean Salary and Promotion Data, proposed by Julian Poeltl:
=LET(
    T,
    TOCOL(
        SCAN(
            ,
            A2:A13,
            LAMBDA(
                A,
                B,
                IF(
                    B="",
                    A,
                    B
                )
            )
        )&"-"&C2:D13
    ),
    S,
    TEXTSPLIT(
        TEXTJOIN(
            "|",
            ,
            FILTER(
                T,
                LEN(
                    T
                )>2
            )
        ),
        "-",
        "|"
    ),
    C,
    IFNA(
        IFERROR(
            --S,
            S
        ),
        ""
    ),
    SORT(
        SORT(
            HSTACK(
                TAKE(
                    C,
                    ,
                    2
                ),
                TAKE(
                    C,
                    ,
                    -1
                ),
                CHOOSECOLS(
                    C,
                    3
                )
            ),
            2
        ),
        1
    )
)
Excel solution 6 for Clean Salary and Promotion Data, proposed by Oscar Mendez Roca Farell:
=LET(d,
    C2:D13,
    h,
    HSTACK(TOCOL(SCAN(
        ,
        A2:A13,
        MAX
    )/(d>0),
    3),
    TEXTSPLIT(
        CONCAT(
            d&"|"
        ),
        "-",
        "|",
        1,
        ,
        ""
    )),
    SORTBY(
        SORT(
            IFERROR(
                --h,
                h
            ),
            {1,
            2}
        ),
        {1,
        2,
        4,
        3}
    ))
Excel solution 7 for Clean Salary and Promotion Data, proposed by Sunny Baggu:
=LET(
    
     _a,
     SCAN(
         "",
          A2:A13,
          LAMBDA(
              a,
               v,
               IF(
                   v = "",
                    a,
                    v
               )
          )
     ),
    
     _b,
     IF(
         
          B2:D13 = "",
         
          x,
         
          HSTACK(
              IF(
                  ISNUMBER(
                      SEARCH(
                          TEXTBEFORE(
                              B2:B13,
                               "-"
                          ),
                           C2:C13
                      )
                  ),
                   x,
                   SUBSTITUTE(
                       B2:B13,
                        "-",
                        "--"
                   )
              ),
               C2:C13,
               SUBSTITUTE(
                   D2:D13,
                    "-",
                    "--"
               )
          )
          
     ),
    
     REDUCE(
         
          {"Dept ID",
          "Emp Names",
          "Promotion Date",
          "Salary"},
         
          UNIQUE(
              _a
          ),
         
          LAMBDA(
              x,
               y,
              
               VSTACK(
                   
                    x,
                   
                    SORTBY(
                        
                         UNIQUE(
                             
                              SORT(
                                  
                                   IFNA(
                                       
                                        DROP(
                                            REDUCE(
                                                "",
                                                 TOCOL(
                                                     FILTER(
                                                         _b,
                                                          _a = y
                                                     ),
                                                      3
                                                 ),
                                                 LAMBDA(
                                                     a,
                                                      v,
                                                      VSTACK(
                                                          a,
                                                           HSTACK(
                                                               y,
                                                                TEXTSPLIT(
                                                                    v,
                                                                     "-",
                                                                     ,
                                                                     1
                                                                )
                                                           )
                                                      )
                                                 )
                                            ),
                                             1
                                        ),
                                       
                                        ""
                                        
                                   ),
                                  
                                   {2,
                                   3}
                                   
                              )
                              
                         ),
                        
                         {1,
                         2,
                         4,
                         3},
                        
                         1
                         
                    )
                    
               )
               
          )
          
     )
    
)
Excel solution 8 for Clean Salary and Promotion Data, proposed by LEONARD OCHEA 🇷🇴:
=LET(
    t,
    A2:D13,
    C,
    CHOOSECOLS,
    V,
    TOCOL,
    d,
    C(
        t,
        2,
        3,
        4
    ),
    x,
    V(
        IF(
            d>"",
            SCAN(
                ,
                C(
                    t,
                    1
                ),
                MAX
            ),
            z
        ),
        3
    ),
    y,
    V(
        d,
        1
    ),
    z,
    TEXTSPLIT(
        TEXTJOIN(
            "|",
            ,
            y
        ),
        "-",
        "|"
    ),
    IFNA(
        GROUPBY(
            HSTACK(
                x,
                C(
                    z,
                    1
                )
            ),
            --C(
                z,
                3,
                2
            ),
            SINGLE,
            ,
            0
        ),
        ""
    )
)
Excel solution 9 for Clean Salary and Promotion Data, proposed by Md. Zohurul Islam:
=LET(
    
     A,
     A2:A13,
    
     B,
     C1:D1,
    
     C,
     C2:D13,
    
     ids,
     SCAN(
         0,
          A,
          LAMBDA(
              x,
               y,
               MAX(
                   x,
                    y
               )
          )
     ),
    
     deptID,
     TOCOL(
         IFNA(
             ids,
              B
         )
     ),
    
     data,
     TOCOL(
         C,
          0
     ),
    
     rng,
     UNIQUE(
         FILTER(
             HSTACK(
                 deptID,
                  data
             ),
              data <> 0
         ),
          ,
          FALSE
     ),
    
     names,
     TEXTBEFORE(
         CHOOSECOLS(
             rng,
              2
         ),
          "-"
     ),
    
     D,
     TEXTAFTER(
         CHOOSECOLS(
             rng,
              2
         ),
          names & "-"
     ),
    
     E,
     DROP(
         
          REDUCE(
              
               "",
              
               D,
              
               LAMBDA(
                   x,
                    y,
                   
                    LET(
                        
                         p,
                         TEXTSPLIT(
                             y,
                              "-"
                         ),
                        
                         q,
                         IFERROR(
                             VSTACK(
                                 x,
                                  p
                             ),
                              ""
                         ),
                        
                         q
                         
                    )
                    
               )
               
          ),
         
          1
          
     ),
    
     F,
     CHOOSECOLS(
         E,
          2,
          1
     ),
    
     rID,
     CHOOSECOLS(
         rng,
          1
     ),
    
     G,
     HSTACK(
         rID,
          names,
          F
     ),
    
     H,
     SORTBY(
         G,
          CHOOSECOLS(
              G,
               1
          ),
          1,
          CHOOSECOLS(
              G,
               2
          ),
          1
     ),
    
     J,
     UNIQUE(
         H,
          FALSE,
          TRUE
     ),
    
     result,
     IFERROR(
         ABS(
             J
         ),
          J
     ),
    
     header,
     {"Dept ID",
     "Emp Name",
     "Promotion Date",
     "Salary"},
    
     Reort,
     VSTACK(
         header,
          result
     ),
    
     Reort
    
)
Excel solution 10 for Clean Salary and Promotion Data, proposed by Hamidi Hamid:
=LET(x,
    SCAN(
        ,
        A2:A13,
        LAMBDA(
            a,
            b,
            IF(
                b,
                b,
                a
            )
        )
    ),
    y,
    IFERROR(
        TOCOL(
            IF(
                B2:D13>0,
                x,
                1/0
            )
        ),
        0
    ),
    g,
    TOCOL(
        B2:D13
    ),
    h,
    IFERROR(
        TEXTBEFORE(
            g,
            "-"
        ),
        ""
    ),
    i,
    IFERROR(
        IFNA(
            TEXTBEFORE(
                TEXTAFTER(
                    g,
                    "-",
                    
                ),
                "-",
                
            ),
            TEXTAFTER(
                g,
                "-",
                
            )
        )*1,
        0
    )+SEQUENCE(
        COUNTA(
            g
        )
    ),
    ii,
    i-SEQUENCE(
        COUNTA(
            g
        )
    ),
    f,
    IFERROR(
        IF(
            TEXTAFTER(
                g,
                "-",
                -1
            )*1=ii,
            "",
            TEXTAFTER(
                g,
                "-",
                -1
            )
        )*1,
        ""
    ),
    rr,
    XLOOKUP(
        h,
        h,
        f,
        ,
        ,
        -1
    ),
    v,
    (XLOOKUP(
        h,
        h,
        i,
        ,
        ,
        1
    )=i)*i,
    rec,
    DROP(
        HSTACK(
            y,
            h,
            f,
            ii,
            v,
            rr
        ),
        -1
    ),
    t,
    FILTER(rec,
    (CHOOSECOLS(
        rec,
        4
    )>0)*(CHOOSECOLS(
        rec,
        5
    )>0),
    ),
    SORT(
        DROP(
            t,
            ,
            -2
        ),
        {124},
        1
    ))
Excel solution 11 for Clean Salary and Promotion Data, proposed by ferhat CK:
=LET(
    a,
    SCAN(
        0,
        A2:A13,
        LAMBDA(
            x,
            y,
            IF(
                y=0,
                x,
                y
            )
        )
    ),
    b,
    C2:D13,
    ta,
    TEXTBEFORE,
    tb,
    TEXTAFTER,
    c,
    DROP(
        REDUCE(
            0,
            UNIQUE(
                a
            ),
            LAMBDA(
                x,
                y,
                VSTACK(
                    x,
                    LET(
                        q,
                        SUM(
                            N(
                                a=y
                            )
                        ),
                        w,
                        CHOOSEROWS(
                            b,
                            SEQUENCE(
                                q,
                                ,
                                MATCH(
                                    y,
                                    a,
                                    0
                                )
                            )
                        ),
                        r,
                        TOCOL(
                            w,
                            3
                        ),
                        HSTACK(
                            SEQUENCE(
                                COUNTA(
                                    r
                                )
                            )^0*y,
                            r
                        )
                    )
                )
            )
        ),
        1
    ),
    Emp,
    ta(
        TAKE(
            c,
            ,
            -1
        ),
        "-"
    ),
    Pro,
    IFNA(
        tb(
            TAKE(
            c,
            ,
            -1
        ),
            "-",
            2
        ),
        ""
    ),
    Sal,
    IFNA(
        tb(
            ta(
                TAKE(
            c,
            ,
            -1
        ),
                "-",
                2
            ),
            "-"
        ),
        tb(
            TAKE(
            c,
            ,
            -1
        ),
            "-"
        )
    ),
    z,
    HSTACK(
        TAKE(
            c,
            ,
            1
        ),
        Emp,
        Pro,
        Sal
    ),
    SORT(
        z,
        {1,
        2}
    )
)
Excel solution 12 for Clean Salary and Promotion Data, proposed by Imam Hambali:
=LET(
    
    id,
     A2:A13,
    
    idn,
     SCAN(
         0,
          id,
          LAMBDA(
              a,
              b,
               IF(
                   b=0,
                   a+b,
                    a+1
               )
          )
     ),
    
    ta,
     TEXTAFTER(
         "-"&TOCOL(
             idn&"-"&C2:D13,
             1
         ),
         "-",
         {1,
         2,
         3,
         4}
     ),
    
    n,
     IFERROR(
         TEXTBEFORE(
             ta,
             "-",
             ,
             ,
             ,
             ta
         ),
         ""
     ),
    
    VSTACK(
        F1:I1,
         SORT(
             CHOOSECOLS(
                 FILTER(
                     n,
                      CHOOSECOLS(
                          n,
                          2
                      )<>""
                 ),
                 1,
                 2,
                 4,
                 3
             ),
             {1,
             2}
         )
    )
    
)

Solving the challenge of Clean Salary and Promotion Data with Python

Python solution 1 for Clean Salary and Promotion Data, proposed by Konrad Gryczan, PhD:
import pandas as pd
path = "PQ_Challenge_226.xlsx"
input = pd.read_excel(path, usecols="A:D", nrows=13)
test = pd.read_excel(path, usecols="F:I", nrows=19).rename(columns=lambda x: x.replace('.1', ''))
input['Dept ID'] = input['Dept ID'].ffill().astype('int64')
input = input.drop(columns=['Highest Paid Employee']).melt(id_vars=['Dept ID'], var_name='name', value_name='Value')
input[['Emp Names', 'Salary', 'Promotion Date']] = input['Value'].str.split('-', expand=True)
input = input.d&rop(columns=['name', 'Value']).dropna(subset=['Emp Names']).sort_values(by=['Dept ID', 'Emp Names'])
input['Promotion Date'] = pd.to_datetime(input['Promotion Date'], format="%m/%d/%Y")
input['Salary'] = pd.to_numeric(input['Salary']).astype('int64')
result = input[['Dept ID', 'Emp Names', 'Promotion Date', 'Salary']].reset_index(drop=True)
print(result.equals(test)) # True
                    
                  

Solving the challenge of Clean Salary and Promotion Data with Python in Excel

Python in Excel solution 1 for Clean Salary and Promotion Data, proposed by Alejandro Campos:
df = xl("A1:D13", headers=True)
df['Dept ID'].ffill(inplace=True)
def process_emp_data(did, data, p=False):
 return [(did, e.split('-')[0], pd.to_datetime(e.split('-')[2], format='%d/%m/%Y', errors='coerce')
 .strftime('%d/%m/%Y') if p and len(e.split('-')) > 2 else None, e.split('-')[1]
 if len(e.split('-')) > 1 else None) for e in data.split(', ')] if pd.notna(data) else []
pe = {}
for _, r in df.iterrows():
 for p in process_emp_data(r['Dept ID'], r['Promoted Employees'], True)
 + process_emp_data(r['Dept ID'], r['Not Promoted Employees']):
 pe.setdefault((r['Dept ID'], p[1]), p)
final_df = pd.DataFrame(pe.values(), columns=['Dept ID', 'Emp Names', 'Promotion Date', 'Salary'])
 .sort_values(by=['Dept ID', 'Emp Names']).reset_index(drop=True).fillna(' ')
                    
                  
Python in Excel solution 2 for Clean Salary and Promotion Data, proposed by Abdallah Ally:
df = xl("A1:D13", headers=True)
# Perform data munging
df['Dept ID'] = df['Dept ID'].ffill()
values = []
for i in df.index:
 row = df.loc[i].values
 values.append([row[0], [v for v in row[2:] if v]])
df = pd.DataFrame(data=values, columns=['Emp ID', 'Data']).explode(column='Data')
df[['Emp Names', 'Salary', 'Promotion Date']] = df.Data.str.split('-', expand=True)
df = df.iloc[:, [0, 2, 4, 3]].sort_values(by=['Emp ID', 'Emp Names'], ignore_index=True)
df['Promotion Date'] = df['Promotion Date'].map(
 lambda x: pd.to_datetime(x, format='%m/%d/%Y') if x else ''
)
df
                    
                  
Python in Excel solution 3 for Clean Salary and Promotion Data, proposed by Victor Wang:
df = xl("A1:D13", headers=True)
df.drop('Highest Paid Employee', axis=1, inplace=True)
df['Dept ID'] = df['Dept ID'].ffill()
unpivot = pd.melt(df, id_vars='Dept ID')
remove_blanks = unpivot[unpivot['value'].notnull()]
remove_blanks[['Emp Names', 'Salary', 'Promotion Date']] = remove_blanks['value'].str.split('-', n=0, expand=True).fillna('')
result = remove_blanks[['Dept ID', 'Emp Names', 'Promotion Date', 'Salary']].sort_values(by=['Dept ID', 'Emp Names']).reset_index(drop=True)
result
                    
                  

Solving the challenge of Clean Salary and Promotion Data with R

R solution 1 for Clean Salary and Promotion Data, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "Power Query/PQ_Challenge_226.xlsx"
input = read_excel(path, range = "A1:D13")
test = read_excel(path, range = "F1:I19")
result = input %>%
 fill(`Dept ID`) %>%
 select(-`Highest Paid Employee`) %>%
 pivot_longer(-`Dept ID`, values_to = "Value") %>%
 separate(Value, into = c("Emp Names", "Salary", "Promotion Date"), sep = "-") %>%
 select(-name) %>%
 filter(!is.na(`Emp Names`)) %>%
 arrange(`Dept ID`, `Emp Names`) %>%
 mutate(`Promotion Date` = as.POSIXct(`Promotion Date`, format = "%m/%d/%Y", tz = "UTC"),
 Salary = as.numeric(Salary)) %>%
 select(`Dept ID`, `Emp Names`, `Promotion Date`, Salary)
all.equal(result, test, check.attributes = FALSE)
#> [1] TRUE
                    
                  

&

Leave a Reply