Clean Salary and Promotion Data

Clean the given data and align them properly in departments and sort on Dept ID & Names. Christian-65600: 65600 is salary If it is accompanied with the date, then that is promotion date. Ex. Carolyn-51900-11/12/2023

📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 226
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Clean Salary and Promotion Data with Power Query

Power Query solution 1 for Clean Salary and Promotion Data, proposed by Zoran Milokanović:

let
  Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content], 
  H = {"Dept ID", "Emp Names", "Promotion Date", "Salary"}, 
  S = Table.Sort(
    Table.FromList(
      Table.ToRows(
        Table.UnpivotOtherColumns(
          Table.FillDown(Source, {H{0}}), 
          {H{0}, "Highest Paid Employee"}, 
          "A", 
          "V"
        )
      ), 
      each 
        let
          t = Text.Split(_{3}, "-")
        in
          {_{0}, t{0}, Date.From(t{2}?, "en-US"), Number.From(t{1})}, 
      H
    ), 
    {H{0}, H{1}}
  )
in
  S

Power Query solution 2 for Clean Salary and Promotion Data, proposed by Kris Jaganah:

let
  A = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  B = Table.FillDown(A, {"Dept ID"}), 
  C = (y) =>
    [
      a = Table.SelectRows(B, (x) => x[Dept ID] = y), 
      b = Table.UnpivotOtherColumns(a, {"Dept ID", "Highest Paid Employee"}, "A", "V"), 
      c = Table.SplitColumn(
        b, 
        "V", 
        each Text.Split(_, "-"), 
        {"Emp Names", "Salary", "Promotion Date"}
      ), 
      d = Table.TransformColumns(
        c, 
        {{"Promotion Date", each Date.FromText(_, [Format = "M/d/yyyy"])}, {"Salary", Number.From}}
      ), 
      e = Table.Sort(d, {"Emp Names", 0})
    ][e], 
  D = Table.Combine(
    List.Transform(
      List.Distinct(B[Dept ID]), 
      each C(_)[[Dept ID], [Emp Names], [Promotion Date], [Salary]]
    )
  )
in
  D

Power Query solution 3 for Clean Salary and Promotion Data, proposed by Alejandro Simón 🇵🇦 🇪🇸:

let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  Group = Table.Group(
    Source, 
    "Dept ID", 
    {
      {
        "A", 
        each 
          let
            a = _, 
            b = List.Skip(Table.ToColumns(a), 2), 
            c = List.RemoveNulls(List.Combine(b)), 
            d = List.Transform(
              c, 
              each 
                let
                  d1 = Text.Split(_, "-"), 
                  d2 = 
                    if List.Count(d1) = 3 then
                      {d1{0}} & {Date.From(List.Last(d1))} & {d1{1}}
                    else
                      {d1{0}} & {null} & {d1{1}}
                in
                  d2
            ), 
            e = Table.Sort(
              Table.FromRows(d, {"Emp Names", "Promotion Date", "Salary"}), 
              "Emp Names"
            )
          in
            e
      }
    }, 
    0, 
    (a, b) => Number.From(b <> null)
  ), 
  Sol = Table.ExpandTableColumn(Group, "A", Table.ColumnNames(Group[A]{0}))
in
  Sol

Power Query solution 4 for Clean Salary and Promotion Data, proposed by Luan Rodrigues:

let
  Fonte = Tabela1, 
  grp = Table.Group(
    Fonte, 
    "Dept ID", 
    {
      {
        "tab", 
        each 
          let
            a = List.RemoveNulls(List.Skip(List.Combine(Table.ToColumns(_)))), 
            b = Table.Combine(List.Transform(a, (x) => Table.FromRows({Text.Split(x, "-")}))), 
            c = Table.RenameColumns(
              b, 
              List.Zip({Table.ColumnNames(b), {"Emp Names", "Salary", "Promotion Date"}})
            )[[Emp Names], [Promotion Date], [Salary]]
          in
            c
      }
    }, 
    0, 
    (a, b) => Number.From(b <> null)
  ), 
  exp = Table.ExpandTableColumn(grp, "tab", Table.ColumnNames(grp[tab]{0})), 
  srt = Table.Sort(exp, {"Dept ID", "Emp Names"})
in
  srt

Power Query solution 5 for Clean Salary and Promotion Data, proposed by Hussein SATOUR:

let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  FillDown = Table.FillDown(Source, {"Dept ID"}), 
  UnpivotOther = Table.UnpivotOtherColumns(FillDown, {"Dept ID"}, "Attribute", "Value"), 
  RemovCols = Table.RemoveColumns(UnpivotOther, {"Attribute"}), 
  SplitValue = Table.SplitColumn(
    RemovCols, 
    "Value", 
    Splitter.SplitTextByDelimiter("-"), 
    {"Emp Name", "Salary", "Promotion"}
  ), 
  SortRows = Table.Buffer(
    Table.Sort(SplitValue, {{"Dept ID", 0}, {"Emp  
  Name", 0}, {"Promotion", 1}})
  ), 
  RemoveDuplicates = Table.Distinct(SortRows, {"Emp Name", "Salary"}), 
  ChangeTypes = Table.TransformColumnTypes(
    RemoveDuplicates, 
    {{"Promotion", type date}, {"Salary", Int64.Type}}
  )
in
  ChangeTypes

Power Query solution 6 for Clean Salary and Promotion Data, proposed by Abdallah Ally:

let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  FillDown = Table.FillDown(Source, {"Dept ID"}), 
  Transform = List.Transform(
    Table.ToRows(FillDown), 
    each [#"Dept ID" = _{0}, Data = List.Select(List.Skip(_, 2), each _ <> null)]
  ), 
  Expand = Table.ExpandListColumn(Table.FromRecords(Transform), "Data"), 
  Split = Table.SplitColumn(
    Expand, 
    "Data", 
    each Text.Split(_, "-"), 
    {"Emp Names", "Salary", "Promotion Date"}
  ), 
  Sort = Table.Sort(Split, {{"Dept ID", 0}, {"Emp Names", 0}}), 
  Select = Table.SelectColumns(Sort, {"Dept ID", "Emp Names", "Promotion Date", "Salary"}), 
  Result = Table.TransformColumnTypes(Select, {"Promotion Date", type date})
in
  Result

Power Query solution 7 for Clean Salary and Promotion Data, proposed by Eric Laforce:

let
  CN = {"Emp Names", "Promotion Date", "Salary"}, 
  Source = Excel.CurrentWorkbook(){[Name = "tData226"]}[Content], 
  FillDown = Table.FillDown(Source, {"Dept ID"}), 
  Group = Table.Group(
    FillDown, 
    {"Dept ID"}, 
    {
      "G", 
      each 
        let
          _Values = List.RemoveNulls([Promoted Employees] & [Not Promoted Employees]), 
          _T = Table.FromColumns({_Values}, {"ESP"}), 
          _Split = Table.SplitColumn(
            _T, 
            "ESP", 
            Splitter.SplitTextByDelimiter("-"), 
            {CN{0}, CN{2}, CN{1}}
          ), 
          _Sort = Table.Sort(_Split, CN{0})
        in
          _Sort
    }
  ), 
  Expand = Table.ExpandTableColumn(Group, "G", CN), 
  ChgType = Table.TransformColumnTypes(Expand, {{CN{1}, type date}, {CN{2}, Int64.Type}})
in
  ChgType

Power Query solution 8 for Clean Salary and Promotion Data, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:

let
  S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  A = Table.FillDown(S, {"Dept ID"}), 
  B = Table.AddColumn(A, "C", each List.RemoveNulls(List.Skip(Record.ToList(_), 2))), 
  C = Table.SelectColumns(B, {"Dept ID", "C"}), 
  D = Table.ExpandListColumn(C, "C"), 
  E = Table.SplitColumn(
    D, 
    "C", 
    Splitter.SplitTextByDelimiter("-", QuoteStyle.Csv), 
    {"Emp Names", "Salary", "Promotion Date"}
  ), 
  F = Table.Sort(E, {{"Dept ID", Order.Ascending}, {"Emp Names", Order.Ascending}}), 
  G = Table.ReorderColumns(F, {"Dept ID", "Emp Names", "Promotion Date", "Salary"})
in
  G

Power Query solution 9 for Clean Salary and Promotion Data, proposed by Alexandre Garcia:

let
  a = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  b = {"Dept ID", "Emp Names", "Salary", "Promotion Date"}, 
  c = Table.FillDown(a, {b{0}}), 
  d = List.TransformMany(
    Table.ToRows(c), 
    (x) =>
      List.Transform(
        List.RemoveNulls(List.Skip(x, 2)), 
        (x) =>
          let
            a = Text.Split(x, "-")
          in
            Record.FromList(a, List.FirstN(List.Skip(b), List.Count(a)))
      ), 
    (x, y) => [Dept ID = x{0}] & y
  ), 
  Sol = Table.Sort(
    Table.SelectColumns(Table.FromRecords(d, b, MissingField.UseNull), List.Sort(b)), 
    {{b{0}, 0}, {b{1}, 0}}
  )
in
  Sol

Power Query solution 10 for Clean Salary and Promotion Data, proposed by Francesco Bianchi 🇮🇹:

let
  Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  FilledDown = Table.FillDown(Source, {"Dept ID"}), 
  GroupedRows = Table.Group(
    FilledDown, 
    {"Dept ID"}, 
    {
      {
        "All", 
        each Table.Distinct(
          Table.Buffer(
            Table.Sort(
              Table.FromColumns(
                Record.ToList(
                  Record.RemoveFields(
                    [
                      a = List.Transform(
                        List.RemoveNulls(List.Combine(List.Skip(Table.ToColumns(_)))), 
                        each Text.Split(_, "-")
                      ), 
                      Emp Names = List.Transform(a, each _{0}), 
                      Promotion Date = List.Transform(
                        a, 
                        each try Date.FromText(_{2}, "en-US") otherwise null
                      ), 
                      Salary = List.Transform(a, each Number.From(_{1}))
                    ], 
                    {"a"}
                  )
                )
              ), 
              {{"Column1", Order.Ascending}, {"Column2", Order.Descending}}
            )
          ), 
          {"Column1"}
        )
      }
    }
  ), 
  Sol = Table.ExpandTableColumn(
    GroupedRows, 
    "All", 
    {"Column1", "Column2", "Column3"}, 
    {"Emp Names", "Promotion Date", "Salary"}
  )
in
  Sol

Power Query solution 11 for Clean Salary and Promotion Data, proposed by Gertjan Davies:

let
  Source = Problem, 
  Fill = Table.FillDown(Source, {"Dept ID"}), 
  Grouping = Table.Group(
    Fill, 
    {"Dept ID"}, 
    {
      {
        "Details", 
        each [[Promoted Employees], [Not Promoted Employees]], 
        type table [Promoted Employees = nullable text, Not Promoted Employees = nullable text]
      }
    }
  ), 
  Employees = Table.AddColumn(
    Grouping, 
    "Employees", 
    each List.RemoveNulls(List.Combine(Table.ToColumns([Details])))
  ), 
  Extract = Table.ExpandListColumn(Employees, "Employees"), 
  Split = Table.SplitColumn(
    Extract, 
    "Employees", 
    Splitter.SplitTextByDelimiter("-", QuoteStyle.Csv), 
    {"Emp Name", "Salary", "Promotion Date"}
  ), 
  ChangedType = Table.TransformColumnTypes(
    Split, 
    {{"Emp Name", type text}, {"Salary", Int64.Type}, {"Promotion Date", type date}}, 
    "en-US"
  ), 
  Sort = Table.Sort(ChangedType, {{"Dept ID", Order.Ascending}, {"Emp Name", Order.Ascending}})[
    [Dept ID], 
    [Emp Name], 
    [Promotion Date], 
    [Salary]
  ]
in
  Sort

Power Query solution 12 for Clean Salary and Promotion Data, proposed by Sanket Doijode:

let
  Source = Table.TransformColumns(
    Table.FillDown(
      Table.RemoveColumns(
        Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
        "Highest Paid Employee"
      ), 
      {"Dept ID"}
    ), 
    {{"Not Promoted Employees", each _ & "-null"}}
  ), 
  Result = Table.ReorderColumns(
    Table.ExpandTableColumn(
      Table.Group(
        Source, 
        {"Dept ID"}, 
        {
          {
            "Count", 
            each Table.FromList(
              List.Sort(
                List.RemoveNulls(List.Combine(Table.ToColumns(Table.RemoveColumns(_, "DeptID")))), 
                Order.Ascending
              ), 
              Splitter.SplitTextByDelimiter("-"), 
              {"Emp Names", "Salary", "Promotion    Date"}
            )
          }
        }
      ), 
      "Count", 
      {"Emp Names", "Salary", "Promotion Date"}
    ), 
    {"Dept ID", "Emp Names", "Promotion Date", "Salary"}
  )
in
  Result

Power Query solution 13 for Clean Salary and Promotion Data, proposed by Sahan Jayasuriya:

let
  Source = Excel.CurrentWorkbook(){[Name = "Data"]}[Content], 
  FilledDown = Table.FillDown(Source, {"Dept ID"}), 
  GroupedRows = Table.Group(FilledDown, {"Dept ID"}, {{"All", each _}}), 
  TableTransform = Table.TransformColumns(
    GroupedRows, 
    {
      {
        "All", 
        each [
          a = List.RemoveFirstN(Table.ToColumns(_), 1), 
          b = List.Combine(List.Transform(a, (x) => List.RemoveNulls(x))), 
          c = List.Select(
            b, 
            (x) =>
              Text.Length(x)
                = List.Max(
                  List.Transform(
                    List.Select(b, (k) => Text.Contains(k, Text.BeforeDelimiter(x, "-"))), 
                    (p) => Text.Length(p)
                  )
                )
          ), 
          d = List.Distinct(c), 
          e = List.Transform(d, (x) => Text.Split(x, "-")), 
          f = List.Transform(e, (x) => if List.Count(x) < 3 then x & {null} else x), 
          g = Table.FromRows(f, {"Emp Name", "Salary", "Promotion Date"})
        ][g]
      }
    }
  ), 
  ExpandedTable = Table.ExpandTableColumn(
    TableTransform, 
    "All", 
    {"Emp Name", "Promotion Date", "Salary"}
  ), 
  ChangedType = Table.TransformColumnTypes(
    ExpandedTable, 
    {
      {"Dept ID", Int64.Type}, 
      {"Emp Name", type text}, 
      {"Promotion Date", type date}, 
      {"Salary", Int64.Type}
    }
  ), 
  SortedRows = Table.Sort(
    ChangedType, 
    {{"Dept ID", Order.Ascending}, {"Emp Name", Order.Ascending}}
  )
in
  SortedRows

Solving the challenge of Clean Salary and Promotion Data with Excel

Excel solution 1 for Clean Salary and Promotion Data, proposed by Bo Rydobon 🇹🇭:

=LET(
    c,
    C2:D13,
    d,
    HSTACK(
        SCAN(
            ,
            TOCOL(
                IFS(
                    c>0,
                    A2:A13
                ),
                3
            ),
            MAX
        ),
        TEXTSPLIT(
            CONCAT(
                c&"|"
            ),
            "-",
            "|",
            1,
            ,
            ""
        )
    ),
    SORTBY(
        SORT(
            IFERROR(
                --d,
                d
            ),
            {1,
            2}
        ),
        {1,
        2,
        4,
        3}
    )
)

Excel solution 2 for Clean Salary and Promotion Data, proposed by Bo Rydobon 🇹🇭:

=LET(
    c,
    B2:D13,
    d,
    TEXTSPLIT(
        CONCAT(
            TOCOL(
                c,
                3
            )&"|"
        ),
        "-",
        "|",
        1,
        ,
        0
    ),
    g,
    GROUPBY(
        HSTACK(
            TOCOL(
                IFS(
                    c>0,
                    SCAN(
                        ,
                        A2:A13,
                        LAMBDA(
                            a,
                            v,
                            IF(
                                v,
                                v,
                                a
                            )
                        )
                    )
                ),
                3
            ),
            TAKE(
                d,
                ,
                1
            )
        ),
        --CHOOSECOLS(
            d,
            3,
            2
        ),
        MAX,
        ,
        0
    ),
    IF(
        g>0,
        g,
        ""
    )
)

Excel solution 3 for Clean Salary and Promotion Data, proposed by 🇰🇷 Taeyong Shin:

=LET(
    d,
    C2:D13,
    r,
    REGEXREPLACE(
        SCAN(
            ,
            TOCOL(
                IFS(
                    d>0,
                    A2:A13
                ),
                2
            ),
            MAX
        )&TOCOL(
            d,
            1
       & ),
        "(d+)(pL+)-((?1))-?([/d]+)?",
        "$"&{1,
        2,
        4,
        3}
    ),
    SORT(
        IFERROR(
            --r,
            r
        ),
        {1,
        2}
    )
)

Excel solution 4 for Clean Salary and Promotion Data, proposed by Kris Jaganah:

=LET(
    a,
    SCAN(
        0,
        A2:A13,
        LAMBDA(
            x,
            y,
            IF(
                y,
                y,
                x
            )
        )
    ),
    b,
    TEXTSPLIT(
        CONCAT(
            D2:D13&","
        ),
        "-",
        ",",
        1
    ),
    c,
    TEXTSPLIT(
        CONCAT(
            C2:C13&","
        ),
        "-",
        ","
    ),
    d,
    VSTACK(
        a&"-"&TAKE(
            b,
            ,
            1
        ),
        DROP(
            a&"-"&TAKE(
                c,
                ,
                1
            ),
            -1
        )
    ),
    e,
    FILTER(
        d,
        TEXTAFTER(
            d,
            "-"
        )<>""
    ),
    f,
    --TEXTBEFORE(
        e,
        "-"
    ),
    g,
    TEXTAFTER(
        e,
        "-"
    ),
    h,
    IFNA(
        TEXT(
            VLOOKUP(
                g,
                c,
                3,
                0
            ),
            "m/dd/yyy"
        ),
        ""
    ),
    i,
    --IFNA(
        VLOOKUP(
            g,
            b,
            2,
            0
        ),
        VLOOKUP(
            g,
            c,
            2,
            0
        )
    ),
    VSTACK(
        {"Dept ID",
        "Emp Names",
        "Promotion Date",
        "Salary"},
        SORT(
            HSTACK(
                f,
                g,
                h,
                i
            ),
            {1,
            2}
        )
    )
)

Excel solution 5 for Clean Salary and Promotion Data, proposed by Julian Poeltl:

=LET(
    T,
    TOCOL(
        SCAN(
            ,
            A2:A13,
            LAMBDA(
                A,
                B,
                IF(
                    B="",
                    A,
                    B
                )
            )
        )&"-"&C2:D13
    ),
    S,
    TEXTSPLIT(
        TEXTJOIN(
            "|",
            ,
            FILTER(
                T,
                LEN(
                    T
                )>2
            )
        ),
        "-",
        "|"
    ),
    C,
    IFNA(
        IFERROR(
            --S,
            S
        ),
        ""
    ),
    SORT(
        SORT(
            HSTACK(
                TAKE(
                    C,
                    ,
                    2
                ),
                TAKE(
                    C,
                    ,
                    -1
                ),
                CHOOSECOLS(
                    C,
                    3
                )
            ),
            2
        ),
        1
    )
)

Excel solution 6 for Clean Salary and Promotion Data, proposed by Oscar Mendez Roca Farell:

=LET(d,
    C2:D13,
    h,
    HSTACK(TOCOL(SCAN(
        ,
        A2:A13,
        MAX
    )/(d>0),
    3),
    TEXTSPLIT(
        CONCAT(
            d&"|"
        ),
        "-",
        "|",
        1,
        ,
        ""
    )),
    SORTBY(
        SORT(
            IFERROR(
                --h,
                h
            ),
            {1,
            2}
        ),
        {1,
        2,
        4,
        3}
    ))

Excel solution 7 for Clean Salary and Promotion Data, proposed by Sunny Baggu:

=LET(
    
     _a,
     SCAN(
         "",
          A2:A13,
          LAMBDA(
              a,
               v,
               IF(
                   v = "",
                    a,
                    v
               )
          )
     ),
    
     _b,
     IF(
         
          B2:D13 = "",
         
          x,
         
          HSTACK(
              IF(
                  ISNUMBER(
                      SEARCH(
                          TEXTBEFORE(
                              B2:B13,
                               "-"
                          ),
                           C2:C13
                      )
                  ),
                   x,
                   SUBSTITUTE(
                       B2:B13,
                        "-",
                        "--"
                   )
              ),
               C2:C13,
               SUBSTITUTE(
                   D2:D13,
                    "-",
                    "--"
               )
          )
          
     ),
    
     REDUCE(
         
          {"Dept ID",
          "Emp Names",
          "Promotion Date",
          "Salary"},
         
          UNIQUE(
              _a
          ),
         
          LAMBDA(
              x,
               y,
              
               VSTACK(
                   
                    x,
                   
                    SORTBY(
                        
                         UNIQUE(
                             
                              SORT(
                                  
                                   IFNA(
                                       
                                        DROP(
                                            REDUCE(
                                                "",
                                                 TOCOL(
                                                     FILTER(
                                                         _b,
                                                          _a = y
                                                     ),
                                                      3
                                                 ),
                                                 LAMBDA(
                                                     a,
                                                      v,
                                                      VSTACK(
                                                          a,
                                                           HSTACK(
                                                               y,
                                                                TEXTSPLIT(
                                                                    v,
                                                                     "-",
                                                                     ,
                                                                     1
                                                                )
                                                           )
                                                      )
                                                 )
                                            ),
                                             1
                                        ),
                                       
                                        ""
                                        
                                   ),
                                  
                                   {2,
                                   3}
                                   
                              )
                              
                         ),
                        
                         {1,
                         2,
                         4,
                         3},
                        
                         1
                         
                    )
                    
               )
               
          )
          
     )
    
)

Excel solution 8 for Clean Salary and Promotion Data, proposed by LEONARD OCHEA 🇷🇴:

=LET(
    t,
    A2:D13,
    C,
    CHOOSECOLS,
    V,
    TOCOL,
    d,
    C(
        t,
        2,
        3,
        4
    ),
    x,
    V(
        IF(
            d>"",
            SCAN(
                ,
                C(
                    t,
                    1
                ),
                MAX
            ),
            z
        ),
        3
    ),
    y,
    V(
        d,
        1
    ),
    z,
    TEXTSPLIT(
        TEXTJOIN(
            "|",
            ,
            y
        ),
        "-",
        "|"
    ),
    IFNA(
        GROUPBY(
            HSTACK(
                x,
                C(
                    z,
                    1
                )
            ),
            --C(
                z,
                3,
                2
            ),
            SINGLE,
            ,
            0
        ),
        ""
    )
)

Excel solution 9 for Clean Salary and Promotion Data, proposed by Md. Zohurul Islam:

=LET(
    
     A,
     A2:A13,
    
     B,
     C1:D1,
    
     C,
     C2:D13,
    
     ids,
     SCAN(
         0,
          A,
          LAMBDA(
              x,
               y,
               MAX(
                   x,
                    y
               )
          )
     ),
    
     deptID,
     TOCOL(
         IFNA(
             ids,
              B
         )
     ),
    
     data,
     TOCOL(
         C,
          0
     ),
    
     rng,
     UNIQUE(
         FILTER(
             HSTACK(
                 deptID,
                  data
             ),
              data <> 0
         ),
          ,
          FALSE
     ),
    
     names,
     TEXTBEFORE(
         CHOOSECOLS(
             rng,
              2
         ),
          "-"
     ),
    
     D,
     TEXTAFTER(
         CHOOSECOLS(
             rng,
              2
         ),
          names & "-"
     ),
    
     E,
     DROP(
         
          REDUCE(
              
               "",
              
               D,
              
               LAMBDA(
                   x,
                    y,
                   
                    LET(
                        
                         p,
                         TEXTSPLIT(
                             y,
                              "-"
                         ),
                        
                         q,
                         IFERROR(
                             VSTACK(
                                 x,
                                  p
                             ),
                              ""
                         ),
                        
                         q
                         
                    )
                    
               )
               
          ),
         
          1
          
     ),
    
     F,
     CHOOSECOLS(
         E,
          2,
          1
     ),
    
     rID,
     CHOOSECOLS(
         rng,
          1
     ),
    
     G,
     HSTACK(
         rID,
          names,
          F
     ),
    
     H,
     SORTBY(
         G,
          CHOOSECOLS(
              G,
               1
          ),
          1,
          CHOOSECOLS(
              G,
               2
          ),
          1
     ),
    
     J,
     UNIQUE(
         H,
          FALSE,
          TRUE
     ),
    
     result,
     IFERROR(
         ABS(
             J
         ),
          J
     ),
    
     header,
     {"Dept ID",
     "Emp Name",
     "Promotion Date",
     "Salary"},
    
     Reort,
     VSTACK(
         header,
          result
     ),
    
     Reort
    
)

Excel solution 10 for Clean Salary and Promotion Data, proposed by Hamidi Hamid:

=LET(x,
    SCAN(
        ,
        A2:A13,
        LAMBDA(
            a,
            b,
            IF(
                b,
                b,
                a
            )
        )
    ),
    y,
    IFERROR(
        TOCOL(
            IF(
                B2:D13>0,
                x,
                1/0
            )
        ),
        0
    ),
    g,
    TOCOL(
        B2:D13
    ),
    h,
    IFERROR(
        TEXTBEFORE(
            g,
            "-"
        ),
        ""
    ),
    i,
    IFERROR(
        IFNA(
            TEXTBEFORE(
                TEXTAFTER(
                    g,
                    "-",
                    
                ),
                "-",
                
            ),
            TEXTAFTER(
                g,
                "-",
                
            )
        )*1,
        0
    )+SEQUENCE(
        COUNTA(
            g
        )
    ),
    ii,
    i-SEQUENCE(
        COUNTA(
            g
        )
    ),
    f,
    IFERROR(
        IF(
            TEXTAFTER(
                g,
                "-",
                -1
            )*1=ii,
            "",
            TEXTAFTER(
                g,
                "-",
                -1
            )
        )*1,
        ""
    ),
    rr,
    XLOOKUP(
        h,
        h,
        f,
        ,
        ,
        -1
    ),
    v,
    (XLOOKUP(
        h,
        h,
        i,
        ,
        ,
        1
    )=i)*i,
    rec,
    DROP(
        HSTACK(
            y,
            h,
            f,
            ii,
            v,
            rr
        ),
        -1
    ),
    t,
    FILTER(rec,
    (CHOOSECOLS(
        rec,
        4
    )>0)*(CHOOSECOLS(
        rec,
        5
    )>0),
    ),
    SORT(
        DROP(
            t,
            ,
            -2
        ),
        {124},
        1
    ))

Excel solution 11 for Clean Salary and Promotion Data, proposed by ferhat CK:

=LET(
    a,
    SCAN(
        0,
        A2:A13,
        LAMBDA(
            x,
            y,
            IF(
                y=0,
                x,
                y
            )
        )
    ),
    b,
    C2:D13,
    ta,
    TEXTBEFORE,
    tb,
    TEXTAFTER,
    c,
    DROP(
        REDUCE(
            0,
            UNIQUE(
                a
            ),
            LAMBDA(
                x,
                y,
                VSTACK(
                    x,
                    LET(
                        q,
                        SUM(
                            N(
                                a=y
                            )
                        ),
                        w,
                        CHOOSEROWS(
                            b,
                            SEQUENCE(
                                q,
                                ,
                                MATCH(
                                    y,
                                    a,
                                    0
                                )
                            )
                        ),
                        r,
                        TOCOL(
                            w,
                            3
                        ),
                        HSTACK(
                            SEQUENCE(
                                COUNTA(
                                    r
                                )
                            )^0*y,
                            r
                        )
                    )
                )
            )
        ),
        1
    ),
    Emp,
    ta(
        TAKE(
            c,
            ,
            -1
        ),
        "-"
    ),
    Pro,
    IFNA(
        tb(
            TAKE(
            c,
            ,
            -1
        ),
            "-",
            2
        ),
        ""
    ),
    Sal,
    IFNA(
        tb(
            ta(
                TAKE(
            c,
            ,
            -1
        ),
                "-",
                2
            ),
            "-"
        ),
        tb(
            TAKE(
            c,
            ,
            -1
        ),
            "-"
        )
    ),
    z,
    HSTACK(
        TAKE(
            c,
            ,
            1
        ),
        Emp,
        Pro,
        Sal
    ),
    SORT(
        z,
        {1,
        2}
    )
)

Excel solution 12 for Clean Salary and Promotion Data, proposed by Imam Hambali:

=LET(
    
    id,
     A2:A13,
    
    idn,
     SCAN(
         0,
          id,
          LAMBDA(
              a,
              b,
               IF(
                   b=0,
                   a+b,
                    a+1
               )
          )
     ),
    
    ta,
     TEXTAFTER(
         "-"&TOCOL(
             idn&"-"&C2:D13,
             1
         ),
         "-",
         {1,
         2,
         3,
         4}
     ),
    
    n,
     IFERROR(
         TEXTBEFORE(
             ta,
             "-",
             ,
             ,
             ,
             ta
         ),
         ""
     ),
    
    VSTACK(
        F1:I1,
         SORT(
             CHOOSECOLS(
                 FILTER(
                     n,
                      CHOOSECOLS(
                          n,
                          2
                      )<>""
                 ),
                 1,
                 2,
                 4,
                 3
             ),
             {1,
             2}
         )
    )
    
)

Solving the challenge of Clean Salary and Promotion Data with Python

Python solution 1 for Clean Salary and Promotion Data, proposed by Konrad Gryczan, PhD:

import pandas as pd
path = "PQ_Challenge_226.xlsx"
input = pd.read_excel(path, usecols="A:D", nrows=13)
test = pd.read_excel(path, usecols="F:I", nrows=19).rename(columns=lambda x: x.replace('.1', ''))
input['Dept ID'] = input['Dept ID'].ffill().astype('int64')
input = input.drop(columns=['Highest Paid Employee']).melt(id_vars=['Dept ID'], var_name='name', value_name='Value')
input[['Emp Names', 'Salary', 'Promotion Date']] = input['Value'].str.split('-', expand=True)
input = input.d&rop(columns=['name', 'Value']).dropna(subset=['Emp Names']).sort_values(by=['Dept ID', 'Emp Names'])
input['Promotion Date'] = pd.to_datetime(input['Promotion Date'], format="%m/%d/%Y")
input['Salary'] = pd.to_numeric(input['Salary']).astype('int64')
result = input[['Dept ID', 'Emp Names', 'Promotion Date', 'Salary']].reset_index(drop=True)
print(result.equals(test)) # True

Solving the challenge of Clean Salary and Promotion Data with Python in Excel

Python in Excel solution 1 for Clean Salary and Promotion Data, proposed by Alejandro Campos:

df = xl("A1:D13", headers=True)
df['Dept ID'].ffill(inplace=True)
def process_emp_data(did, data, p=False):
 return [(did, e.split('-')[0], pd.to_datetime(e.split('-')[2], format='%d/%m/%Y', errors='coerce')
 .strftime('%d/%m/%Y') if p and len(e.split('-')) > 2 else None, e.split('-')[1]
 if len(e.split('-')) > 1 else None) for e in data.split(', ')] if pd.notna(data) else []
pe = {}
for _, r in df.iterrows():
 for p in process_emp_data(r['Dept ID'], r['Promoted Employees'], True)
 + process_emp_data(r['Dept ID'], r['Not Promoted Employees']):
 pe.setdefault((r['Dept ID'], p[1]), p)
final_df = pd.DataFrame(pe.values(), columns=['Dept ID', 'Emp Names', 'Promotion Date', 'Salary'])
 .sort_values(by=['Dept ID', 'Emp Names']).reset_index(drop=True).fillna(' ')

Python in Excel solution 2 for Clean Salary and Promotion Data, proposed by Abdallah Ally:

df = xl("A1:D13", headers=True)
# Perform data munging
df['Dept ID'] = df['Dept ID'].ffill()
values = []
for i in df.index:
 row = df.loc[i].values
 values.append([row[0], [v for v in row[2:] if v]])
df = pd.DataFrame(data=values, columns=['Emp ID', 'Data']).explode(column='Data')
df[['Emp Names', 'Salary', 'Promotion Date']] = df.Data.str.split('-', expand=True)
df = df.iloc[:, [0, 2, 4, 3]].sort_values(by=['Emp ID', 'Emp Names'], ignore_index=True)
df['Promotion Date'] = df['Promotion Date'].map(
 lambda x: pd.to_datetime(x, format='%m/%d/%Y') if x else ''
)
df

Python in Excel solution 3 for Clean Salary and Promotion Data, proposed by Victor Wang:

df = xl("A1:D13", headers=True)
df.drop('Highest Paid Employee', axis=1, inplace=True)
df['Dept ID'] = df['Dept ID'].ffill()
unpivot = pd.melt(df, id_vars='Dept ID')
remove_blanks = unpivot[unpivot['value'].notnull()]
remove_blanks[['Emp Names', 'Salary', 'Promotion Date']] = remove_blanks['value'].str.split('-', n=0, expand=True).fillna('')
result = remove_blanks[['Dept ID', 'Emp Names', 'Promotion Date', 'Salary']].sort_values(by=['Dept ID', 'Emp Names']).reset_index(drop=True)
result

Solving the challenge of Clean Salary and Promotion Data with R

R solution 1 for Clean Salary and Promotion Data, proposed by Konrad Gryczan, PhD:

library(tidyverse)
library(readxl)
path = "Power Query/PQ_Challenge_226.xlsx"
input = read_excel(path, range = "A1:D13")
test = read_excel(path, range = "F1:I19")
result = input %>%
 fill(`Dept ID`) %>%
 select(-`Highest Paid Employee`) %>%
 pivot_longer(-`Dept ID`, values_to = "Value") %>%
 separate(Value, into = c("Emp Names", "Salary", "Promotion Date"), sep = "-") %>%
 select(-name) %>%
 filter(!is.na(`Emp Names`)) %>%
 arrange(`Dept ID`, `Emp Names`) %>%
 mutate(`Promotion Date` = as.POSIXct(`Promotion Date`, format = "%m/%d/%Y", tz = "UTC"),
 Salary = as.numeric(Salary)) %>%
 select(`Dept ID`, `Emp Names`, `Promotion Date`, Salary)
all.equal(result, test, check.attributes = FALSE)
#> [1] TRUE

Solving the challenge of Clean Salary and Promotion Data with Power Query

Solving the challenge of Clean Salary and Promotion Data with Excel

Solving the challenge of Clean Salary and Promotion Data with Python

Solving the challenge of Clean Salary and Promotion Data with Python in Excel

Solving the challenge of Clean Salary and Promotion Data with R

Leave a Reply Cancel reply