Clean the given data and align them properly in departments and sort on Dept ID & Names. Christian-65600: 65600 is salary If it is accompanied with the date, then that is promotion date. Ex. Carolyn-51900-11/12/2023
📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 226
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn
Solving the challenge of Clean Salary and Promotion Data with Power Query
Power Query solution 1 for Clean Salary and Promotion Data, proposed by Zoran Milokanović:
let
Source = Excel.CurrentWorkbook(){[Name = "Input"]}[Content],
H = {"Dept ID", "Emp Names", "Promotion Date", "Salary"},
S = Table.Sort(
Table.FromList(
Table.ToRows(
Table.UnpivotOtherColumns(
Table.FillDown(Source, {H{0}}),
{H{0}, "Highest Paid Employee"},
"A",
"V"
)
),
each
let
t = Text.Split(_{3}, "-")
in
{_{0}, t{0}, Date.From(t{2}?, "en-US"), Number.From(t{1})},
H
),
{H{0}, H{1}}
)
in
S
Power Query solution 2 for Clean Salary and Promotion Data, proposed by Kris Jaganah:
let
A = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
B = Table.FillDown(A, {"Dept ID"}),
C = (y) =>
[
a = Table.SelectRows(B, (x) => x[Dept ID] = y),
b = Table.UnpivotOtherColumns(a, {"Dept ID", "Highest Paid Employee"}, "A", "V"),
c = Table.SplitColumn(
b,
"V",
each Text.Split(_, "-"),
{"Emp Names", "Salary", "Promotion Date"}
),
d = Table.TransformColumns(
c,
{{"Promotion Date", each Date.FromText(_, [Format = "M/d/yyyy"])}, {"Salary", Number.From}}
),
e = Table.Sort(d, {"Emp Names", 0})
][e],
D = Table.Combine(
List.Transform(
List.Distinct(B[Dept ID]),
each C(_)[[Dept ID], [Emp Names], [Promotion Date], [Salary]]
)
)
in
D
Power Query solution 3 for Clean Salary and Promotion Data, proposed by Alejandro Simón 🇵🇦 🇪🇸:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
Group = Table.Group(
Source,
"Dept ID",
{
{
"A",
each
let
a = _,
b = List.Skip(Table.ToColumns(a), 2),
c = List.RemoveNulls(List.Combine(b)),
d = List.Transform(
c,
each
let
d1 = Text.Split(_, "-"),
d2 =
if List.Count(d1) = 3 then
{d1{0}} & {Date.From(List.Last(d1))} & {d1{1}}
else
{d1{0}} & {null} & {d1{1}}
in
d2
),
e = Table.Sort(
Table.FromRows(d, {"Emp Names", "Promotion Date", "Salary"}),
"Emp Names"
)
in
e
}
},
0,
(a, b) => Number.From(b <> null)
),
Sol = Table.ExpandTableColumn(Group, "A", Table.ColumnNames(Group[A]{0}))
in
Sol
Power Query solution 4 for Clean Salary and Promotion Data, proposed by Luan Rodrigues:
let
Fonte = Tabela1,
grp = Table.Group(
Fonte,
"Dept ID",
{
{
"tab",
each
let
a = List.RemoveNulls(List.Skip(List.Combine(Table.ToColumns(_)))),
b = Table.Combine(List.Transform(a, (x) => Table.FromRows({Text.Split(x, "-")}))),
c = Table.RenameColumns(
b,
List.Zip({Table.ColumnNames(b), {"Emp Names", "Salary", "Promotion Date"}})
)[[Emp Names], [Promotion Date], [Salary]]
in
c
}
},
0,
(a, b) => Number.From(b <> null)
),
exp = Table.ExpandTableColumn(grp, "tab", Table.ColumnNames(grp[tab]{0})),
srt = Table.Sort(exp, {"Dept ID", "Emp Names"})
in
srt
Power Query solution 5 for Clean Salary and Promotion Data, proposed by Hussein SATOUR:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
FillDown = Table.FillDown(Source, {"Dept ID"}),
UnpivotOther = Table.UnpivotOtherColumns(FillDown, {"Dept ID"}, "Attribute", "Value"),
RemovCols = Table.RemoveColumns(UnpivotOther, {"Attribute"}),
SplitValue = Table.SplitColumn(
RemovCols,
"Value",
Splitter.SplitTextByDelimiter("-"),
{"Emp Name", "Salary", "Promotion"}
),
SortRows = Table.Buffer(
Table.Sort(SplitValue, {{"Dept ID", 0}, {"Emp
Name", 0}, {"Promotion", 1}})
),
RemoveDuplicates = Table.Distinct(SortRows, {"Emp Name", "Salary"}),
ChangeTypes = Table.TransformColumnTypes(
RemoveDuplicates,
{{"Promotion", type date}, {"Salary", Int64.Type}}
)
in
ChangeTypes
Power Query solution 6 for Clean Salary and Promotion Data, proposed by Abdallah Ally:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
FillDown = Table.FillDown(Source, {"Dept ID"}),
Transform = List.Transform(
Table.ToRows(FillDown),
each [#"Dept ID" = _{0}, Data = List.Select(List.Skip(_, 2), each _ <> null)]
),
Expand = Table.ExpandListColumn(Table.FromRecords(Transform), "Data"),
Split = Table.SplitColumn(
Expand,
"Data",
each Text.Split(_, "-"),
{"Emp Names", "Salary", "Promotion Date"}
),
Sort = Table.Sort(Split, {{"Dept ID", 0}, {"Emp Names", 0}}),
Select = Table.SelectColumns(Sort, {"Dept ID", "Emp Names", "Promotion Date", "Salary"}),
Result = Table.TransformColumnTypes(Select, {"Promotion Date", type date})
in
Result
Power Query solution 7 for Clean Salary and Promotion Data, proposed by Eric Laforce:
let
CN = {"Emp Names", "Promotion Date", "Salary"},
Source = Excel.CurrentWorkbook(){[Name = "tData226"]}[Content],
FillDown = Table.FillDown(Source, {"Dept ID"}),
Group = Table.Group(
FillDown,
{"Dept ID"},
{
"G",
each
let
_Values = List.RemoveNulls([Promoted Employees] & [Not Promoted Employees]),
_T = Table.FromColumns({_Values}, {"ESP"}),
_Split = Table.SplitColumn(
_T,
"ESP",
Splitter.SplitTextByDelimiter("-"),
{CN{0}, CN{2}, CN{1}}
),
_Sort = Table.Sort(_Split, CN{0})
in
_Sort
}
),
Expand = Table.ExpandTableColumn(Group, "G", CN),
ChgType = Table.TransformColumnTypes(Expand, {{CN{1}, type date}, {CN{2}, Int64.Type}})
in
ChgType
Power Query solution 8 for Clean Salary and Promotion Data, proposed by 🇮🇷 Navid Esmaeilzadeh اسماعیل زاده:
let
S = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
A = Table.FillDown(S, {"Dept ID"}),
B = Table.AddColumn(A, "C", each List.RemoveNulls(List.Skip(Record.ToList(_), 2))),
C = Table.SelectColumns(B, {"Dept ID", "C"}),
D = Table.ExpandListColumn(C, "C"),
E = Table.SplitColumn(
D,
"C",
Splitter.SplitTextByDelimiter("-", QuoteStyle.Csv),
{"Emp Names", "Salary", "Promotion Date"}
),
F = Table.Sort(E, {{"Dept ID", Order.Ascending}, {"Emp Names", Order.Ascending}}),
G = Table.ReorderColumns(F, {"Dept ID", "Emp Names", "Promotion Date", "Salary"})
in
G
Power Query solution 9 for Clean Salary and Promotion Data, proposed by Alexandre Garcia:
let
a = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
b = {"Dept ID", "Emp Names", "Salary", "Promotion Date"},
c = Table.FillDown(a, {b{0}}),
d = List.TransformMany(
Table.ToRows(c),
(x) =>
List.Transform(
List.RemoveNulls(List.Skip(x, 2)),
(x) =>
let
a = Text.Split(x, "-")
in
Record.FromList(a, List.FirstN(List.Skip(b), List.Count(a)))
),
(x, y) => [Dept ID = x{0}] & y
),
Sol = Table.Sort(
Table.SelectColumns(Table.FromRecords(d, b, MissingField.UseNull), List.Sort(b)),
{{b{0}, 0}, {b{1}, 0}}
)
in
Sol
Power Query solution 10 for Clean Salary and Promotion Data, proposed by Francesco Bianchi 🇮🇹:
let
Source = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
FilledDown = Table.FillDown(Source, {"Dept ID"}),
GroupedRows = Table.Group(
FilledDown,
{"Dept ID"},
{
{
"All",
each Table.Distinct(
Table.Buffer(
Table.Sort(
Table.FromColumns(
Record.ToList(
Record.RemoveFields(
[
a = List.Transform(
List.RemoveNulls(List.Combine(List.Skip(Table.ToColumns(_)))),
each Text.Split(_, "-")
),
Emp Names = List.Transform(a, each _{0}),
Promotion Date = List.Transform(
a,
each try Date.FromText(_{2}, "en-US") otherwise null
),
Salary = List.Transform(a, each Number.From(_{1}))
],
{"a"}
)
)
),
{{"Column1", Order.Ascending}, {"Column2", Order.Descending}}
)
),
{"Column1"}
)
}
}
),
Sol = Table.ExpandTableColumn(
GroupedRows,
"All",
{"Column1", "Column2", "Column3"},
{"Emp Names", "Promotion Date", "Salary"}
)
in
Sol
Power Query solution 11 for Clean Salary and Promotion Data, proposed by Gertjan Davies:
let
Source = Problem,
Fill = Table.FillDown(Source, {"Dept ID"}),
Grouping = Table.Group(
Fill,
{"Dept ID"},
{
{
"Details",
each [[Promoted Employees], [Not Promoted Employees]],
type table [Promoted Employees = nullable text, Not Promoted Employees = nullable text]
}
}
),
Employees = Table.AddColumn(
Grouping,
"Employees",
each List.RemoveNulls(List.Combine(Table.ToColumns([Details])))
),
Extract = Table.ExpandListColumn(Employees, "Employees"),
Split = Table.SplitColumn(
Extract,
"Employees",
Splitter.SplitTextByDelimiter("-", QuoteStyle.Csv),
{"Emp Name", "Salary", "Promotion Date"}
),
ChangedType = Table.TransformColumnTypes(
Split,
{{"Emp Name", type text}, {"Salary", Int64.Type}, {"Promotion Date", type date}},
"en-US"
),
Sort = Table.Sort(ChangedType, {{"Dept ID", Order.Ascending}, {"Emp Name", Order.Ascending}})[
[Dept ID],
[Emp Name],
[Promotion Date],
[Salary]
]
in
Sort
Power Query solution 12 for Clean Salary and Promotion Data, proposed by Sanket Doijode:
let
Source = Table.TransformColumns(
Table.FillDown(
Table.RemoveColumns(
Excel.CurrentWorkbook(){[Name = "Table1"]}[Content],
"Highest Paid Employee"
),
{"Dept ID"}
),
{{"Not Promoted Employees", each _ & "-null"}}
),
Result = Table.ReorderColumns(
Table.ExpandTableColumn(
Table.Group(
Source,
{"Dept ID"},
{
{
"Count",
each Table.FromList(
List.Sort(
List.RemoveNulls(List.Combine(Table.ToColumns(Table.RemoveColumns(_, "DeptID")))),
Order.Ascending
),
Splitter.SplitTextByDelimiter("-"),
{"Emp Names", "Salary", "Promotion Date"}
)
}
}
),
"Count",
{"Emp Names", "Salary", "Promotion Date"}
),
{"Dept ID", "Emp Names", "Promotion Date", "Salary"}
)
in
Result
Power Query solution 13 for Clean Salary and Promotion Data, proposed by Sahan Jayasuriya:
let
Source = Excel.CurrentWorkbook(){[Name = "Data"]}[Content],
FilledDown = Table.FillDown(Source, {"Dept ID"}),
GroupedRows = Table.Group(FilledDown, {"Dept ID"}, {{"All", each _}}),
TableTransform = Table.TransformColumns(
GroupedRows,
{
{
"All",
each [
a = List.RemoveFirstN(Table.ToColumns(_), 1),
b = List.Combine(List.Transform(a, (x) => List.RemoveNulls(x))),
c = List.Select(
b,
(x) =>
Text.Length(x)
= List.Max(
List.Transform(
List.Select(b, (k) => Text.Contains(k, Text.BeforeDelimiter(x, "-"))),
(p) => Text.Length(p)
)
)
),
d = List.Distinct(c),
e = List.Transform(d, (x) => Text.Split(x, "-")),
f = List.Transform(e, (x) => if List.Count(x) < 3 then x & {null} else x),
g = Table.FromRows(f, {"Emp Name", "Salary", "Promotion Date"})
][g]
}
}
),
ExpandedTable = Table.ExpandTableColumn(
TableTransform,
"All",
{"Emp Name", "Promotion Date", "Salary"}
),
ChangedType = Table.TransformColumnTypes(
ExpandedTable,
{
{"Dept ID", Int64.Type},
{"Emp Name", type text},
{"Promotion Date", type date},
{"Salary", Int64.Type}
}
),
SortedRows = Table.Sort(
ChangedType,
{{"Dept ID", Order.Ascending}, {"Emp Name", Order.Ascending}}
)
in
SortedRows
Solving the challenge of Clean Salary and Promotion Data with Excel
Excel solution 1 for Clean Salary and Promotion Data, proposed by Bo Rydobon 🇹🇭:
=LET(
c,
C2:D13,
d,
HSTACK(
SCAN(
,
TOCOL(
IFS(
c>0,
A2:A13
),
3
),
MAX
),
TEXTSPLIT(
CONCAT(
c&"|"
),
"-",
"|",
1,
,
""
)
),
SORTBY(
SORT(
IFERROR(
--d,
d
),
{1,
2}
),
{1,
2,
4,
3}
)
)
Excel solution 2 for Clean Salary and Promotion Data, proposed by Bo Rydobon 🇹🇭:
=LET(
c,
B2:D13,
d,
TEXTSPLIT(
CONCAT(
TOCOL(
c,
3
)&"|"
),
"-",
"|",
1,
,
0
),
g,
GROUPBY(
HSTACK(
TOCOL(
IFS(
c>0,
SCAN(
,
A2:A13,
LAMBDA(
a,
v,
IF(
v,
v,
a
)
)
)
),
3
),
TAKE(
d,
,
1
)
),
--CHOOSECOLS(
d,
3,
2
),
MAX,
,
0
),
IF(
g>0,
g,
""
)
)
Excel solution 3 for Clean Salary and Promotion Data, proposed by 🇰🇷 Taeyong Shin:
=LET(
d,
C2:D13,
r,
REGEXREPLACE(
SCAN(
,
TOCOL(
IFS(
d>0,
A2:A13
),
2
),
MAX
)&TOCOL(
d,
1
& ),
"(d+)(pL+)-((?1))-?([/d]+)?",
"$"&{1,
2,
4,
3}
),
SORT(
IFERROR(
--r,
r
),
{1,
2}
)
)
Excel solution 4 for Clean Salary and Promotion Data, proposed by Kris Jaganah:
=LET(
a,
SCAN(
0,
A2:A13,
LAMBDA(
x,
y,
IF(
y,
y,
x
)
)
),
b,
TEXTSPLIT(
CONCAT(
D2:D13&","
),
"-",
",",
1
),
c,
TEXTSPLIT(
CONCAT(
C2:C13&","
),
"-",
","
),
d,
VSTACK(
a&"-"&TAKE(
b,
,
1
),
DROP(
a&"-"&TAKE(
c,
,
1
),
-1
)
),
e,
FILTER(
d,
TEXTAFTER(
d,
"-"
)<>""
),
f,
--TEXTBEFORE(
e,
"-"
),
g,
TEXTAFTER(
e,
"-"
),
h,
IFNA(
TEXT(
VLOOKUP(
g,
c,
3,
0
),
"m/dd/yyy"
),
""
),
i,
--IFNA(
VLOOKUP(
g,
b,
2,
0
),
VLOOKUP(
g,
c,
2,
0
)
),
VSTACK(
{"Dept ID",
"Emp Names",
"Promotion Date",
"Salary"},
SORT(
HSTACK(
f,
g,
h,
i
),
{1,
2}
)
)
)
Excel solution 5 for Clean Salary and Promotion Data, proposed by Julian Poeltl:
=LET(
T,
TOCOL(
SCAN(
,
A2:A13,
LAMBDA(
A,
B,
IF(
B="",
A,
B
)
)
)&"-"&C2:D13
),
S,
TEXTSPLIT(
TEXTJOIN(
"|",
,
FILTER(
T,
LEN(
T
)>2
)
),
"-",
"|"
),
C,
IFNA(
IFERROR(
--S,
S
),
""
),
SORT(
SORT(
HSTACK(
TAKE(
C,
,
2
),
TAKE(
C,
,
-1
),
CHOOSECOLS(
C,
3
)
),
2
),
1
)
)
Excel solution 6 for Clean Salary and Promotion Data, proposed by Oscar Mendez Roca Farell:
=LET(d,
C2:D13,
h,
HSTACK(TOCOL(SCAN(
,
A2:A13,
MAX
)/(d>0),
3),
TEXTSPLIT(
CONCAT(
d&"|"
),
"-",
"|",
1,
,
""
)),
SORTBY(
SORT(
IFERROR(
--h,
h
),
{1,
2}
),
{1,
2,
4,
3}
))
Excel solution 7 for Clean Salary and Promotion Data, proposed by Sunny Baggu:
=LET(
_a,
SCAN(
"",
A2:A13,
LAMBDA(
a,
v,
IF(
v = "",
a,
v
)
)
),
_b,
IF(
B2:D13 = "",
x,
HSTACK(
IF(
ISNUMBER(
SEARCH(
TEXTBEFORE(
B2:B13,
"-"
),
C2:C13
)
),
x,
SUBSTITUTE(
B2:B13,
"-",
"--"
)
),
C2:C13,
SUBSTITUTE(
D2:D13,
"-",
"--"
)
)
),
REDUCE(
{"Dept ID",
"Emp Names",
"Promotion Date",
"Salary"},
UNIQUE(
_a
),
LAMBDA(
x,
y,
VSTACK(
x,
SORTBY(
UNIQUE(
SORT(
IFNA(
DROP(
REDUCE(
"",
TOCOL(
FILTER(
_b,
_a = y
),
3
),
LAMBDA(
a,
v,
VSTACK(
a,
HSTACK(
y,
TEXTSPLIT(
v,
"-",
,
1
)
)
)
)
),
1
),
""
),
{2,
3}
)
),
{1,
2,
4,
3},
1
)
)
)
)
)
Excel solution 8 for Clean Salary and Promotion Data, proposed by LEONARD OCHEA 🇷🇴:
=LET(
t,
A2:D13,
C,
CHOOSECOLS,
V,
TOCOL,
d,
C(
t,
2,
3,
4
),
x,
V(
IF(
d>"",
SCAN(
,
C(
t,
1
),
MAX
),
z
),
3
),
y,
V(
d,
1
),
z,
TEXTSPLIT(
TEXTJOIN(
"|",
,
y
),
"-",
"|"
),
IFNA(
GROUPBY(
HSTACK(
x,
C(
z,
1
)
),
--C(
z,
3,
2
),
SINGLE,
,
0
),
""
)
)
Excel solution 9 for Clean Salary and Promotion Data, proposed by Md. Zohurul Islam:
=LET(
A,
A2:A13,
B,
C1:D1,
C,
C2:D13,
ids,
SCAN(
0,
A,
LAMBDA(
x,
y,
MAX(
x,
y
)
)
),
deptID,
TOCOL(
IFNA(
ids,
B
)
),
data,
TOCOL(
C,
0
),
rng,
UNIQUE(
FILTER(
HSTACK(
deptID,
data
),
data <> 0
),
,
FALSE
),
names,
TEXTBEFORE(
CHOOSECOLS(
rng,
2
),
"-"
),
D,
TEXTAFTER(
CHOOSECOLS(
rng,
2
),
names & "-"
),
E,
DROP(
REDUCE(
"",
D,
LAMBDA(
x,
y,
LET(
p,
TEXTSPLIT(
y,
"-"
),
q,
IFERROR(
VSTACK(
x,
p
),
""
),
q
)
)
),
1
),
F,
CHOOSECOLS(
E,
2,
1
),
rID,
CHOOSECOLS(
rng,
1
),
G,
HSTACK(
rID,
names,
F
),
H,
SORTBY(
G,
CHOOSECOLS(
G,
1
),
1,
CHOOSECOLS(
G,
2
),
1
),
J,
UNIQUE(
H,
FALSE,
TRUE
),
result,
IFERROR(
ABS(
J
),
J
),
header,
{"Dept ID",
"Emp Name",
"Promotion Date",
"Salary"},
Reort,
VSTACK(
header,
result
),
Reort
)
Excel solution 10 for Clean Salary and Promotion Data, proposed by Hamidi Hamid:
=LET(x,
SCAN(
,
A2:A13,
LAMBDA(
a,
b,
IF(
b,
b,
a
)
)
),
y,
IFERROR(
TOCOL(
IF(
B2:D13>0,
x,
1/0
)
),
0
),
g,
TOCOL(
B2:D13
),
h,
IFERROR(
TEXTBEFORE(
g,
"-"
),
""
),
i,
IFERROR(
IFNA(
TEXTBEFORE(
TEXTAFTER(
g,
"-",
),
"-",
),
TEXTAFTER(
g,
"-",
)
)*1,
0
)+SEQUENCE(
COUNTA(
g
)
),
ii,
i-SEQUENCE(
COUNTA(
g
)
),
f,
IFERROR(
IF(
TEXTAFTER(
g,
"-",
-1
)*1=ii,
"",
TEXTAFTER(
g,
"-",
-1
)
)*1,
""
),
rr,
XLOOKUP(
h,
h,
f,
,
,
-1
),
v,
(XLOOKUP(
h,
h,
i,
,
,
1
)=i)*i,
rec,
DROP(
HSTACK(
y,
h,
f,
ii,
v,
rr
),
-1
),
t,
FILTER(rec,
(CHOOSECOLS(
rec,
4
)>0)*(CHOOSECOLS(
rec,
5
)>0),
),
SORT(
DROP(
t,
,
-2
),
{124},
1
))
Excel solution 11 for Clean Salary and Promotion Data, proposed by ferhat CK:
=LET(
a,
SCAN(
0,
A2:A13,
LAMBDA(
x,
y,
IF(
y=0,
x,
y
)
)
),
b,
C2:D13,
ta,
TEXTBEFORE,
tb,
TEXTAFTER,
c,
DROP(
REDUCE(
0,
UNIQUE(
a
),
LAMBDA(
x,
y,
VSTACK(
x,
LET(
q,
SUM(
N(
a=y
)
),
w,
CHOOSEROWS(
b,
SEQUENCE(
q,
,
MATCH(
y,
a,
0
)
)
),
r,
TOCOL(
w,
3
),
HSTACK(
SEQUENCE(
COUNTA(
r
)
)^0*y,
r
)
)
)
)
),
1
),
Emp,
ta(
TAKE(
c,
,
-1
),
"-"
),
Pro,
IFNA(
tb(
TAKE(
c,
,
-1
),
"-",
2
),
""
),
Sal,
IFNA(
tb(
ta(
TAKE(
c,
,
-1
),
"-",
2
),
"-"
),
tb(
TAKE(
c,
,
-1
),
"-"
)
),
z,
HSTACK(
TAKE(
c,
,
1
),
Emp,
Pro,
Sal
),
SORT(
z,
{1,
2}
)
)
Excel solution 12 for Clean Salary and Promotion Data, proposed by Imam Hambali:
=LET(
id,
A2:A13,
idn,
SCAN(
0,
id,
LAMBDA(
a,
b,
IF(
b=0,
a+b,
a+1
)
)
),
ta,
TEXTAFTER(
"-"&TOCOL(
idn&"-"&C2:D13,
1
),
"-",
{1,
2,
3,
4}
),
n,
IFERROR(
TEXTBEFORE(
ta,
"-",
,
,
,
ta
),
""
),
VSTACK(
F1:I1,
SORT(
CHOOSECOLS(
FILTER(
n,
CHOOSECOLS(
n,
2
)<>""
),
1,
2,
4,
3
),
{1,
2}
)
)
)
Solving the challenge of Clean Salary and Promotion Data with Python
Python solution 1 for Clean Salary and Promotion Data, proposed by Konrad Gryczan, PhD:
import pandas as pd
path = "PQ_Challenge_226.xlsx"
input = pd.read_excel(path, usecols="A:D", nrows=13)
test = pd.read_excel(path, usecols="F:I", nrows=19).rename(columns=lambda x: x.replace('.1', ''))
input['Dept ID'] = input['Dept ID'].ffill().astype('int64')
input = input.drop(columns=['Highest Paid Employee']).melt(id_vars=['Dept ID'], var_name='name', value_name='Value')
input[['Emp Names', 'Salary', 'Promotion Date']] = input['Value'].str.split('-', expand=True)
input = input.d&rop(columns=['name', 'Value']).dropna(subset=['Emp Names']).sort_values(by=['Dept ID', 'Emp Names'])
input['Promotion Date'] = pd.to_datetime(input['Promotion Date'], format="%m/%d/%Y")
input['Salary'] = pd.to_numeric(input['Salary']).astype('int64')
result = input[['Dept ID', 'Emp Names', 'Promotion Date', 'Salary']].reset_index(drop=True)
print(result.equals(test)) # True
Solving the challenge of Clean Salary and Promotion Data with Python in Excel
Python in Excel solution 1 for Clean Salary and Promotion Data, proposed by Alejandro Campos:
df = xl("A1:D13", headers=True)
df['Dept ID'].ffill(inplace=True)
def process_emp_data(did, data, p=False):
return [(did, e.split('-')[0], pd.to_datetime(e.split('-')[2], format='%d/%m/%Y', errors='coerce')
.strftime('%d/%m/%Y') if p and len(e.split('-')) > 2 else None, e.split('-')[1]
if len(e.split('-')) > 1 else None) for e in data.split(', ')] if pd.notna(data) else []
pe = {}
for _, r in df.iterrows():
for p in process_emp_data(r['Dept ID'], r['Promoted Employees'], True)
+ process_emp_data(r['Dept ID'], r['Not Promoted Employees']):
pe.setdefault((r['Dept ID'], p[1]), p)
final_df = pd.DataFrame(pe.values(), columns=['Dept ID', 'Emp Names', 'Promotion Date', 'Salary'])
.sort_values(by=['Dept ID', 'Emp Names']).reset_index(drop=True).fillna(' ')
Python in Excel solution 2 for Clean Salary and Promotion Data, proposed by Abdallah Ally:
df = xl("A1:D13", headers=True)
# Perform data munging
df['Dept ID'] = df['Dept ID'].ffill()
values = []
for i in df.index:
row = df.loc[i].values
values.append([row[0], [v for v in row[2:] if v]])
df = pd.DataFrame(data=values, columns=['Emp ID', 'Data']).explode(column='Data')
df[['Emp Names', 'Salary', 'Promotion Date']] = df.Data.str.split('-', expand=True)
df = df.iloc[:, [0, 2, 4, 3]].sort_values(by=['Emp ID', 'Emp Names'], ignore_index=True)
df['Promotion Date'] = df['Promotion Date'].map(
lambda x: pd.to_datetime(x, format='%m/%d/%Y') if x else ''
)
df
Python in Excel solution 3 for Clean Salary and Promotion Data, proposed by Victor Wang:
df = xl("A1:D13", headers=True)
df.drop('Highest Paid Employee', axis=1, inplace=True)
df['Dept ID'] = df['Dept ID'].ffill()
unpivot = pd.melt(df, id_vars='Dept ID')
remove_blanks = unpivot[unpivot['value'].notnull()]
remove_blanks[['Emp Names', 'Salary', 'Promotion Date']] = remove_blanks['value'].str.split('-', n=0, expand=True).fillna('')
result = remove_blanks[['Dept ID', 'Emp Names', 'Promotion Date', 'Salary']].sort_values(by=['Dept ID', 'Emp Names']).reset_index(drop=True)
result
Solving the challenge of Clean Salary and Promotion Data with R
R solution 1 for Clean Salary and Promotion Data, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
path = "Power Query/PQ_Challenge_226.xlsx"
input = read_excel(path, range = "A1:D13")
test = read_excel(path, range = "F1:I19")
result = input %>%
fill(`Dept ID`) %>%
select(-`Highest Paid Employee`) %>%
pivot_longer(-`Dept ID`, values_to = "Value") %>%
separate(Value, into = c("Emp Names", "Salary", "Promotion Date"), sep = "-") %>%
select(-name) %>%
filter(!is.na(`Emp Names`)) %>%
arrange(`Dept ID`, `Emp Names`) %>%
mutate(`Promotion Date` = as.POSIXct(`Promotion Date`, format = "%m/%d/%Y", tz = "UTC"),
Salary = as.numeric(Salary)) %>%
select(`Dept ID`, `Emp Names`, `Promotion Date`, Salary)
all.equal(result, test, check.attributes = FALSE)
#> [1] TRUE
&
