Home » Generate Soundex Codes

Generate Soundex Codes

Every Soundex code consists of a letter and three numbers, such as W-252. The letter is always the first letter of the surname. The numbers are assigned to the remaining letters of the surname according to the Soundex guide shown below. Zeroes are added at the end if necessary to produce a four-character code. Additional letters are disregarded. Number to Letters mapping is on right table. Disregard the letters A, E, I, O, U, H, W, and Y Examples: Washington is coded W-252 (W, 2 for the S, 5 for the N, 2 for the G, remaining letters disregarded). Lee is coded L-000 If the surname has any double letters, they should be treated as one letter. For example: Gutierrez is coded G-362 (G, 3 for the T, 6 for the first R, second R ignored, 2 for the Z).

📌 Challenge Details and Links
ExcelBI Power Query Challenge Number: 128
Challenge Difficulty: ⭐️⭐️⭐️
📥Download Sample File
📥Link to the solutions on LinkedIn

Solving the challenge of Generate Soundex Codes with Power Query

Power Query solution 1 for Generate Soundex Codes, proposed by Zoran Milokanović:
let
  Source = G("Table1"), 
  G = each Excel.CurrentWorkbook(){[Name = _]}[Content], 
  S = Table.AddColumn(
    Source, 
    "Soundex Code", 
    each 
      let
        t = G("Table2"), 
        c = Table.ColumnNames(t), 
        l = Text.ToList(Text.Upper(Text.Split([Name], " "){1})), 
        r = List.ReplaceMatchingItems(
          l, 
          Table.ToRows(
            Table.SelectColumns(
              Table.ExpandListColumn(
                Table.TransformColumns(t, {{c{1}, each Text.Split(_, ", ")}}), 
                c{1}
              ), 
              {c{1}, c{0}}
            )
          )
        )
      in
        Text.Start(
          List.Accumulate(
            {1 .. List.Count(r) - 1}, 
            l{0} & "-", 
            (s, c) => s & (if r{c} <> r{c - 1} and r{c} is number then Text.From(r{c}) else "")
          )
            & "000", 
          5
        )
  )
in
  S
Power Query solution 2 for Generate Soundex Codes, proposed by Alejandro Simón 🇵🇦 🇪🇸:
letras dobles, como en el caso de "Williams" (l), y  respetando otras repeticiones como en "Johnson" (n).

let
 T1 = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
 T2 = List.Transform(Table.ToRows(Table.ExpandListColumn(Table.TransformColumns(Excel.CurrentWorkbook(){[Name="Table2"]}[Content], {"Represents the Letters", each Text.Split(_, ", ")}), "Represents the Letters")), List.Reverse),
 Sol = Table.AddColumn(T1, "Soundex Code", each 
 let
 a = Text.Split([Name], " "){1},
 b = Table.Group(Table.FromColumns({Text.ToList(Text.Upper(a))}), {"Column1"}, {"T", each _}, 0)[Column1],
 c = List.RemoveMatchingItems(List.Skip(b), {"A","E","I","O","U","H","W","Y"}),
 d = Text.Start(a,1)&"-"&Text.Start(Text.PadEnd(Text.Combine(List.Transform(List.ReplaceMatchingItems(c, T2), Text.From)), 3, "0"),3)
 in d)
in
 Sol


                    
                  
          
Power Query solution 3 for Generate Soundex Codes, proposed by Luan Rodrigues:
let
  Fonte = Tabela1, 
  des = {"A", "E", "I", "O", "U", "H", "W", "Y"}, 
  list = List.Zip(
    List.Reverse(
      Table.ToColumns(
        Table.ExpandListColumn(
          Table.TransformColumns(Tabela2, {{"Represents the Letters", each Text.Split(_, ", ")}}), 
          "Represents the Letters"
        )
      )
    )
  ), 
  res = Table.AddColumn(
    Fonte, 
    "Personalizar", 
    each [
      a = Text.Split(Text.Lower([Name]), " "){1}, 
      b = List.FirstN(
        List.RemoveMatchingItems(
          List.RemoveFirstN(Text.ToList(a), 1), 
          des, 
          Comparer.OrdinalIgnoreCase
        ), 
        4
      ), 
      c = Table.Group(Table.FromColumns({b}), "Column1", {{"Count", each _}}, GroupKind.Local)[
        Column1
      ], 
      d = List.ReplaceMatchingItems(c, list, Comparer.OrdinalIgnoreCase), 
      e = Text.Upper(Text.Start(a, 1))
        & "-"
        & Text.Combine(
          List.Transform(
            if List.Count(d) = 4 then
              List.FirstN(d, 3)
            else if List.Count(d) = 2 then
              d & {0}
            else
              d, 
            Text.From
          )
        )
    ][e]
  )
in
  res
Power Query solution 4 for Generate Soundex Codes, proposed by Alexis Olson:
let
  NumbersTable = Excel.CurrentWorkbook(){[Name = "Table3"]}[Content], 
  SplitColumn = Table.ExpandListColumn(
    Table.TransformColumns(
      NumbersTable, 
      {{"Represents the Letters", Splitter.SplitTextByDelimiter(", ")}}
    ), 
    "Represents the Letters"
  ), 
  T2 = Table.TransformColumnTypes(
    SplitColumn, 
    {{"Number", type text}, {"Represents the Letters", type text}}
  ), 
  NamesTable = Excel.CurrentWorkbook(){[Name = "Table1"]}[Content], 
  AddColumn = Table.AddColumn(
    NamesTable, 
    "Custom", 
    each [
      Surname = Text.AfterDelimiter([Name], " "), 
      First = Text.Start(Surname, 1), 
      Remaining = Text.ToList(Text.Upper(Text.Remove(Surname, First))), 
      Dedupe = List.Accumulate(Remaining, {}, (s, c) => if List.Last(s) = c then s else s & {c}), 
      Letters = List.Select(Dedupe, (X) => List.Contains(T2[Represents the Letters], X)), 
      Replacements = List.Zip({T2[Represents the Letters], T2[Number]}), 
      Digits = Text.Combine(List.ReplaceMatchingItems(Letters, Replacements)), 
      Result = First & "-" & Text.PadEnd(Text.Start(Digits, 3), 3, "0")
    ][Result], 
    type text
  )
in
  AddColumn
Power Query solution 5 for Generate Soundex Codes, proposed by Luke Jarych:
let
 Source = Table1,
 DisregardLetters = {"A","E","I","O","U","H","W","Y"},
 SplitSurname = Table.AddColumn(Source, "ListSplit", each Text.ToList(Text.Split(Text.Upper([Name]), " "){1})),
 RemoveNotMatchingLetters = Table.AddColumn (SplitSurname, "RemoveLetters", 
 each let 
 surname = Text.Split([Name], " "){1},
 a = [ListSplit],
 b = List.RemoveMatchingItems(a, DisregardLetters),
 c = List.ReplaceMatchingItems(b, T2),
 d = List.Distinct(List.FirstN(List.RemoveFirstN(c, 1),3)),
 e = Text.PadEnd(Text.Start(surname,1) & "-" & Text.Combine(List.Transform(d, each Text.From(_)), ""),5,"0")
 in e)
in
 RemoveNotMatchingLetters

and T2:
let
 Source = Excel.CurrentWorkbook(){[Name="Table2"]}[Content],
 AddedCustom = Table.AddColumn(Source, "Split", each Text.Split([Represents the Letters], ", ")),
 ExpandedSplit = List.Reverse(Table.ToColumns(Table.ExpandListColumn(AddedCustom, "Split")[[Number], [Split]])),
 ListZip = List.Zip(ExpandedSplit)
in
 ListZip


                    
                  
          
Power Query solution 6 for Generate Soundex Codes, proposed by Obi E, MPH:
let
 Source = Excel.CurrentWorkbook(){[Name="Table4"]}[Content],
 Custom1 = Source,
 #"Filtered Rows" = Table.SelectRows(Custom1, each [Number] = 4),
 #"Replaced Value" = Table.ReplaceValue(#"Filtered Rows","L","",Replacer.ReplaceText,{"Represents the Letters"}),
 #"Appended Query" = Table.Combine({Source, #"Replaced Value"}),
 #"Split Column by Delimiter" = Table.ExpandListColumn(Table.TransformColumns(#"Appended Query", {{"Represents the Letters", Splitter.SplitTextByDelimiter(",", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Represents the Letters"),
 #"Trimmed Text" = Table.TransformColumns(#"Split Column by Delimiter",{{"Represents the Letters", Text.Trim, type text}}),
 #"Cleaned Text" = Table.TransformColumns(#"Trimmed Text",{{"Represents the Letters", Text.Clean, type text}})
in
 #"Cleaned Text"


                    
                  
          
            

  
                  
      
    
      
      
      
        
          let Source = Excel.CurrentWorkbook(){[Name="Table3"]}[Content], #"Dupl - Pastebin.com
          Pastebin.com is the number one paste tool since 2002. Pastebin is a website where you can store text online for a set period of time.

Solving the challenge of Generate Soundex Codes with Excel

Excel solution 1 for Generate Soundex Codes, proposed by Bo Rydobon 🇹🇭:
=MAP(TEXTAFTER(A2:A11," "),LAMBDA(a,LET(m,MID(a,SEQUENCE(9)+1,1),LEFT(a)&"-"&LEFT(CONCAT(TOCOL(IFS(m<>VSTACK(0,m),MATCH("*"&m&"*",VSTACK("",D2:D7),)-1),3),0,0),3))))
Excel solution 2 for Generate Soundex Codes, proposed by Rick Rothstein:
=LET(u,UPPER(TEXTAFTER(A2:A11," ",-1)),t,MID(u,2,99),n,REDUCE(REDUCE(t,MID("AEIOUHWY",SEQUENCE(,8),1),LAMBDA(a,x,SUBSTITUTE(a,x,""))),CHAR(SEQUENCE(26,,65)),LAMBDA(a,x,SUBSTITUTE(a,x&x,x))),LEFT(u)&"-"&MAP(n,LAMBDA(x,LEFT(REDUCE("",MID(x,SEQUENCE(,LEN(x)),1),LAMBDA(a,x,a&MATCH("*"&x&"*",D2:D7,0)))&"000",3))))
Excel solution 3 for Generate Soundex Codes, proposed by محمد حلمي:
=MAP(A2:A11,LAMBDA(c,LET(
e,TEXTAFTER(c," "),LEFT(e)&-LEFT(CONCAT(SCAN(,TOCOL(MAP(MID(e,SEQUENCE(
LEN(e)-1,,2),1),LAMBDA(a,TOCOL(SEARCH(a,D2:D7)^0*
C2:C7,2))),2),LAMBDA(a,d,IF(a=d,"",d))),0),3))))
Excel solution 4 for Generate Soundex Codes, proposed by Kris Jaganah:
=HSTACK(A2:A11,MAP(A2:A11,LAMBDA(z,LET(a,TEXTSPLIT(ARRAYTOTEXT(C2:D7),,", "),b,SCAN(1,IFERROR(--a,),LAMBDA(x,y,IF(y=0,x,y))),c,TEXTAFTER(z," "),d,MID(c,SEQUENCE(LEN(c)),1),e,VSTACK(TAKE(d,1),DROP(d,-1)),f,FILTER(d,d<>e),LEFT(TAKE(d,1)&"-"&CONCAT(XLOOKUP(f,a,b,""))&"00",5)))))
Excel solution 5 for Generate Soundex Codes, proposed by Sunny Baggu:
=MAP(
 A2:A11,
 LAMBDA(x,
 LET(
 _ta, TEXTAFTER(x, " "),
 _t1, LEFT(_ta),
 _t2, RIGHT(_ta, LEN(_ta) - 1),
 _m, MID(_t2, SEQUENCE(LEN(_t2)), 1),
 _list, FILTER(_m, VSTACK(1, NOT(DROP(_m, -1) = DROP(_m, 1)))),
 _t1 & "-" &
 LEFT(TEXTJOIN("", 1, VSTACK(MAP(_list, LAMBDA(x, FILTER(C2:C7, ISNUMBER(SEARCH(x, D2:D7)), ""))), 0)), 3)
 )
 )
)
Excel solution 6 for Generate Soundex Codes, proposed by LEONARD OCHEA 🇷🇴:
=MAP(A2:A11,LAMBDA(a,LET(b,TEXTAFTER(a," "),s,MID(b,SEQUENCE(LEN(b)-1,,2),1),c,REDUCE(CONCAT(XLOOKUP("*"&s&"*",D2:D7,C2:C7,"",2)),SEQUENCE(9),LAMBDA(a,b,SUBSTITUTE(a,b*11,b))),LEFT(b)&"-"&LEFT(100*c,3))))

with header
=LET(t,A1:A11,HSTACK(t,VSTACK("Soundex Code",MAP(DROP(t,1),LAMBDA(a,LET(b,TEXTAFTER(a," "),s,MID(b,SEQUENCE(LEN(b)-1,,2),1),c,REDUCE(CONCAT(XLOOKUP("*"&s&"*",D2:D7,C2:C7,"",2)),SEQUENCE(9),LAMBDA(a,b,SUBSTITUTE(a,b*11,b))),LEFT(b)&"-"&LEFT(100*c,3)))))))
Excel solution 7 for Generate Soundex Codes, proposed by Stefan Olsson:
=MAP(A2:A11, 
LAMBDA(_n, 
LET(
_i, REGEXEXTRACT(_n, "([A-Z])[a-z]*$"),
_s, REGEXEXTRACT(_n, "([a-z]*$)"),
_x, SCAN(,REGEXEXTRACT(UPPER(_s), REPT("(.)", LEN(_s))), 
LAMBDA(p, _l, 
LET(c, IFNA(FILTER(C2:C7, REGEXMATCH(D2:D7, _l)),""), IF(p=c,,c))
)),
LEFT(TEXTJOIN("", 1, _i, "-", _x, "000"), 5)
)))
Excel solution 8 for Generate Soundex Codes, proposed by Edwin Tisnado:
=LEFT(TEXTAFTER(A2:A11," "),1)&BYROW(MAP(MID(MAP(A2:A11,LAMBDA(a,CONCAT(TEXTSPLIT(TEXTAFTER(a," "),{"a","e","i","o","u","h","w","y"},,1)))),{2,3,4},1),LAMBDA(x,IF(x="",0,SUM(NOT(ISERR(SEARCH(x,D2:D7)))*ROW(1:6))))),LAMBDA(f,"-"&CONCAT(f)))
Excel solution 9 for Generate Soundex Codes, proposed by Edwin Tisnado:
=MAP(A2:A11,LAMBDA(a,LET(b,TEXTAFTER(a," "),MAP(CONCAT(TEXTSPLIT(b,{"a","e","i","o","u","h","w","y"})),LAMBDA(c,LEFT(b,1)&"-"&CONCAT(MAP(MID(REDUCE("",MID(c,ROW(2:5),1),LAMBDA(x,y,IF(RIGHT(x,1)=y,x,x&y))),{1,2,3},1),LAMBDA(z,IF(z="",0,SUM(NOT(ISERR(SEARCH(z,D2:D7)))*ROW(1:6)))))))))))
Excel solution 10 for Generate Soundex Codes, proposed by Hazem Hassan:
=LET(a,TEXTAFTER(A2:A11," "),
LEFT(a)&"-"&LEFT(MAP(MAP(a,LAMBDA(x,CONCAT(TOCOL(VLOOKUP(MID(x,SEQUENCE(LEN(x),,2),1),TRIM(CHOOSECOLS(TEXTSPLIT(CONCAT(SUBSTITUTE(","&D2:D7,",","@"&C2:C7&"*")),"*","@",1),2,1)),2,0),3)))),LAMBDA(x,CONCAT(SCAN(,MID(x,SEQUENCE(LEN(x)),1),LAMBDA(x,y,IF(x=y,"",y))))))&REPT(0,3),3))
Excel solution 11 for Generate Soundex Codes, proposed by Hazem Hassan:
=MAP(TEXTAFTER(A2:A11," "),LAMBDA(Y,LET(a,C2:C7,b,D2:D7,c,CONCAT(TOCOL(IF(FIND(UNIQUE(PROPER(DROP(MID(Y,SEQUENCE(,LEN(Y),2),1),,-1)),1),b)>=1,a,""),3,1)),LEFT(Y)&"-"&LEFT(c&REPT("0",100-LEN(c)),3))))

Solving the challenge of Generate Soundex Codes with R

R solution 1 for Generate Soundex Codes, proposed by Konrad Gryczan, PhD:
library(tidyverse)
library(readxl)
names = read_excel("PQ_Challenge_128.xlsx", range = "A1:A11")
mapping_table = read_excel("PQ_Challenge_128.xlsx", range = "C1:D7")
test = read_excel("PQ_Challenge_128.xlsx", range = "F1:G11")
remove_consecutive_doubles <- function(vec) {
 paste(vec[c(TRUE, vec[-1] != vec[-length(vec)])], collapse = "")
}
mapping <- mapping_table %>%
 separate_rows(2, sep = ", ") %>%
 rename(letter = 2) %>%
 mutate(letter = str_to_lower(letter)) 
 
part 1/2 (to much characters ;D) 
                    
                  
R solution 2 for Generate Soundex Codes, proposed by Konrad Gryczan, PhD:
result <- names %>%
 separate(1, into = c("first_name", "surname"), sep = " ") %>%
 select(surname) %>%
 mutate(
 first_letter = str_sub(surname, 1, 1),
 soundex_code = str_sub(surname, 2) %>%
 str_split("") %>%
 map_chr(~ remove_consecutive_doubles(.x)) %>%
 str_split("") %>%
 map(~ .x[!.x %in% c("a", "e", "i", "o", "u", "h", "w")]) %>%
 map(~ ifelse(.x %in% mapping$letter, as.character(mapping$Number[match(.x, mapping$letter)]), .x)) %>%
 map_chr(~ {
 if (length(.x) < 3) return(str_pad(paste0(.x, collapse = ""), width = 3, side = "right", pad = "0"))
 if (length(.x) > 3) return(str_sub(paste0(.x, collapse = ""), 1, 3))
 return(paste0(.x, collapse = ""))
 })) %>%
 mutate(final_soundex = str_c(first_letter, soundex_code, sep = "-")) %>%
 bind_cols(names, .) %>%
 select(Name = 1,`Soundex Code` = 5)
identical(result, test)
# > TRUE
                    
                  
R solution 3 for Generate Soundex Codes, proposed by Krzysztof Nowak:
 separate_longer_delim(cols = represents_the_letters,delim = ",") |>
 mutate(represents_the_letters = trimws(represents_the_letters))

Solving the challenge of Generate Soundex Codes with DAX

DAX solution 1 for Generate Soundex Codes, proposed by Zoran Milokanović&:
Soundex Code = 
VAR n = Table1[Name]
VAR l = LEN(n)
VAR s = UPPER(RIGHT(n, l - FIND(" ", n, 1, 0)))
VAR t1 = GROUPBY(ADDCOLUMNS(GENERATESERIES(2, LEN(s)), "L", MID(s, [Value], 1)), [L])
RETURN LEFT(LEFT(s, 1) & "-" & CONCATENATEX(NATURALINNERJOIN(t2, t1), [Num]) & "000", 5)
                    
                  

&&

Leave a Reply