Skip to content

Commit

Permalink
Remove unneeded manual edits after timageddon fix in mexico industries
Browse files Browse the repository at this point in the history
  • Loading branch information
makmanalp committed Nov 12, 2015
1 parent e9f2f4c commit a4ab5c9
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 23 deletions.
21 changes: 0 additions & 21 deletions industry/NAICS/Mexico_datlas/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,11 @@

h = Hierarchy(["section", "division", "group", "class"])
parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields)

# Workaround for Timageddon issue - Tim made some codes like 31 / 32 / 33
# fall under different sections sometimes, and this breaks the strict
# treelike hierarchy of the classification. This happens mainly at the
# division level. We've decided to cut our losses right now and pick one.
# This comes at the expense of possibly mis-assigning some class-level
# codes to different codes, but until someone fixes either the
# classification or the classification system entirely, there's not much
# we can do here.

parent_code_table = parent_code_table[~parent_code_table.duplicated(["code", "level"])]
parent_code_table = parent_code_table.reset_index(drop=True)

parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)

names = pd.read_table("in/Mexico_industry_master - Names.tsv", encoding="utf-8")
parent_id_table = parent_id_table.merge(names, on=["code", "level"], how="outer")

# We also decided to rename the one category these codes fall into to be
# "Manufacturing Industries", since that seems to fit well and is
# less confusing.

metals_section = (parent_id_table.level == "section") & (parent_id_table.code == 5)
parent_id_table.loc[metals_section, ("name_en", "name_short_en")] = u"Manufacturing Industries"
parent_id_table.loc[metals_section, ("name_es", "name_short_es")] = u"Industrias Manufactureras"

parent_id_table["name"] = parent_id_table["name_en"]
parent_id_table.code = parent_id_table.code.astype(str)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
33,"2123","class",30.0,"Nonmetallic ore mining","Nonmetallic ore mining","Mineria de minerales no metalicos","Mineria de minerales no metalicos","Nonmetallic ore mining"
34,"213","group",27.0,"Services related to mining","Services related to mining","Servicios relacionados con la mineria","Servicios relacionados con la mineria","Services related to mining"
35,"2131","class",34.0,"Services related to mining","Services related to mining","Servicios relacionados con la mineria","Servicios relacionados con la mineria","Services related to mining"
36,"5","section","","Manufacturing Industries","Manufacturing Industries","Industrias Manufactureras","Industrias Manufactureras","Manufacturing Industries"
36,"5","section","","Utilities, Transport & Communications","Utilities, Transport & Communications","Electricidad, Transporte y Comunicaciones","Electricidad, Transporte y Comunicaciones","Utilities, Transport & Communications"
37,"22","division",36.0,"Electric power generation, transmission and distribution, water and gas supply through mains to final consumers","Electric power generation, transmission and distribution, water and gas supply through mains to final consumers","Generacion, transmision y distribucion de energia electrica, suministro de agua y de gas por ductos al consumidor final","Generacion, transmision y distribucion de energia electrica, suministro de agua y de gas por ductos al consumidor final","Electric power generation, transmission and distribution, water and gas supply through mains to final consumers"
38,"221","group",37.0,"Electric power generation, transmission and distribution","Electric power generation, transmission and distribution","Generacion, transmision y distribucion de energia electrica","Generacion, transmision y distribucion de energia electrica","Electric power generation, transmission and distribution"
39,"2211","class",38.0,"Electric power generation, transmission and distribution","Electric power generation, transmission and distribution","Generacion, transmision y distribucion de energia electrica","Generacion, transmision y distribucion de energia electrica","Electric power generation, transmission and distribution"
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="linnaeus",
version="v0.0.45",
version="v0.0.46",
author="Mali Akmanalp <Harvard CID>",
description=("Harvard CID's classification tools."),
url="http://github.com/cid-harvard/classifications/",
Expand Down

0 comments on commit a4ab5c9

Please sign in to comment.