Skip to content

Commit

Permalink
Add new nonagricultural classification.
Browse files Browse the repository at this point in the history
  • Loading branch information
makmanalp committed Aug 31, 2017
1 parent 6de9afb commit 290bac2
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 1 deletion.
8 changes: 8 additions & 0 deletions classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,18 @@

import csv
import datetime
import re

from unidecode import unidecode


def slugify(s):
    """Turn a human-readable name such as 'Foo Bar' into a code like 'foo_bar'.

    Spaces are replaced by underscores and the text is lowercased; every
    character that is not an ASCII letter, a digit or an underscore is then
    removed. Punctuation and accented letters are therefore dropped, not
    transliterated.
    """
    underscored = s.replace(" ", "_")
    lowered = underscored.lower()
    return re.sub(r'[^a-zA-Z0-9\_]', '', lowered)


def load(path):
    """Load a classification from a CSV file located relative to this module.

    `path` is joined onto the directory that contains this file, and the
    resulting location is handed to `Classification.from_csv`; whatever that
    call returns is returned unchanged.
    """
    module_dir = os.path.dirname(__file__)
    csv_path = os.path.join(module_dir, path)
    return Classification.from_csv(csv_path)
Expand Down
4 changes: 3 additions & 1 deletion product/Datlas/Rural/Tupfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@ LIVESTOCK = in/Livestock_Hierarchy.tsv in/Livestock_Names.tsv
# Each variable below names the Hierarchy/Names TSV pair of one classification.
AGPRODUCTS = in/AgProducts_Hierarchy.tsv in/AgProducts_Names.tsv
AGPRODUCTS_EXPANDED = in/AgProducts_Expanded_Hierarchy.tsv in/AgProducts_Expanded_Names.tsv
AGPRODUCTS_CENSUS = in/AgProducts_Census_Hierarchy.tsv in/AgProducts_Census_Names.tsv
NONAGRIC = in/Nonagric_Hierarchy.tsv in/Nonagric_Names.tsv
FARMTYPE = in/FarmType_Hierarchy.tsv in/FarmType_Names.tsv
LANDUSE = in/LandUse_Hierarchy.tsv in/LandUse_Names.tsv
FARMSIZE = in/FarmSize_Hierarchy.tsv in/FarmSize_Names.tsv

# download_sheets.py takes no inputs and is declared as the producer of the TSV files listed above.
# NOTE(review): the two rules below differ only in the trailing $(NONAGRIC) output and
# otherwise declare the same outputs; this looks like the before/after pair of a diff --
# confirm that only the second one is present in the real Tupfile.
: |> $(PYTHON_PREFIX) download_sheets.py |> $(LIVESTOCK) $(AGPRODUCTS) $(AGPRODUCTS_CENSUS) $(AGPRODUCTS_EXPANDED) $(FARMTYPE) $(LANDUSE) $(FARMSIZE)
: |> $(PYTHON_PREFIX) download_sheets.py |> $(LIVESTOCK) $(AGPRODUCTS) $(AGPRODUCTS_CENSUS) $(AGPRODUCTS_EXPANDED) $(FARMTYPE) $(LANDUSE) $(FARMSIZE) $(NONAGRIC)
# Each rule below runs one clean_*.py script on a TSV pair and declares the files it writes under out/.
: $(LIVESTOCK) |> $(PYTHON_PREFIX) clean_livestock.py |> out/livestock.csv out/livestock.dta
: $(AGPRODUCTS_CENSUS) |> $(PYTHON_PREFIX) clean_agproducts_census.py |> out/agricultural_products_census.csv out/agricultural_products_census.dta
: $(AGPRODUCTS) |> $(PYTHON_PREFIX) clean_agproducts.py |> out/agricultural_products.csv out/agricultural_products.dta
: $(AGPRODUCTS_EXPANDED) |> $(PYTHON_PREFIX) clean_agproducts_expanded.py |> out/agricultural_products_expanded.csv out/agricultural_products_expanded.dta
: $(NONAGRIC) |> $(PYTHON_PREFIX) clean_nonagric.py |> out/nonagricultural_activities.csv out/nonagricultural_activities.dta
: $(FARMTYPE) |> $(PYTHON_PREFIX) clean_farmtype.py |> out/farm_type.csv out/farm_type.dta
# Unlike the other rules, the land-use rule declares only a CSV output (no .dta).
: $(LANDUSE) |> $(PYTHON_PREFIX) clean_landuse.py |> out/land_use.csv
: $(FARMSIZE) |> $(PYTHON_PREFIX) clean_farmsizetype.py |> out/farm_size.csv out/farm_size.dta
37 changes: 37 additions & 0 deletions product/Datlas/Rural/clean_nonagric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import pandas as pd

from classification import (Hierarchy, repeated_table_to_parent_id_table,
parent_code_table_to_parent_id_table,
Classification, slugify)

if __name__ == "__main__":
    # Read the two downloaded sheets; the names table's "code" column is
    # forced to str so it can be slugified below.
    names_df = pd.read_table("./in/Nonagric_Names.tsv", encoding="utf-8",
                             dtype={"code": str})
    hierarchy_df = pd.read_table("./in/Nonagric_Hierarchy.tsv",
                                 encoding="utf-8")

    # The sheet lists levels from most to least detailed; relabel its three
    # columns accordingly.
    hierarchy_df.columns = ["level3_code", "level2_code", "level0_code"]

    # Pass every code through slugify in both tables so that the merge on
    # "code" further down compares like with like.
    names_df["code"] = names_df["code"].map(slugify)
    hierarchy_df = hierarchy_df.applymap(slugify)

    # No extra per-level fields are supplied for any level.
    fields = {
        "level0": [],
        "level2": [],
        "level3": []
    }

    level_hierarchy = Hierarchy(["level0", "level2", "level3"])

    # Project helper; its result carries "code" and "level" columns, which
    # the merge below relies on to attach the names.
    coded_table = repeated_table_to_parent_id_table(
        hierarchy_df, level_hierarchy, fields)
    coded_table = coded_table.merge(names_df, on=["code", "level"])

    id_table = parent_code_table_to_parent_id_table(
        coded_table, level_hierarchy)

    # The generic "name" column is a copy of the English name.
    id_table["name"] = id_table["name_en"]

    # Keep only the columns that are exported, in this order.
    output_columns = ["code", "name", "level", "name_en", "name_es",
                      "parent_id"]
    id_table = id_table[output_columns]

    classification = Classification(id_table, level_hierarchy)

    classification.to_csv("out/nonagricultural_activities.csv")
    classification.to_stata("out/nonagricultural_activities.dta")
4 changes: 4 additions & 0 deletions product/Datlas/Rural/download_sheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
hierarchy.to_csv("./in/FarmType_Hierarchy.tsv", sep="\t", index=False, encoding="utf-8")
names.to_csv("./in/FarmType_Names.tsv", sep="\t", index=False, encoding="utf-8")

# Nonagric: fetch the sheet, then store its hierarchy and names tables as
# UTF-8, tab-separated files without the index column.
nonagric_sheet_url = "https://docs.google.com/spreadsheets/d/1Fp4b5n69qyBg13JKotqQQotyLZhecLxsqYRc9Spb2Y8/edit#gid=1448799995"
hierarchy, names = get_classification_from_gdrive(nonagric_sheet_url)
nonagric_tsv_options = dict(sep="\t", index=False, encoding="utf-8")
hierarchy.to_csv("./in/Nonagric_Hierarchy.tsv", **nonagric_tsv_options)
names.to_csv("./in/Nonagric_Names.tsv", **nonagric_tsv_options)

# Land Use
hierarchy, names = get_classification_from_gdrive("https://docs.google.com/spreadsheets/d/17EoKvwQKujYRCKzh2odu--bpR0d2grigrWDn4CjRaeg/edit#gid=1207195644")
Expand Down
22 changes: 22 additions & 0 deletions product/Datlas/Rural/in/Nonagric_Hierarchy.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
level3 level2 level0
Commerce different from food and alcoholic beverages commerce total
Food and alcoholic beverages commerce commerce total
Gas generation and energy transmission minning or industry total
Minning with titles minning or industry total
Minning without titles minning or industry total
Oil minning or industry total
Plastic, metallurgical, and chemical products manufacturing minning or industry total
Agricultural products transformation raw process total
Biofuels production raw process total
Crafts production raw process total
Food and alcoholic beverages production raw process total
Forestry products transformation raw process total
Livestock products transformation raw process total
Education services services total
Environmental activities services total
Health services services total
National security services services total
Recreative services services total
Religious services services total
Support for agricultural activities services total
Turistic and hotel services services total
27 changes: 27 additions & 0 deletions product/Datlas/Rural/in/Nonagric_Names.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
code level name_en name_short_en name_es name_short_es
total level0 Total Total Total Total
commerce level2 Commerce Commerce Comercio Comercio
minning or industry level2 Mining or Industry Mining or Industry Minera o industria Minera o industria
raw process level2 Raw Process Raw Process Transformación de productos agropecuarios Transf. de productos agrop.
services level2 Services Services Servicios Servicios
Commerce different from food and alcoholic beverages level3 Commerce different from food and alcoholic beverages Commerce different from food and alcoholic beverages Comercio de productos distintos a alimenticios y bebidas alcohólicas Comercio de productos distintos a alimenticios y bebidas alcohólicas
Food and alcoholic beverages commerce level3 Food and alcoholic beverages commerce Food and alcoholic beverages commerce Comercio de productos alimenticios y bebidas alcohólicas Comercio de productos alimenticios y bebidas alcohólicas
Gas generation and energy transmission level3 Gas generation and energy transmission Gas generation and energy transmission Gas, generación y trasmisión de energí­a Gas, generación y trasmisión de energí­a
Minning with titles level3 Mining with titles Mining with titles Minerí­a con tí­tulos Minerí­a con tí­tulos
Minning without titles level3 Mining without titles Mining without titles Minerí­a sin tí­tulos Minerí­a sin tí­tulos
Oil level3 Oil Oil Petróleo Petróleo
Plastic, metallurgical, and chemical products manufacturing level3 Plastic, metallurgical, and chemical products manufacturing Plastic, metallurgical, and chemical products manufacturing Fabricación de plástico, metalúrgicos, sustancias y productos quí­micos Fabricación de plástico, metalúrgicos, sustancias y productos quí­micos
Agricultural products transformation level3 Agricultural products transformation Agricultural products transformation Transformación de productos agrí­colas Transformación de productos agrí­colas
Biofuels production level3 Biofuels production Biofuels production Elaboración de biocombustibles Elaboración de biocombustibles
Crafts production level3 Crafts production Crafts production Elaboración de artesaní­as Elaboración de artesaní­as
Food and alcoholic beverages production level3 Food and alcoholic beverages production Food and alcoholic beverages production Elaboración de alimentos y bebidas alcohólicas Elaboración de alimentos y bebidas alcohólicas
Forestry products transformation level3 Forestry products transformation Forestry products transformation Transformación de productos forestales Transformación de productos forestales
Livestock products transformation level3 Livestock products transformation Livestock products transformation Transformación de productos pecuarios Transformación de productos pecuarios
Education services level3 Education services Education services Servicios de educación Servicios de educación
Environmental activities level3 Environmental activities Environmental activities Actividades ambientales Actividades ambientales
Health services level3 Health services Health services Servicios de salud Servicios de salud
National security services level3 National security services National security services Servicios de seguridad nacional Servicios de seguridad nacional
Recreative services level3 Recreative services Recreative services Servicios recreativos Servicios recreativos
Religious services level3 Religious services Religious services Servicios religiosos Servicios religiosos
Support for agricultural activities level3 Support for agricultural activities Support for agricultural activities Actividades de apoyo a la actividad agropecuaria Actividades de apoyo a la actividad agropecuaria
Turistic and hotel services level3 Turistic and hotel services Turistic and hotel services Servicios turí­sticos, de alojamiento, hospedaje y otros Servicios turí­sticos, de alojamiento, hospedaje y otros
27 changes: 27 additions & 0 deletions product/Datlas/Rural/out/nonagricultural_activities.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"","code","name","level","name_en","name_es","parent_id"
0,"total","Total","level0","Total","Total",""
1,"commerce","Commerce","level2","Commerce","Comercio",0.0
2,"commerce_different_from_food_and_alcoholic_beverages","Commerce different from food and alcoholic beverages","level3","Commerce different from food and alcoholic beverages","Comercio de productos distintos a alimenticios y bebidas alcohólicas",1.0
3,"food_and_alcoholic_beverages_commerce","Food and alcoholic beverages commerce","level3","Food and alcoholic beverages commerce","Comercio de productos alimenticios y bebidas alcohólicas",1.0
4,"minning_or_industry","Mining or Industry","level2","Mining or Industry","Minera o industria",0.0
5,"gas_generation_and_energy_transmission","Gas generation and energy transmission","level3","Gas generation and energy transmission","Gas, generación y trasmisión de energí­a",4.0
6,"minning_with_titles","Mining with titles","level3","Mining with titles","Minerí­a con tí­tulos",4.0
7,"minning_without_titles","Mining without titles","level3","Mining without titles","Minerí­a sin tí­tulos",4.0
8,"oil","Oil","level3","Oil","Petróleo",4.0
9,"plastic_metallurgical_and_chemical_products_manufacturing","Plastic, metallurgical, and chemical products manufacturing","level3","Plastic, metallurgical, and chemical products manufacturing","Fabricación de plástico, metalúrgicos, sustancias y productos quí­micos",4.0
10,"raw_process","Raw Process","level2","Raw Process","Transformación de productos agropecuarios",0.0
11,"agricultural_products_transformation","Agricultural products transformation","level3","Agricultural products transformation","Transformación de productos agrí­colas",10.0
12,"biofuels_production","Biofuels production","level3","Biofuels production","Elaboración de biocombustibles",10.0
13,"crafts_production","Crafts production","level3","Crafts production","Elaboración de artesaní­as",10.0
14,"food_and_alcoholic_beverages_production","Food and alcoholic beverages production","level3","Food and alcoholic beverages production","Elaboración de alimentos y bebidas alcohólicas",10.0
15,"forestry_products_transformation","Forestry products transformation","level3","Forestry products transformation","Transformación de productos forestales",10.0
16,"livestock_products_transformation","Livestock products transformation","level3","Livestock products transformation","Transformación de productos pecuarios",10.0
17,"services","Services","level2","Services","Servicios",0.0
18,"education_services","Education services","level3","Education services","Servicios de educación",17.0
19,"environmental_activities","Environmental activities","level3","Environmental activities","Actividades ambientales",17.0
20,"health_services","Health services","level3","Health services","Servicios de salud",17.0
21,"national_security_services","National security services","level3","National security services","Servicios de seguridad nacional",17.0
22,"recreative_services","Recreative services","level3","Recreative services","Servicios recreativos",17.0
23,"religious_services","Religious services","level3","Religious services","Servicios religiosos",17.0
24,"support_for_agricultural_activities","Support for agricultural activities","level3","Support for agricultural activities","Actividades de apoyo a la actividad agropecuaria",17.0
25,"turistic_and_hotel_services","Turistic and hotel services","level3","Turistic and hotel services","Servicios turí­sticos, de alojamiento, hospedaje y otros",17.0
Binary file not shown.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
'product/Datlas/Rural/out/agricultural_products.csv',
'product/Datlas/Rural/out/agricultural_products_expanded.csv',
'product/Datlas/Rural/out/agricultural_products_census.csv',
'product/Datlas/Rural/out/nonagricultural_activities.csv',
'product/Datlas/Rural/out/farm_type.csv',
'product/Datlas/Rural/out/farm_size.csv',
'product/HS/Atlas/out/hs92_atlas.csv',
Expand Down

0 comments on commit 290bac2

Please sign in to comment.