Skip to content

Commit

Permalink
Merge pull request #15 from cid-harvard/feature-industry-data-muni
Browse files Browse the repository at this point in the history
Industry muni level data import + API
  • Loading branch information
makmanalp committed Jun 29, 2015
2 parents 3f582a2 + 39dfb51 commit 449ffb0
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 35 deletions.
8 changes: 8 additions & 0 deletions colombia/api_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ class Meta:
"department_id", "industry_id", "year")


class MunicipalityIndustryYearSchema(ma.Schema):
    """Schema listing the fields exposed for municipality-industry-year rows."""

    class Meta:
        # Attribute names included in the output, in this order.
        fields = (
            "employment",
            "wages",
            "rca",
            "distance",
            "cog",
            "coi",
            "municipality_id",
            "industry_id",
            "year",
        )


class DepartmentSchema(ma.Schema):

class Meta:
Expand Down Expand Up @@ -65,6 +72,7 @@ class ColombiaMetadataSchema(MetadataSchema):

# Module-level schema instances. Every one is built with many=True, i.e. it is
# configured to handle a collection of objects rather than a single object.
department_product_year = DepartmentProductYearSchema(many=True)
department_industry_year = DepartmentIndustryYearSchema(many=True)
municipality_industry_year = MunicipalityIndustryYearSchema(many=True)
product_year = ProductYearSchema(many=True)
department = DepartmentSchema(many=True)
metadata = ColombiaMetadataSchema(many=True)
28 changes: 28 additions & 0 deletions colombia/data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,31 @@ def distance(self):
@distance.expression
def distance(cls):
return (1.0 - cls.density).label("distance")


class MunicipalityIndustryYear(BaseModel, IDMixin):
    """Industry figures for one municipality in one year.

    Rows live in the ``municipality_industry_year`` table. ``distance`` is
    not a stored column: it is derived as ``1.0 - density``, both on loaded
    instances and when used inside a query expression.
    """

    __tablename__ = "municipality_industry_year"

    # Foreign keys into Location and Industry, plus the year of the record.
    municipality_id = db.Column(db.Integer, db.ForeignKey(Location.id))
    industry_id = db.Column(db.Integer, db.ForeignKey(Industry.id))
    year = db.Column(db.Integer)

    # Relationships to the referenced Location and Industry rows.
    municipality = db.relationship(Location)
    industry = db.relationship(Industry)

    employment = db.Column(db.Integer)
    wages = db.Column(db.Integer)

    # NOTE(review): rca is declared Integer while density/cog/coi are Float --
    # confirm this is intended for the values being imported.
    rca = db.Column(db.Integer)
    density = db.Column(db.Float)
    cog = db.Column(db.Float)
    coi = db.Column(db.Float)

    @hybrid_property
    def distance(self):
        """Return ``1.0 - density`` for a loaded instance."""
        current_density = self.density
        return 1.0 - current_density

    @distance.expression
    def distance(cls):
        """Return the ``1.0 - density`` expression, labelled "distance"."""
        complement = 1.0 - cls.density
        return complement.label("distance")
11 changes: 10 additions & 1 deletion colombia/data/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from flask import Blueprint, request
from .models import (DepartmentProductYear, DepartmentIndustryYear,
ProductYear, Location, IndustryYear, DepartmentYear)
MunicipalityIndustryYear, ProductYear, Location,
IndustryYear, DepartmentYear)
from ..api_schemas import marshal
from .. import api_schemas as schemas

Expand Down Expand Up @@ -164,11 +165,19 @@ def industries_index(product_id=None):
q = DepartmentIndustryYear.query\
.filter_by(year=year, department_id=location_id)
return marshal(schemas.department_industry_year, q)
elif location_type == "municipality":
q = MunicipalityIndustryYear.query\
.filter_by(year=year, municipality_id=location_id)
return marshal(schemas.municipality_industry_year, q)
elif location_id is not None:
if location_type == "department":
q = DepartmentIndustryYear.query\
.filter_by(department_id=location_id)
return marshal(schemas.department_industry_year, q)
elif location_type == "municipality":
q = MunicipalityIndustryYear.query\
.filter_by(municipality_id=location_id)
return marshal(schemas.municipality_industry_year, q)

raise abort(400, body="Could not find data with the given parameters.")

Expand Down
71 changes: 38 additions & 33 deletions colombia/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,6 @@ def inner(line):
return inner


def make_iy(industry_map):
    """Return a converter that turns one data row into an IndustryYear.

    :param industry_map: mapping indexed by the row's ``"i"`` value; the
        looked-up entry is assigned to the record's ``industry`` attribute.
    :returns: a one-argument function taking a row (anything indexable by
        ``"i"``, ``"year"`` and ``"pci"``) and returning the populated
        ``models.IndustryYear``.
    """
    def inner(line):
        record = models.IndustryYear()
        industry_code = line["i"]
        record.industry = industry_map[industry_code]
        # The year is coerced to int before being stored.
        record.year = int(line["year"])
        record.complexity = line["pci"]
        return record

    return inner


def process_cpy(cpy, product_map, department_map):
"""Take a dataframe and return
Expand Down Expand Up @@ -331,37 +321,52 @@ def test_process_cpy(self):
db.session.add_all(cy)
db.session.commit()

# Classification.merge_to_table
# Classification.merge_index

def merge_to_table(classification, classification_name, df, merge_on):
    """Attach classification ids to ``df`` by matching on classification codes.

    :param classification: DataFrame with a ``"code"`` column; its index
        holds the id assigned to each code.
    :param classification_name: name of the id column added to the result.
    :param df: DataFrame to annotate.
    :param merge_on: column of ``df`` that holds the codes to look up.
    :returns: ``df`` left-merged with the code -> id lookup. Rows whose code
        is not present in ``classification`` get a missing value in the new
        column.
    """
    # Read the ids straight off the index instead of going through
    # reset_index(): that only yields a column called "index" when the index
    # is unnamed and no "index" column already exists.
    code_to_id = classification[["code"]].copy()
    code_to_id[classification_name] = classification.index.values
    code_to_id = code_to_id.set_index("code")

    return df.merge(code_to_id, left_on=merge_on,
                    right_index=True, how="left")

# Department - industry - year
df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_dpto.dta")
df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]]
df = df[df.i != "."]

df = df.merge(industry_classification.table, left_on="i",
right_on="code", how="inner")
df = merge_to_table(industry_classification.level("class"),
"industry_id", df, "i")
df = merge_to_table(location_classification.level("department"),
"department_id", df, "r")

def make_diy():
def inner(line):
dpy = models.DepartmentIndustryYear()
dpy.industry = industry_map[line["i"]]
dpy.department = location_map[line["r"]]
dpy.year = line["year"]
dpy.employment = line["E_yir"]
dpy.wages = line["W_yir"]
# Industry - Year
iy = df.groupby(["industry_id", "year"])[["pci"]].first().reset_index()
iy = iy.rename(columns={"pci": "complexity"})
iy.to_sql("industry_year", db.engine, index=False,
chunksize=10000, if_exists="append")

dpy.rca = line["rca"]
dpy.density = line["density"]
dpy.cog = line["cog"]
dpy.coi = line["coi"]

return dpy
return inner
cpy_out = df.apply(make_diy(), axis=1)
db.session.add_all(cpy_out)
# Department - industry - year
df = df.rename(columns={"E_yir": "employment", "W_yir": "wages"})
df = df[["department_id", "industry_id", "year", "employment",
"wages", "rca", "density", "cog", "coi"]]
df.to_sql("department_industry_year", db.engine, index=False,
chunksize=10000, if_exists="append")

iy = df.groupby(["i", "year"])[["pci"]].first().reset_index()
iy_out = iy.apply(make_iy(industry_map), axis=1)
db.session.add_all(iy_out)

# Municipality - industry - year
df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_mun.dta")
df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]]
df = df[df.i != "."]

db.session.commit()
df = merge_to_table(industry_classification.level("class"),
"industry_id", df, "i")
df = merge_to_table(location_classification.level("municipality"),
"municipality_id", df, "r")

df = df.rename(columns={"E_yir": "employment", "W_yir": "wages"})
df = df[["municipality_id", "industry_id", "year", "employment",
"wages", "rca", "density", "cog", "coi"]]
df.to_sql("municipality_industry_year", db.engine, index=False,
chunksize=10000, if_exists="append")
2 changes: 1 addition & 1 deletion colombia/models.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .metadata.models import Metadata, HSProduct, Location, Industry
from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear, IndustryYear
from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear, IndustryYear, MunicipalityIndustryYear

0 comments on commit 449ffb0

Please sign in to comment.