diff --git a/Project.toml b/Project.toml index b9887b6..e88e755 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "IPUMS" uuid = "51d1f77e-d457-4c14-a89d-9ed71839f38d" -authors = ["TheCedarPrince and Krishna Bhogaonker"] +authors = ["TheCedarPrince and Krishna Bhogaonker "] version = "0.0.1" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" OpenAPI = "d5e62ea6-ddf3-4d43-8e4c-ad5e6c8bfd7d" diff --git a/src/IPUMS.jl b/src/IPUMS.jl index eb415b2..a0979a9 100644 --- a/src/IPUMS.jl +++ b/src/IPUMS.jl @@ -13,7 +13,8 @@ module IPUMS Client using TimeZones using URIs - + using EzXML + include("structs.jl") include("constants.jl") include("helpers.jl") @@ -30,6 +31,7 @@ module IPUMS include("modelincludes.jl") include("apis/api_DefaultApi.jl") include("piracy.jl") + include("parsers/ddi_parser.jl") #= export from_json @@ -57,5 +59,5 @@ module IPUMS export Client export DefaultApi - + export parse_ddi end diff --git a/src/constants.jl b/src/constants.jl index e83c692..c1090fe 100644 --- a/src/constants.jl +++ b/src/constants.jl @@ -88,3 +88,59 @@ const global ipums_sources = [ collection_type = "microdata" ) ] + + +#= + +XPaths for parsing DDI XML files + +This is a list of XPATHs for locating file-level and variable-level metadata +in a DDI XML file. The constants are stored here, but used in the `ddi_parser.jl` +file. + +Extract level XPATHs: + +EXTRACT_CONDITIONS - IPUMS conditions for fair and legal use of public data. +EXTRACT_CITATION - Citation information for referencing IPUMS data +EXTRACT_IPUMS_PROJECT - Name of the IPUMS project from which the data is taken, such as CPS, or DHS, etc. +EXTRACT_NOTES - User provided notes or additional miscellaneous information provided about the extract. +EXTRACT_DATE - The date that the extract was generated. 
+
+Variable level XPATHs:
+
+VAR_NODE_LOCATION = The base nodes that correspond to each variable in the dataset.
+VAR_NAME_XPATH = The name of the IPUMS variable.
+VAR_STARTPOS_XPATH = The start position (in text columns) of the variable in a fixed width file specification.
+VAR_ENDPOS_XPATH = The end position (in text columns) of the variable in a fixed width file specification.
+VAR_WIDTH_XPATH = The width (in text columns) of the variable in a fixed width file specification.
+VAR_LABL_XPATH = A short description for the data contained in a variable.
+VAR_TXT_XPATH = A longer and more complete description of the data contained in a variable.
+VAR_DCML_XPATH = The number of decimal places contained in a variable.
+VAR_TYPE_XPATH = An indicator of whether the variable is either a string or numeric data type.
+VAR_INTERVAL_XPATH = An indicator of whether a numeric variable is continuous or discrete.
+VAR_CATEGORY_XPATH = A description of the category levels and corresponding numerical values for a categorical variable, such as
+                     "Women => 0, Men => 1"
+
+=#
+
+# The `x:` prefix used in every path below is not fixed in the document; the
+# parser binds it to the document's namespace by passing `["x" => ns]` to
+# `EzXML.findall`.
+const EXTRACT_CONDITIONS = "/x:codeBook/x:stdyDscr/x:dataAccs/x:useStmt/x:conditions"
+const EXTRACT_CITATION = "/x:codeBook/x:stdyDscr/x:dataAccs/x:useStmt/x:citReq"
+const EXTRACT_IPUMS_PROJECT = "/x:codeBook/x:stdyDscr/x:citation/x:serStmt/x:serName"
+const EXTRACT_NOTES = "/x:codeBook/x:stdyDscr/x:notes"
+const EXTRACT_DATE = "/x:codeBook/x:stdyDscr/x:citation/x:prodStmt/x:prodDate/@date"
+
+const VAR_NODE_LOCATION = "/x:codeBook/x:dataDscr/x:var"
+const VAR_NAME_XPATH = "/x:codeBook/x:dataDscr/x:var/@name"
+const VAR_STARTPOS_XPATH = "/x:codeBook/x:dataDscr/x:var/x:location/@StartPos"
+const VAR_ENDPOS_XPATH = "/x:codeBook/x:dataDscr/x:var/x:location/@EndPos"
+const VAR_WIDTH_XPATH = "/x:codeBook/x:dataDscr/x:var/x:location/@width"
+const VAR_LABL_XPATH = "/x:codeBook/x:dataDscr/x:var/x:labl"
+const VAR_TXT_XPATH = "/x:codeBook/x:dataDscr/x:var/x:txt"
+const VAR_DCML_XPATH = "/x:codeBook/x:dataDscr/x:var/@dcml"
+const VAR_TYPE_XPATH = "/x:codeBook/x:dataDscr/x:var/x:varFormat/@type"
+const VAR_INTERVAL_XPATH = "/x:codeBook/x:dataDscr/x:var/@intrvl"
+const VAR_CATEGORY_XPATH = "/x:codeBook/x:dataDscr/x:var/x:catgry"
+
+
+
+
diff --git a/src/parsers/ddi_parser.jl b/src/parsers/ddi_parser.jl
new file mode 100644
index 0000000..4d400a2
--- /dev/null
+++ b/src/parsers/ddi_parser.jl
@@ -0,0 +1,310 @@
+
+
+"""
+    parse_ddi(filepath::String)
+
+Parse an IPUMS DDI XML file and return a `DDIInfo` object holding the
+metadata of the IPUMS extract it describes.
+
+### Arguments
+
+- `filepath::String` -- Path to the IPUMS DDI XML file.
+
+### Returns
+
+A `DDIInfo` object populated with the file-level and variable-level
+metadata of the IPUMS extract.
+
+See the documentation for `DDIInfo` for more information about this
+object.
+
+# Examples
+
+Assuming an extract DDI file named `my_extract.xml` exists:
+```julia-repl
+julia> typeof(parse_ddi("my_extract.xml"))
+IPUMS.DDIInfo
+```
+"""
+function parse_ddi(filepath::String)
+
+    # Validate the path before handing it to the XML reader: the extension
+    # is checked first, then the existence of the file.
+    _check_that_file_is_xml(filepath)
+    _check_that_file_exists(filepath)
+
+    # Load the document; the namespace of its root element is what the `x:`
+    # prefix of the XPath constants is bound to later on.
+    document = EzXML.readxml(filepath)
+    ddi = DDIInfo(
+        filepath = filepath,
+        _xml_doc = document,
+        _ns = EzXML.namespace(document.root),
+    )
+
+    # Fill in the extract-level fields, then the per-variable metadata.
+    _read_ddi_and_parse_extract_level_metadata!(ddi)
+    _get_var_metadata_from_ddi!(ddi)
+
+    return ddi
+
+end
+
+
+"""
+    _check_that_file_is_xml(filepath::String)
+
+This is an internal function and checks whether the provided file is an XML
+  file. All DDI files should be in XML format.
+
+### Arguments
+
+- `filepath::String` - A file path that the user wishes to parse. The file must be
+    an XML file.
+
+### Returns
+
+The function returns `true` if the file is an XML file.
If the file is not + an XML file, then the function raises an `ArgumentError`. +""" +function _check_that_file_is_xml(filepath::String) + + extension = filepath[findlast(==('.'), filepath)+1:end] + + if extension != "xml" + throw(ArgumentError("The DDI file: $filepath should be an XML file.")) + else + return true + end + +end + + +""" + _check_that_file_exists(filepath::String) + +This is an internal function and checks whether the provided file exists or not. + +### Arguments + +- `filepath::String` - A file path that the user wishes to parse. The file must be + an existing XML file. + +### Returns + +The function returns nothing if the file exists. If the file does not exist, + then the function raises an `ArgumentError`. +""" +function _check_that_file_exists(filepath::String) + + if !isfile(filepath) + throw(ArgumentError("The specified file: $filepath does not exist.")) + else + return true + end +end + +""" + _read_ddi_and_parse_extract_level_metadata!(ddi::DDIInfo) + +This is an internal function and not meant for the public API. This function +parses the DDI XML file and captures the file-level metadata. + +### Arguments + +- `ddi::DDIInfo` - A `DDIInfo` object that will retain all of the parsed metadata. + +### Returns + +The function return the original `DDIInfo` object with updated data in the +attributes. 
+"""
+function _read_ddi_and_parse_extract_level_metadata!(ddi::DDIInfo)
+
+    xmlroot = ddi._xml_doc.root
+    nsmap = ["x" => ddi._ns]
+
+    # Text of the first node matching `xpath`, or `fallback` when the document
+    # has no such node. Indexing `findall(...)[1]` directly raises a
+    # `BoundsError` as soon as one of these elements is absent (e.g. if the
+    # document carries no `notes` element), which aborted the whole parse.
+    function first_content(xpath, fallback)
+        nodes = EzXML.findall(xpath, xmlroot, nsmap)
+        return isempty(nodes) ? fallback : nodes[1].content
+    end
+
+    # A field keeps its current value when its element is missing.
+    ddi.conditions = first_content(EXTRACT_CONDITIONS, ddi.conditions)
+    ddi.citation = first_content(EXTRACT_CITATION, ddi.citation)
+    ddi.ipums_project = first_content(EXTRACT_IPUMS_PROJECT, ddi.ipums_project)
+    ddi.extract_notes = first_content(EXTRACT_NOTES, ddi.extract_notes)
+    ddi.extract_date = first_content(EXTRACT_DATE, ddi.extract_date)
+
+    # Return the updated object, as the docstring promises.
+    return ddi
+
+end
+
+"""
+    _get_var_metadata_from_ddi!(ddi::DDIInfo)
+
+This is an internal function and not meant for the public API. This function
+  iterates over the variable nodes in the DDI XML file. The data
+  from each variable node is collected in a `DDIVariable` object, and a
+  vector of those `DDIVariable` objects is saved in the `DDIInfo` object.
+
+### Arguments
+
+- `ddi::DDIInfo` - A `DDIInfo` object that will retain all of the parsed metadata.
+
+### Returns
+
+The `DDIInfo` object passed in is updated in place with the parsed
+attributes.
+
+"""
+function _get_var_metadata_from_ddi!(ddi::DDIInfo)
+
+    xmlroot = ddi._xml_doc.root
+    nsmap = ["x" => ddi._ns]
+
+    # One XPath query per metadata field; each query returns its matches for
+    # all variables at once, and the results are combined by position below.
+    varnodes = EzXML.findall(VAR_NODE_LOCATION, xmlroot, nsmap)
+    name_nodes = EzXML.findall(VAR_NAME_XPATH, xmlroot, nsmap)
+    startpos_nodes = EzXML.findall(VAR_STARTPOS_XPATH, xmlroot, nsmap)
+    endpos_nodes = EzXML.findall(VAR_ENDPOS_XPATH, xmlroot, nsmap)
+    width_nodes = EzXML.findall(VAR_WIDTH_XPATH, xmlroot, nsmap)
+    labl_nodes = EzXML.findall(VAR_LABL_XPATH, xmlroot, nsmap)
+    txt_nodes = EzXML.findall(VAR_TXT_XPATH, xmlroot, nsmap)
+    dcml_nodes = EzXML.findall(VAR_DCML_XPATH, xmlroot, nsmap)
+    vartype_nodes = EzXML.findall(VAR_TYPE_XPATH, xmlroot, nsmap)
+    varinterval_nodes = EzXML.findall(VAR_INTERVAL_XPATH, xmlroot, nsmap)
+
+    name_vec = [v.content for v in name_nodes]
+    startpos_vec = parse.(Int64, [v.content for v in startpos_nodes])
+    endpos_vec = parse.(Int64, [v.content for v in endpos_nodes])
+    width_vec = parse.(Int64, [v.content for v in width_nodes])
+    labl_vec = [v.content for v in labl_nodes]
+    txt_vec = [v.content for v in txt_nodes]
+    dcml_vec = parse.(Int64, [v.content for v in dcml_nodes])
+    vartype_vec = [v.content for v in vartype_nodes]
+    varinterval_vec = [v.content for v in varinterval_nodes]
+
+    # Because the vectors are combined by position, every variable has to
+    # supply every field. Report a missing field up front with a descriptive
+    # error rather than as a `BoundsError` part-way through the loops below.
+    nvars = length(varnodes)
+    for (field, values) in ("name" => name_vec,
+                            "StartPos" => startpos_vec,
+                            "EndPos" => endpos_vec,
+                            "width" => width_vec,
+                            "labl" => labl_vec,
+                            "txt" => txt_vec,
+                            "dcml" => dcml_vec,
+                            "varFormat type" => vartype_vec,
+                            "intrvl" => varinterval_vec)
+        length(values) == nvars || throw(DimensionMismatch(
+            "The DDI file describes $nvars variables but provides " *
+            "$(length(values)) `$field` entries."))
+    end
+
+    # This loop iterates over each variable and identifies its datatype.
+    # The notations coded in the original DDI file are somewhat ambiguous,
+    # and hence the datatype must be manually identified before data import:
+    # non-numeric => String, numeric without decimals => Int64, any other
+    # numeric => Float64.
+
+    var_dtype_vec = DataType[]
+    for i in eachindex(name_vec)
+        if vartype_vec[i] != "numeric"
+            push!(var_dtype_vec, String)
+        elseif dcml_vec[i] == 0
+            push!(var_dtype_vec, Int64)
+        else
+            push!(var_dtype_vec, Float64)
+        end
+    end
+
+    # This loop iterates over all variables to find variables that are
+    # categorically coded, such as (1 => "Female", 2 => "Male"). For variables
+    # that are categorically coded, the `<catgry>` tag includes information on
+    # the categories and their corresponding numerical indices.
+    # The loop saves this coding information to a vector of (val, labl) tuples.
+
+    category_vec = Union{Vector{@NamedTuple{val::Int64, labl::String}}, Nothing}[]
+    for varnode in varnodes
+        if isempty(EzXML.findall("x:catgry", varnode, nsmap))
+            push!(category_vec, nothing)
+            continue
+        end
+
+        catvalue_nodes = EzXML.findall("x:catgry/x:catValu", varnode, nsmap)
+        l_nodes = EzXML.findall("x:catgry/x:labl", varnode, nsmap)
+
+        # NOTE: `parse` throws an `ArgumentError` if a category value is not
+        # an integer literal; such a value currently aborts the parse.
+        catvalue_vec = parse.(Int64, [v.content for v in catvalue_nodes])
+        l_vec = [v.content for v in l_nodes]
+        push!(category_vec,
+              [(val = catvalue_vec[k], labl = l_vec[k]) for k in eachindex(catvalue_vec)])
+    end
+
+    # The coder instructions tag `<codInstr>` contains additional information
+    # on how a variable is coded and used. Not all variables have this tag.
+    # This loop iterates over all variables and saves the contents of the coder
+    # instructions tag to the array if it exists. If a variable is missing the
+    # coder instructions tag, then `nothing` is saved to the array.
+
+    # The named groups `val` and `lbl` are required: the matches are read
+    # back through `m[:val]` and `m[:lbl]`.
+    # NOTE(review): both patterns are identical as written here, so the second
+    # one can never match where the first did not - confirm whether `regex2`
+    # was meant to differ.
+    regex = r"^(?<val>[[:graph:]]+)(([[:blank:]]+[[:punct:]|=]+[[:blank:]])+)(?<lbl>.+)$"m
+    regex2 = r"^(?<val>[[:graph:]]+)(([[:blank:]]+[[:punct:]|=]+[[:blank:]])+)(?<lbl>.+)$"m
+
+    coder_instr_vec = Union{String, Nothing}[]
+    for i in eachindex(varnodes)
+        coder_instr_nodes = EzXML.findall("x:codInstr", varnodes[i], nsmap)
+        if isempty(coder_instr_nodes)
+            push!(coder_instr_vec, nothing)
+            continue
+        end
+
+        instructions = coder_instr_nodes[1].content
+        push!(coder_instr_vec, instructions)
+
+        # These 2 regex statements are taken directly from the IPUMSR R
+        # package, starting on line 911 of the ddi_read.R file. There
+        # are 2 different regex statements that might match the unstructured
+        # text, hence we match on both statements. If the first match succeeds
+        # we privilege it, otherwise we consider the second match.
+
+        matches = _coder_instruction_labels(regex, instructions)
+        found = isempty(matches) ? _coder_instruction_labels(regex2, instructions) : matches
+        if !isempty(found)
+            if isnothing(category_vec[i])
+                category_vec[i] = found
+            else
+                append!(category_vec[i], found)
+            end
+        end
+    end
+
+    # Prepare the metadata summary dataframe, to help display the results.
+    # Save that data summary to the DDIInfo object.
+
+    categorical_vec = [ifelse(isnothing(r), "No", "Yes") for r in category_vec]
+    summary_df = DataFrame([name_vec, labl_vec, var_dtype_vec, startpos_vec,
+                            endpos_vec, categorical_vec],
+                           [:name, :description, :datatype, :start_pos,
+                            :end_pos, :categorical])
+    ddi.data_summary = summary_df
+
+    # This loop creates DDIVariable objects for each variable in the dataset,
+    # and pushes those objects into a vector. We will later use this information
+    # when configuring the dataframe column names, datatypes, metadata, etc.
+    var_vector = DDIVariable[]
+    for i in eachindex(name_vec)
+        dv = DDIVariable(name=name_vec[i],
+                         position_start=startpos_vec[i],
+                         position_end=endpos_vec[i],
+                         position_width=width_vec[i],
+                         labl=labl_vec[i],
+                         desc=txt_vec[i],
+                         dcml=dcml_vec[i],
+                         var_dtype=var_dtype_vec[i],
+                         var_interval=varinterval_vec[i],
+                         category_labels=category_vec[i],
+                         coder_instructions=coder_instr_vec[i])
+        push!(var_vector, dv)
+    end
+
+    ddi.variable_info = var_vector
+
+    # Return the updated object, as the docstring promises.
+    return ddi
+
+end
+
+
+"""
+    _coder_instruction_labels(regex::Regex, text::AbstractString)
+
+This is an internal function and not meant for the public API. It collects
+  the `(val, labl)` pairs that `regex` finds in the coder instructions `text`.
+  `regex` must define the named groups `val` and `lbl`. Matches whose `val`
+  group contains no digit are skipped, because they cannot be converted to a
+  numeric code.
+
+### Returns
+
+A `Vector{@NamedTuple{val::Int64, labl::String}}`, empty if nothing matched.
+
+"""
+function _coder_instruction_labels(regex::Regex, text::AbstractString)
+    labels = Vector{@NamedTuple{val::Int64, labl::String}}()
+    for m in eachmatch(regex, text)
+        occursin(r"[0-9]", m[:val]) || continue
+        push!(labels, (val = _string_to_num(m[:val]), labl = String(m[:lbl])))
+    end
+    return labels
+end
+
+
+"""
+    _string_to_num(x::AbstractString)
+
+This is an internal function and not meant for the public API. This function
+  takes a text string and returns only the first numeric portion of the string.
+  For example if the input is "Codes999999", the function will return an
+  Int64 with the value 999999.
+
+### Arguments
+
+- `x::AbstractString` - A string that may contain some numeric data mixed with text.
+
+### Returns
+
+This function returns the first run of digits in the string, coded as an Int64
+  datatype. If the string contains no digits, the function raises an
+  `ArgumentError`.
+
+"""
+function _string_to_num(x::AbstractString)
+    m = match(r"[0-9]+", x)
+    m === nothing && throw(ArgumentError("No numeric portion found in \"$x\"."))
+    return parse(Int64, m.match)
+end
+
diff --git a/src/structs.jl b/src/structs.jl
index 25924f1..ccc1e55 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -50,3 +50,183 @@ IPUMS.IPUMSSource("IPUMS USA", "usa", "microdata", "", true, "")
     home_url::String = ""
     IPUMSSource(proj_name::String, url_name::String, collection_type::String, code_for_api::String, api_support::Bool, home_url::String) = new(proj_name, url_name, collection_type, code_for_api, api_support, home_url)
 end
+
+
+"""
+```julia
+DDIVariable(
+    name::String = "",
+    position_start::Int64 = 9999,
+    position_end::Int64 = 9999,
+    position_width::Int64 = 9999,
+    labl::String = "",
+    desc::String = "",
+    dcml::Int64 = 9999,
+    var_dtype::DataType = String,
+    var_interval::String = "",
+    category_labels::Union{Vector{@NamedTuple{val::Int64, labl::String}}, Nothing} = nothing,
+    coder_instructions::Union{String, Nothing} = nothing
+)
+```
+
+A struct representing
individual variable/column metadata from an IPUMS extract
+file. This struct is used for parsing the IPUMS datafile, which may be of fixed
+width format, hence the `position_` fields. The default value for missing
+strings is an empty string `""`, while the default value for missing integer values
+is 9999.
+
+
+# Keyword Arguments
+
+- `name::String` - Name of the variable, as per the column name of the IPUMS
+    extract file. This name is limited to 8 characters.
+- `position_start::Int64` - The starting position (in columns) of a variable
+    in a fixed width file format.
+- `position_end::Int64` - The ending position (in columns) of a variable
+    in a fixed width file format.
+- `position_width::Int64` - The length (in columns) of a variable in a fixed
+    width file format.
+- `labl::String` - A short description of the variable. Often the `labl` is
+    used to display a description of the variable in a
+    dataframe or display.
+- `desc::String` - A longer description of the variable, including information
+    about the use of the variable.
+- `dcml::Int64` - Identifies the number of decimal places in the variable.
+- `var_dtype::DataType` - Identifies the Julia data type of the variable.
+- `var_interval::String` - Identifies if a numeric variable is discrete or
+    continuous.
+- `category_labels::Union{Vector{@NamedTuple{val::Int64, labl::String}}, Nothing}` - If a variable is
+    categorical, then this is a vector of named tuples, where
+    `val` is a numerical index and `labl` is the category
+    label, for example `(val = 1, labl = "category 1")`. If a variable is not
+    categorical, then this attribute has a value of `nothing`.
+- `coder_instructions::Union{String, Nothing}` - Contains any additional
+    information about how the variable was coded and how it
+    should be treated.
+
+# Returns
+
+- `DDIVariable` object specifying the metadata for each variable.
+
+# Example
+
+```julia-repl
+julia> IPUMS.DDIVariable(
+           name = "YEAR",
+           position_start = 1,
+           position_end = 4,
+           position_width = 4,
+           labl = "Survey year",
+           desc = "YEAR reports the year in which the survey was conducted. YEARP is repeated on person records.",
+           dcml = 0,
+           var_dtype = Int64,
+           var_interval = "continuous",
+           category_labels = nothing,
+           coder_instructions = nothing
+       )
+
+IPUMS.DDIVariable("YEAR", 1, 4, 4, "Survey year", "YEAR reports the year in which the survey was conducted. YEARP is repeated on person records.", 0, Int64, "continuous", nothing, nothing)
+```
+
+# References
+
+Information about each variable field is taken from:
+
+https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/field_level_documentation_files/schemas/codebook_xsd/elements/var.html
+"""
+@kwdef mutable struct DDIVariable
+    name::String = ""
+    position_start::Int64 = 9999
+    position_end::Int64 = 9999
+    position_width::Int64 = 9999
+    labl::String = ""
+    desc::String = ""
+    dcml::Int64 = 9999
+    var_dtype::DataType = String
+    var_interval::String = ""
+    category_labels::Union{Vector{@NamedTuple{val::Int64, labl::String}}, Nothing} = nothing
+    coder_instructions::Union{String, Nothing} = nothing
+end
+
+
+"""
+```julia
+DDIInfo(
+    filepath::String,
+    conditions::String = "",
+    citation::String = "",
+    ipums_project::String = "",
+    extract_notes::String = "",
+    extract_date::String = "",
+    variable_info::Vector{DDIVariable} = DDIVariable[],
+    _xml_doc::EzXML.Document = EzXML.XMLDocument(),
+    _ns::String = "",
+    data_summary::DataFrame = DataFrame()
+)
+```
+
+A struct representing the metadata taken from an IPUMS extract. An IPUMS
+extract contains both file-level metadata (such as the date of export), as well
+as variable level metadata (such as the name and data type of a variable).
+
+The `DDIInfo` object is not generally called directly. The `parse_ddi()`
+function creates a `DDIInfo` object after successfully parsing a DDI
+file from an IPUMS extract.
+
+The `DDIInfo`
+object contains file level metadata. The `variable_info` field of the `DDIInfo`
+object contains a vector of `DDIVariable` objects. `DDIVariable` objects contain
+metadata information about individual IPUMS variables.
+
+# Keyword Arguments
+
+- `filepath::String` - File system path to the DDI (`.xml`) file.
+- `conditions::String` - IPUMS legal specification on the proper use of IPUMS
+    data.
+- `citation::String` - Information for the citation of IPUMS data.
+- `ipums_project::String` - Identifier for the IPUMS source of the extract
+    data, such as `IPUMS CPS`, or `IPUMS USA`, etc.
+- `extract_notes::String` - Additional clarifying information or user notes
+    about the extract.
+- `extract_date::String` - Date on which the extract was produced.
+- `variable_info::Vector{DDIVariable}` - A vector of `DDIVariable` objects,
+    which contain metadata on each variable
+    or column in the data file.
+- `_xml_doc::EzXML.Document` - An internal attribute that contains an internal
+    representation of the DDI DOM for parsing.
+- `_ns::String` - An internal attribute to hold the namespace used in the
+    XML DOM.
+- `data_summary::DataFrame` - Contains a dataframe that holds summary information
+    about the variables in the dataset, including variable names,
+    data types, variable descriptions, and categorical information.
+
+# Returns
+
+- `DDIInfo` object that contains both file-level and variable-level metadata extracted from an IPUMS DDI (.xml) file.
+
+# Example
+
+```julia-repl
+julia> IPUMS.DDIInfo(filepath = "test_ddi.xml")
+
+IPUMS.DDIInfo("test_ddi.xml", "", "", "", "", "", IPUMS.DDIVariable[], EzXML.Document(EzXML.Node()), "", 0×0 DataFrame)
+```
+
+# References
+
+Information about each file-level field is taken from:
+
+https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/field_level_documentation_files/schemas/codebook_xsd/elements/stdyDscr.html
+"""
+@kwdef mutable struct DDIInfo
+    filepath::String
+    conditions::String = ""
+    citation::String = ""
+    ipums_project::String = ""
+    extract_notes::String = ""
+    extract_date::String = ""
+    variable_info::Vector{DDIVariable} = DDIVariable[]
+    _xml_doc::EzXML.Document = EzXML.XMLDocument()
+    _ns::String = ""
+    data_summary::DataFrame = DataFrame()
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index ec5cc50..c1997ae 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,8 +2,19 @@ using DataFrames
 using IPUMS
 using Test
+
 
 @testset "Helpers" begin
     res = ipums_data_collections()
     @test DataFrame == typeof(res)
     @test (14, 4) == size(res)
 end
+
+@testset "DDI Parser" begin
+    ddi = parse_ddi("testdata/cps_00157.xml")
+    @test ddi.extract_date == "2023-07-10"
+    @test ddi.variable_info[1].position_end == 4
+    @test ddi._ns == "ddi:codebook:2_5"
+    @test size(ddi.data_summary) == (8,6)
+    # Rejected by the extension check: the path does not end in ".xml".
+    @test_throws ArgumentError parse_ddi("testdata/cps_00157.dat.gz")
+    # NOTE(review): presumably no fixture with this name exists, so this
+    # exercises the file-existence check - confirm against test/testdata.
+    @test_throws ArgumentError parse_ddi("testdata/cps_00156.xml")
+end
diff --git a/test/testdata/cps_00157.dat.gz b/test/testdata/cps_00157.dat.gz
new file mode 100644
index 0000000..ccd119d
Binary files /dev/null and b/test/testdata/cps_00157.dat.gz differ
diff --git a/test/testdata/cps_00157.xml b/test/testdata/cps_00157.xml
new file mode 100644
index 0000000..6e8084e
--- /dev/null
+++ b/test/testdata/cps_00157.xml
@@ -0,0 +1,544 @@
+ + + + + + + Codebook for an IPUMS-CPS Data Extract + DDI 2.5 metadata describing the extract file 'cps_00157.dat' +
ddi2-ef0cf890-f532-0138-e5de-0242ac1d0007-cps_00157.dat-cps.ipums.org + + + IPUMS + + + IPUMS + July 10, 2023 + IPUMS, 50 Willey Hall, 225 - 19th Avenue South, Minneapolis, MN 55455 + + + IPUMS + + + + + + + User Extract cps_00157.dat + + + IPUMS + + + IPUMS + July 10, 2023 + IPUMS, 50 Willey Hall, 225 - 19th Avenue South, Minneapolis, MN 55455 + + + IPUMS + + + IPUMS CPS + DOI:10.18128/D030.V10.0 + + + + + + + + Technical Variables -- HOUSEHOLD + Geographic Variables -- HOUSEHOLD + Technical Variables -- PERSON + Income Variables -- PERSON + + + 1962-03 + United States + + + 1963-03 + United States + + + + + + + + IPUMS-CPS + + + The user of the data acknowledges that the original collector of the data, the authorized distributor of the data, and the relevant funding agency bear no responsibility for use of the data or for interpretations or inferences based upon such uses. + + + + + + + cps_00157.dat + Microdata records + + ISO-8859-1 data file + fixed length fields + IPUMS, 50 Willey Hall, 225 - 19th Avenue South, Minneapolis, MN 55455 + + + + + + Survey year + + + Technical Variables -- HOUSEHOLD + + + + + Household serial number + + + Technical Variables -- HOUSEHOLD + + + + + Month + + + 01 + January + + + 02 + February + + + 03 + March + + + 04 + April + + + 05 + May + + + 06 + June + + + 07 + July + + + 08 + August + + + 09 + September + + + 10 + October + + + 11 + November + + + 12 + December + + Technical Variables -- HOUSEHOLD + + + + + Annual Social and Economic Supplement Household weight + + + Technical Variables -- HOUSEHOLD + + + + + State (FIPS code) + + + 01 + Alabama + + + 02 + Alaska + + + 04 + Arizona + + + 05 + Arkansas + + + 06 + California + + + 08 + Colorado + + + 09 + Connecticut + + + 10 + Delaware + + + 11 + District of Columbia + + + 12 + Florida + + + 13 + Georgia + + + 15 + Hawaii + + + 16 + Idaho + + + 17 + Illinois + + + 18 + Indiana + + + 19 + Iowa + + + 20 + Kansas + + + 21 + Kentucky + + + 22 + Louisiana + + + 23 + Maine + + + 24 
+ Maryland + + + 25 + Massachusetts + + + 26 + Michigan + + + 27 + Minnesota + + + 28 + Mississippi + + + 29 + Missouri + + + 30 + Montana + + + 31 + Nebraska + + + 32 + Nevada + + + 33 + New Hampshire + + + 34 + New Jersey + + + 35 + New Mexico + + + 36 + New York + + + 37 + North Carolina + + + 38 + North Dakota + + + 39 + Ohio + + + 40 + Oklahoma + + + 41 + Oregon + + + 42 + Pennsylvania + + + 44 + Rhode Island + + + 45 + South Carolina + + + 46 + South Dakota + + + 47 + Tennessee + + + 48 + Texas + + + 49 + Utah + + + 50 + Vermont + + + 51 + Virginia + + + 53 + Washington + + + 54 + West Virginia + + + 55 + Wisconsin + + + 56 + Wyoming + + + 61 + Maine-New Hampshire-Vermont + + + 65 + Montana-Idaho-Wyoming + + + 68 + Alaska-Hawaii + + + 69 + Nebraska-North Dakota-South Dakota + + + 70 + Maine-Massachusetts-New Hampshire-Rhode Island-Vermont + + + 71 + Michigan-Wisconsin + + + 72 + Minnesota-Iowa + + + 73 + Nebraska-North Dakota-South Dakota-Kansas + + + 74 + Delaware-Virginia + + + 75 + North Carolina-South Carolina + + + 76 + Alabama-Mississippi + + + 77 + Arkansas-Oklahoma + + + 78 + Arizona-New Mexico-Colorado + + + 79 + Idaho-Wyoming-Utah-Montana-Nevada + + + 80 + Alaska-Washington-Hawaii + + + 81 + New Hampshire-Maine-Vermont-Rhode Island + + + 83 + South Carolina-Georgia + + + 84 + Kentucky-Tennessee + + + 85 + Arkansas-Louisiana-Oklahoma + + + 87 + Iowa-N Dakota-S Dakota-Nebraska-Kansas-Minnesota-Missouri + + + 88 + Washington-Oregon-Alaska-Hawaii + + + 89 + Montana-Wyoming-Colorado-New Mexico-Utah-Nevada-Arizona + + + 90 + Delaware-Maryland-Virginia-West Virginia + + + 99 + State not identified + + Geographic Variables -- HOUSEHOLD + + Case selections: 27 Minnesota, 19 Iowa, 55 Wisconsin, 46 South Dakota, 38 North Dakota + + + + Person number in sample unit + + + Technical Variables -- PERSON + + + + + Annual Social and Economic Supplement Weight + + + Technical Variables -- PERSON + + + + + Total personal income + + + Income Variables -- PERSON + + + +