"""Source code for respy.pre_processing.specification_helpers."""

import itertools

import numpy as np
import pandas as pd

from respy.config import ROOT_DIR
from respy.shared import normalize_probabilities


def csv_template(n_types, n_type_covariates, initialize_coeffs=True):
    """Create a template for the parameter specification.

    The template starts from the base parameters. If the model has more than one
    type, the type probability and type shift parameters are appended.

    Parameters
    ----------
    n_types : int
        Number of types in the model. Type-specific rows are only appended if this
        is larger than one.
    n_type_covariates : int
        Number of covariates to predict type probabilities. Can be two or three.
    initialize_coeffs : bool, optional
        Whether coefficients are initialized with values or not. Default is
        ``True``. Any falsy value (e.g. ``False`` or ``numpy.False_``) replaces the
        whole ``"value"`` column with ``numpy.nan``.

    Returns
    -------
    template : pandas.DataFrame
        The assembled parameter template.

    """
    template = _base_template()

    if n_types > 1:
        to_concat = [
            template,
            _type_prob_template(n_types, n_type_covariates),
            _type_shift_template(n_types),
        ]
        template = pd.concat(to_concat, axis=0, sort=False)

    # Test truthiness rather than identity with ``False`` so that NumPy booleans
    # and other falsy flags are honored instead of being silently ignored.
    if not initialize_coeffs:
        template["value"] = np.nan

    return template
def _base_template():
    """Read the base parameters from ``base_params.csv``.

    The file is looked up in the ``pre_processing`` directory below ``ROOT_DIR``.

    Returns
    -------
    pandas.DataFrame
        The content of the CSV file indexed by ``("category", "name")``.

    """
    path = ROOT_DIR / "pre_processing" / "base_params.csv"
    return pd.read_csv(path).set_index(["category", "name"])
def _type_prob_template(n_types, n_type_covariates):
    """Create the rows with the coefficients of the type probabilities.

    Rows are created for every type from ``1`` to ``n_types - 1``. Each type gets
    one row per schooling covariate and, if ``n_type_covariates`` equals three, an
    additional constant.

    Parameters
    ----------
    n_types : int
        Number of types in the model.
    n_type_covariates : int
        Number of covariates to predict type probabilities. A constant is only
        included if this is exactly three.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the single-row frames produced by ``_base_row``.

    """
    rows = []

    for type_ in range(1, n_types):
        category = f"type_{type_}"

        # The constant only belongs to the specification with three covariates.
        if n_type_covariates == 3:
            rows.append(
                _base_row(
                    index_tuple=(category, "constant"),
                    data=[0, f"constant effect on probability of being type {type_}"],
                )
            )

        rows.append(
            _base_row(
                index_tuple=(category, "up_to_nine_years_edu"),
                data=[
                    1 / n_types,
                    "effect of up to nine years of schooling on probability of being "
                    f"type {type_}",
                ],
            )
        )

        rows.append(
            _base_row(
                index_tuple=(category, "at_least_ten_years_edu"),
                data=[
                    0,
                    "effect of at least ten years of schooling on probability of being "
                    f"type {type_}",
                ],
            )
        )

    return pd.concat(rows, axis=0, sort=False)
def _type_shift_template(n_types):
    """Create the rows with the type-specific shifts for every choice.

    For every type from ``1`` to ``n_types - 1`` one row per choice is created.
    The shifts for the choices ``"a"`` and ``"b"`` are stored under the ``wage_*``
    categories, the remaining ones under ``nonpec_*``. All shifts start at zero.

    Parameters
    ----------
    n_types : int
        Number of types in the model.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the single-row frames produced by ``_base_row``.

    """
    rows = []

    for type_ in range(1, n_types):
        for choice in ["a", "b", "edu", "home"]:
            prefix = "wage" if choice in ["a", "b"] else "nonpec"
            label = (f"{prefix}_{choice}", f"type_{type_}")
            content = [0, f"deviation for type {type_} from type 0 in {choice}"]
            rows.append(_base_row(index_tuple=label, data=content))

    return pd.concat(rows, axis=0, sort=False)
def initial_and_max_experience_template(edu_starts, edu_shares, edu_max):
    """Create the rows for initial and maximum experience in education.

    Parameters
    ----------
    edu_starts : iterable
        Initial levels of education. Paired element-wise with ``edu_shares``.
    edu_shares : iterable
        Value stored as the probability of the corresponding initial level.
    edu_max : scalar
        Value stored as the maximum level of experience for education.

    Returns
    -------
    pandas.DataFrame
        One row per ``(start, share)`` pair followed by the row for the maximum.

    """
    rows = [
        _base_row(
            (f"initial_exp_edu_{start}", "probability"),
            [share, f"Probability that the initial level of education is {start}."],
        )
        for start, share in zip(edu_starts, edu_shares)
    ]

    # The maximum is appended once, after all initial levels.
    rows.append(
        _base_row(
            ("maximum_exp", "edu"),
            [edu_max, "Maximum level of experience for education"],
        )
    )

    return pd.concat(rows, axis=0, sort=False)
def lagged_choices_probs_template(n_lagged_choices, choices):
    """Create the rows with random probabilities for the lagged choices.

    Parameters
    ----------
    n_lagged_choices : int
        Number of lagged choices. Rows are created for the lags ``1`` to
        ``n_lagged_choices``.
    choices : sequence
        Choices for which a probability is stored at every lag.

    Returns
    -------
    pandas.DataFrame
        One row per combination of lag and choice.

    """
    rows = []

    for lag in range(1, n_lagged_choices + 1):
        # One uniform draw per choice and lag; ``normalize_probabilities``
        # presumably rescales the draws so that they sum to one.
        probs = normalize_probabilities(np.random.uniform(size=len(choices)))

        rows.extend(
            _base_row(
                (f"lagged_choice_{lag}_{choice}", "constant"),
                [probs[pos], f"Probability of choice {choice} being lagged choice {lag}"],
            )
            for pos, choice in enumerate(choices)
        )

    return pd.concat(rows, axis=0, sort=False)
def lagged_choices_covariates_template():
    """Return the covariates which are defined in terms of the first lagged choice.

    Returns
    -------
    dict
        Maps the name of each covariate to the expression string defining it. A
        new dictionary is created on every call.

    """
    return dict(
        not_exp_a_lagged="exp_a > 0 and lagged_choice_1 != 'a'",
        not_exp_b_lagged="exp_b > 0 and lagged_choice_1 != 'b'",
        work_a_lagged="lagged_choice_1 == 'a'",
        work_b_lagged="lagged_choice_1 == 'b'",
        edu_lagged="lagged_choice_1 == 'edu'",
        returns_to_high_school="~edu_lagged and ~hs_graduate",
        returns_to_college="~edu_lagged and hs_graduate",
    )
[docs]def _base_row(index_tuple, data): ind = pd.MultiIndex.from_tuples([index_tuple], names=["category", "name"]) cols = ["value", "comment"] df = pd.DataFrame(index=ind, columns=cols, data=[data]) return df
def observable_prob_template(observables):
    """Create the rows with random probabilities for the levels of observables.

    Parameters
    ----------
    observables : sequence of int
        Number of levels of each observable. The position in the sequence is used
        as the identifier of the observable.

    Returns
    -------
    pandas.DataFrame
        One row per combination of observable and level.

    """
    rows = []

    for obs, n_levels in enumerate(observables):
        # One uniform draw per level; ``normalize_probabilities`` presumably
        # rescales the draws so that they sum to one.
        probs = normalize_probabilities(np.random.uniform(size=n_levels))

        for level in range(n_levels):
            label = (f"observable_observable_{obs}_{level}", "probability")
            content = [
                probs[level],
                f"Probability of observable {obs} being level choice {level}",
            ]
            rows.append(_base_row(label, content))

    return pd.concat(rows, axis=0, sort=False)
def observable_coeffs_template(observables, template):
    """Create the rows with random coefficients for the levels of observables.

    A coefficient is drawn for every combination of a wage or non-pecuniary
    category found in ``template`` and a level of an observable.

    Parameters
    ----------
    observables : sequence of int
        Number of levels of each observable, passed on to ``generate_obs_labels``.
    template : pandas.DataFrame
        Parameter template. The first level of its index holds the categories
        which are searched for the substrings ``"nonpec"`` and ``"wage"``.

    Returns
    -------
    pandas.DataFrame
        One row per combination of matching category and observable level.

    """
    # De-duplicate while keeping the order of first appearance. Collecting the
    # categories in a set would make the row order, and thereby the pairing of
    # random draws and labels, depend on the per-process string hash seed.
    categories = list(
        dict.fromkeys(
            category
            for category in template.index.get_level_values(0)
            if "nonpec" in category or "wage" in category
        )
    )

    labels = generate_obs_labels(observables, categories)

    rows = [
        _base_row(label, [np.random.uniform(), f"effect of {label[1]}"])
        for label in labels
    ]

    return pd.concat(rows, axis=0, sort=False)
def generate_obs_labels(observables, index):
    """Pair every entry of ``index`` with the label of every observable level.

    Parameters
    ----------
    observables : sequence of int
        Number of levels of each observable. The position in the sequence is used
        as the identifier of the observable.
    index : iterable
        Entries which are combined with the labels ``observable_{i}_{j}``.

    Returns
    -------
    list of tuple
        Cartesian product of ``index`` and the level labels. The entries of
        ``index`` vary slowest.

    """
    names = [
        f"observable_{obs}_{level}"
        for obs, n_levels in enumerate(observables)
        for level in range(n_levels)
    ]
    return list(itertools.product(index, names))