Source code for respy.pre_processing.process_covariates
"""This module comprises all functions which process the definition of covariates."""
import copy
def remove_irrelevant_covariates(options, params):
    """Drop covariates which are not needed by the model.

    Models should be as sparse as possible, so covariates which are never used are
    discarded. This lowers memory consumption and start-up costs. Further downstream,
    the number of lagged choices is inferred from the covariates, so removing
    irrelevant covariates might also reduce the number of implemented lags.

    "High-level" covariates are the ones whose name appears in the `"name"` level of
    the index of `params`. "Low-level" covariates are not part of the index but their
    name occurs in the formula of an already relevant covariate. They are collected
    repeatedly until no new covariate is found.

    Parameters
    ----------
    options : dict
        Contains the key `"covariates"` which maps covariate names to formulas. The
        input is not modified.
    params : pandas.DataFrame or pandas.Series
        Object whose index has a level called `"name"`.

    Returns
    -------
    options : dict
        Deep copy of the input in which `"covariates"` contains only the relevant
        covariates.

    See also
    --------
    separate_covariates_into_core_dense_mixed

    """
    options = copy.deepcopy(options)
    all_covs = options["covariates"]

    # High-level covariates: names which occur directly in the parameter index.
    names_in_params = params.index.get_level_values("name")
    kept = {
        name: formula for name, formula in all_covs.items() if name in names_in_params
    }

    # Low-level covariates: names which occur in the formula of a kept covariate.
    # Repeat until a full pass does not add anything new.
    while True:
        n_kept_before = len(kept)

        for name, formula in all_covs.items():
            if any(name in kept_formula for kept_formula in kept.values()):
                # Put the covariate at the front such that nested covariates are
                # created in the beginning.
                kept = {name: formula, **kept}

        if len(kept) == n_kept_before:
            break

    options["covariates"] = kept

    return options
def separate_covariates_into_core_dense_mixed(options, optim_paras):
    """Separate covariates into distinct groups.

    Covariates are separated into three groups.

    1. Covariates which use only information from the core state space.
    2. Covariates which use only information from the dense state space.
    3. Covariates which use information from the core and the dense state space.

    Covariates which use neither the core nor the dense state space are assigned to
    the first group.

    A covariate is treated as using a variable if the name of the variable occurs as
    a substring in the formula of the covariate.

    Parameters
    ----------
    options : dict
        Contains among other information covariates and their formulas under the key
        `"covariates"`. The input is not modified.
    optim_paras : dict
        Contains information to separate the core and dense state space. The keys
        `"choices_w_exp"`, `"n_lagged_choices"`, `"observables"` and `"n_types"` are
        read.

    Returns
    -------
    options : dict
        Deep copy of the input with four new keys: `"covariates_core"`,
        `"covariates_dense"`, `"covariates_mixed"` and `"covariates_all"`. Each maps
        covariate names to a dictionary with the keys `"formula"` and `"depends_on"`.
        The keys of all four dictionaries follow the order of `options["covariates"]`.

    """
    options = copy.deepcopy(options)
    covariates = options["covariates"]

    # Define two sets with default covariates for the core and dense state space.
    core_covs = {
        "period",
        *(f"exp_{choice}" for choice in optim_paras["choices_w_exp"]),
        *(f"lagged_choice_{i}" for i in range(1, optim_paras["n_lagged_choices"] + 1)),
    }
    dense_covs = set(optim_paras["observables"])
    if optim_paras["n_types"] >= 2:
        dense_covs.add("type")
        dense_covs.update(f"type_{i}" for i in range(2, optim_paras["n_types"] + 1))

    detailed_covariates = {
        cov: {"formula": formula, "depends_on": set()}
        for cov, formula in covariates.items()
    }

    # Loop over all covariates and add them to the sets if the formula contains
    # covariates from the sets. Both sets can only grow, so the loop stops as soon as
    # a full pass leaves the combined size of the sets unchanged.
    changed = True
    while changed:
        n_before = len(core_covs) + len(dense_covs)

        for cov, formula in covariates.items():
            matches_core = {name for name in core_covs if name in formula}
            matches_dense = {name for name in dense_covs if name in formula}

            if matches_core:
                core_covs.add(cov)
            if matches_dense:
                dense_covs.add(cov)

            detailed_covariates[cov]["depends_on"] |= matches_core | matches_dense

        changed = n_before != len(core_covs) + len(dense_covs)

    # Assign every defined covariate to exactly one group. Iterating over the
    # definitions instead of the sets keeps the key order independent of string
    # hashing and skips default state variables which have no formula.
    core, dense, mixed = {}, {}, {}
    for cov, details in detailed_covariates.items():
        uses_core = cov in core_covs
        uses_dense = cov in dense_covs

        if uses_core and uses_dense:
            mixed[cov] = details
        elif uses_dense:
            dense[cov] = details
        else:
            # Covers covariates which only use the core state space and covariates
            # which use neither of the two state spaces.
            core[cov] = details

    options["covariates_core"] = core
    options["covariates_dense"] = dense
    options["covariates_mixed"] = mixed
    # We cannot overwrite `options["covariates"]`.
    options["covariates_all"] = detailed_covariates

    return options
def identify_necessary_covariates(dependents, definitions):
    """Identify covariates necessary to compute `dependents`.

    This function can be used if only a specific subset of covariates is necessary and
    not all covariates.

    Starting from `dependents`, the `"depends_on"` entries in `definitions` are
    followed until no new names are found. Every name is inspected only once, so
    dependencies shared by several covariates and circular dependencies are handled
    without errors.

    Parameters
    ----------
    dependents : str or iterable of str
        Name or names of the covariates which should be computed.
    definitions : dict
        Maps covariate names to dictionaries which contain the key `"depends_on"`
        with a set of names.

    Returns
    -------
    covariates : dict
        Subset of `definitions` with all visited names which are keys of
        `definitions` and have a non-empty `"depends_on"`. Names which are not defined
        in `definitions` and covariates with an empty `"depends_on"` are not part of
        the result.

    See also
    --------
    respy.likelihood._compute_x_beta_for_type_probabilities

    """
    pending = [dependents] if isinstance(dependents, str) else list(dependents)
    visited = set()
    covariates = {}

    while pending:
        name = pending.pop()

        # A name can be reached via several covariates or via a cycle. Inspecting it
        # once is sufficient and guarantees termination.
        if name in visited:
            continue
        visited.add(name)

        if name in definitions and definitions[name]["depends_on"]:
            covariates[name] = definitions[name]
            pending.extend(definitions[name]["depends_on"])

    return covariates