import pandas as pd
import numpy as np
from scipy.linalg import toeplitz
def propensity_eq(x, cov_mat, R2_d=0.5):
= x.shape[1]
dim_x = [1 / (k**2) for k in range(1, dim_x + 1)]
beta = np.dot(np.dot(cov_mat, beta), beta)
b_sigma_b = np.sqrt(np.pi**2 / 3. * R2_d/((1-R2_d) * b_sigma_b))
c_d = np.exp(np.dot(x, np.multiply(beta, c_d)))
xx = (xx/(1+xx))
propensity_score return propensity_score
def potential_outcome_eq(x, cov_mat, R2_y=0.5, theta=0):
= x.shape[1]
dim_x = [1 / (k**2) for k in range(1, dim_x + 1)]
beta = np.dot(np.dot(cov_mat, beta), beta)
b_sigma_b = np.sqrt(R2_y/((1-R2_y) * b_sigma_b))
c_y = 0
d = d * theta + d * np.dot(x, np.multiply(beta, c_y))
Y_0 = 1
d = d * theta + d * np.dot(x, np.multiply(beta, c_y))
Y_1 return Y_0, Y_1
def make_irm_data(n_obs=500, dim_x=2, theta=0, R2_d=0.5, R2_y=0.5):
"""
Variation of DoubleML's make_irm_data function. Generates data for the interactive regression model (IRM) example and returns potential values
References
----------
Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With
High‐Dimensional Data. Econometrica, 85: 233-298.
"""
# inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see suplement
= np.random.uniform(size=[n_obs, ])
v = np.random.standard_normal(size=[n_obs, ])
zeta = toeplitz([np.power(0.5, k) for k in range(dim_x)])
cov_mat = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
x = propensity_eq(x, cov_mat, R2_d)
propensity_score = 1. * (propensity_score > v)
D = potential_outcome_eq(x, cov_mat, R2_y, theta)
Y_0, Y_1 = (1 - D) * Y_0 + D * Y_1 + zeta
Y = pd.DataFrame(x, columns=[f'x_{i}' for i in range(x.shape[1])])
x_flat = pd.concat([pd.DataFrame({'Y_0': Y_0, 'Y_1': Y_1, 'ps': propensity_score, 'D': D, 'Y': Y}), x_flat], axis=1)
df_orcl = pd.concat([pd.DataFrame({'D': D, 'Y': Y}), x_flat], axis=1)
df
= {'df': df, 'df_orcl': df_orcl}
results
return results
Simulation Example: Propensity score trimming and reference population
Motivation example: Heterogeneous treatment effect and propensity score adjustments
Parameters \{a_1,a_2,b_1,b_2,c_1,c_2\} might be modified for tuning later.
123)
np.random.seed(
= 0.35
R2_d= 0.35
R2_y= 2
dim_x = 0
theta = 1000
n_obs = make_irm_data(n_obs=n_obs, dim_x=dim_x, theta=theta, R2_d=R2_d, R2_y=R2_y) data_dict
# Generate a histogram of propensity scores for treated and non-treated
import matplotlib.pyplot as plt
'df_orcl'].loc[data_dict['df_orcl']['D'] == 1, 'ps'], bins=20, alpha=0.5, label='Treated')
plt.hist(data_dict['df_orcl'].loc[data_dict['df_orcl']['D'] == 0, 'ps'], bins=20, alpha=0.5, label='Non-treated')
plt.hist(data_dict['Propensity Score')
plt.xlabel('Frequency')
plt.ylabel('Histogram of Propensity Scores')
plt.title(
= data_dict['df_orcl'].loc[data_dict['df_orcl']['D'] == 1, 'ps'].min()
min_ps_treated = data_dict['df_orcl'].loc[data_dict['df_orcl']['D'] == 1, 'ps'].max()
max_ps_treated = data_dict['df_orcl'].loc[data_dict['df_orcl']['D'] == 0, 'ps'].min()
min_ps_control = data_dict['df_orcl'].loc[data_dict['df_orcl']['D'] == 0, 'ps'].max()
max_ps_control
# Compute the minimum and maximum propensity scores for treated and non-treated
print(min_ps_treated)
print(max_ps_treated)
# add vertical line at min_ps_treated and add label with value for min_ps_treated
=min_ps_treated, color='r', linestyle='--', label=f'Min PS Treated: {min_ps_control:.2f}')
plt.axvline(x=max_ps_control, color='r', linestyle='--', label=f'Max PS Control: {max_ps_control:.2f}')
plt.axvline(x# add legend below plot
='upper center', bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=2)
plt.legend(loc
plt.show()
0.058309203265653316
0.9883990939718879
Summary of the propensity scores. This probably could be tuned more, but you can see that some areas have p_{treat} < 0.1 and thus trimming etc. might be advisable.
# summary of prop score
print(data_dict['df_orcl'].groupby('D')['ps'].describe())
count mean std min 25% 50% 75% \
D
0.0 511.0 0.375940 0.220918 0.018597 0.189679 0.355096 0.539133
1.0 489.0 0.624728 0.215901 0.058309 0.471686 0.648537 0.808571
max
D
0.0 0.926319
1.0 0.988399
Plot values of propensity score against values of X_1 and X_2 to illustrate what cutting off / clipping / discarding observations w.r.t. to the propensity score would imply in terms of the values of X_1 and X_2.
The surface plot shows that for high values of X_2, the propensity score varies a lot with X_1. Thus, we have “areas” of Overlap problems.
# create a plotly surface plot that maps the propensity value against all combinations of X_1 and X_2
import plotly.graph_objects as go
= toeplitz([0.5**k for k in range(2)])
cov_mat # Set up a grid for X_1 and X_2
= np.linspace(-3, 3, 100)
x1 = np.linspace(-3, 3, 100)
x2 = np.meshgrid(x1, x2)
X_1, X_2
# prepare array for propensity scores (100x100)
= np.zeros((x1.shape[0], x2.shape[0]))
propensity_score
# for each combination of X_1 and X_2, compute the propensity score
for i in range(x1.shape[0]):
for j in range(x2.shape[0]):
= np.array([X_1[i,j], X_2[i,j]]).reshape(1,2)
x = toeplitz([np.power(0.5, k) for k in range(dim_x)])
cov_mat = propensity_eq(x, cov_mat, R2_d)
ps_score = ps_score propensity_score[i,j]
/tmp/ipykernel_27680/3782873549.py:19: DeprecationWarning:
Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
= go.Figure(data=[go.Surface(z=propensity_score, x=X_1, y=X_2)])
fig ='Propensity score surface plot', autosize=False, width=500, height=500)
fig.update_layout(title
# Show plot
fig.show()
# Create a df with propensity score and values for X_1 and X_2
= pd.DataFrame({'X_1': X_1.flatten(), 'X_2': X_2.flatten(), 'ps': propensity_score.flatten()}) propensity_df
We implement trimming for propensity scores smaller than 0.1
# indicator for being trimmed from above, i.e., if ps > 1-trim_val
= 0.1
trim_val 'trim_top'] = np.where(propensity_df['ps'] > 1-trim_val, 1, 0)
propensity_df['trim_bottom'] = np.where(propensity_df['ps'] < trim_val, 1, 0) propensity_df[
18.75\% of the observations get trimmed from above (because their p_{treat} > 0.9 ) and 18.75\% (because p_{treat} <0.1).
# share of trimmed
"trim_top", "trim_bottom"]].mean()*100 propensity_df[[
trim_top 18.75
trim_bottom 18.75
dtype: float64
# create a heatmap uisng propensity_df
import plotly.express as px
= px.density_heatmap(propensity_df, x='X_1', y='X_2', z='ps', title='Propensity score heatmap', histfunc = 'avg', nbinsx=100, nbinsy=100)
fig
# Compute values of X_1 and X_2 for which the propensity score is greater than trim_val
= 0.1
trim_val 'trimmed'] = (propensity_df['ps'] < trim_val).astype(int) | (propensity_df['ps'] > 1-trim_val).astype(int)
propensity_df[= propensity_df['trimmed'].mean()
share_trimmed
# Add title to the plot
=f'Propensity score heatmap with trim_val = {trim_val}, Share of trimmed obs: {share_trimmed*100}%')
fig.update_layout(title
fig.show()
The next plot shows the propensity score only for the untrimmed subpopulation. Note: On the top left observations are missing, that we “trimmed”
# Heat map for only untrimmed observations
# filter out trimmed observations
= propensity_df[propensity_df['trimmed'] == 0]
propensity_df_untrimmed
= px.density_heatmap(propensity_df_untrimmed, x='X_1', y='X_2', z='ps', title='Propensity score heatmap for untrimmed observations', histfunc = 'avg', nbinsx=100, nbinsy=100)
fig
# Add title to the plot
=f'Propensity score heatmap for untrimmed observations') fig.update_layout(title
Next, we want to look at the potential outcomes for the different areas of X_1, X_2. We see, that the average Y_0 and Y_1 is very high in the trimmed area - does this lead to problems maybe? Is the population still the same even though we trimmed?
# Add potential outcomes to propensity_df
= potential_outcome_eq(x = propensity_df.loc[:, ['X_1', 'X_2']], cov_mat=cov_mat, R2_y=R2_y, theta=theta)
Y_0, Y_1 'Y_0'] = Y_0
propensity_df['Y_1'] = Y_1 propensity_df[
# Heatmap for Y_0 based on untrimmed obs only
= propensity_df[propensity_df['trimmed'] == 0]
propensity_df_untrimmed = px.density_heatmap(propensity_df_untrimmed, x='X_1', y='X_2', z='Y_0', title='Potential outcome Y_0 heatmap for untrimmed observations', histfunc = 'avg', nbinsx=100, nbinsy=100)
fig
# Add title to the plot
=f'Potential outcome Y_0 heatmap for untrimmed observations')
fig.update_layout(title
fig.show()
# Heatmap for Y_1 based on untrimmed obs only
= px.density_heatmap(propensity_df_untrimmed, x='X_1', y='X_2', z='Y_1', title='Potential outcome Y_1 heatmap for untrimmed observations', histfunc = 'avg', nbinsx=100, nbinsy=100)
fig
# Add title to the plot
=f'Potential outcome Y_1 heatmap for untrimmed observations')
fig.update_layout(title
# Heatmap for Y based on untrimmed obs only
fig.show()
Compare untrimmed to trimmed data in terms of distribution of X_1 and X_2. You can tell, that we bite out a corner of the distribution :)
# Plot joint density of X_1 and X_2
# Creating a 2D histogram
= px.density_heatmap(propensity_df, x='X_1', y='X_2', nbinsx=30, nbinsy=30, color_continuous_scale='Blues')
fig
# Updating layout for better readability
fig.update_layout(='2D Histogram using Plotly',
title='X-axis',
xaxis_title='Y-axis',
yaxis_title=dict(title='Counts')
coloraxis_colorbar
)
# Show the plot
fig.show()
# Plot joint density of X_1 and X_2
# Creating a 2D histogram
= px.density_heatmap(propensity_df_untrimmed, x='X_1', y='X_2', nbinsx=30, nbinsy=30, color_continuous_scale='Blues')
fig
# Updating layout for better readability
fig.update_layout(='2D Histogram using Plotly',
title='X-axis',
xaxis_title='Y-axis',
yaxis_title=dict(title='Counts')
coloraxis_colorbar
)
# Show the plot
fig.show()
Next steps/ open questions
- What could be done next is to simulate data, estimate with DoubleML and then assess how the trimming affects the distribution of X_1 and X_2 as compared to the reference population. Do different trimming rules (truncation, discarding) lead to different results?
- So far, we considered a simple linear model with a constant effect of D on Y. If the effect (i.e. Y_1 - Y_0 is allowed to vary with X_1 and X_2, trimming/discarding might lead to non-random sample selection
- The example was based on uniformly distributed covariates… it might make sense to look at other distributions, e.g. joint normal
Example:
from doubleml import DoubleMLIRM, DoubleMLData
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.model_selection import cross_val_predict
- “Truncation”
1234)
np.random.seed(= []
results = 1
num_repetitions for i in range(num_repetitions):
= []
result_i = 1
true_theta = make_irm_data(n_obs=n_obs, dim_x=dim_x, theta=theta, R2_d=R2_d, R2_y=R2_y)
df_dict = DoubleMLData(df_dict['df'], 'Y', 'D', ['x_0', 'x_1'])
data = LGBMRegressor(verbose = -1), LGBMClassifier(verbose = -1)
ml_g, ml_m for trimming_value in [0.001, 0.01, 0.05, 0.1]:
= DoubleMLIRM(data, ml_g, ml_m, trimming_threshold=trimming_value)
dml_obj
dml_obj.fit()0] - true_theta) # append bias of trimming value
result_i.append(dml_obj.coef[# append result of repetition i results.append(result_i)
= pd.DataFrame(results, columns=["trim 0.001", "trim 0.01", "trim 0.05", "trim 0.1"])
result_frame result_frame
trim 0.001 | trim 0.01 | trim 0.05 | trim 0.1 | |
---|---|---|---|---|
0 | -0.528917 | -0.744113 | -0.915383 | -0.921815 |
- “Discarding”
1234)
np.random.seed(= []
results = 1
num_repetitions for i in range(num_repetitions):
= []
result_i # make data
= make_irm_data(n_obs=n_obs, dim_x=dim_x, theta=theta, R2_d=R2_d, R2_y=R2_y)
df_dict = df_dict['df']
data # make estimators
= LGBMRegressor(verbose = -1), LGBMClassifier(verbose = -1)
ml_g, ml_m # estimate pscore
"pscore_est"] = cross_val_predict(ml_m, data.drop(columns=["Y", "D"]), data.D, method='predict_proba')[:,1]
data[for trimming_value in [0.001, 0.01, 0.05, 0.1]:
# discard poor overlap
= data.loc[~((data["pscore_est"] < trimming_value) | (data["pscore_est"] > 1 - trimming_value))].drop(columns=["pscore_est"])
data_trimmed # create dml data obj
= DoubleMLData.from_arrays(x=data_trimmed.drop(columns=["Y", "D"]),
dml_data =data_trimmed["Y"],
y=data_trimmed["D"])
d= DoubleMLIRM(dml_data, ml_g, ml_m, trimming_threshold=1e-12) # set trimming very low because we already discarded
dml_obj
dml_obj.fit()0] - true_theta) # append bias of trimming value
result_i.append(dml_obj.coef[# append result of repetition i results.append(result_i)
"ml_m"].flatten()>0.1 dml_obj.predictions[
array([ True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
False, True, True, True, True, False, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True, True, True, True, False,
True, True, True, True, False, True, True, True, True,
False, False, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
False, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, False, True, False, True, False, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
False, False, False, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, False, True, True, False, True, True, True, True,
False, True, True, True, True, True, True, True, False,
False, True, True, True, True, False, False, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, False, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, False, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, False, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, True, True, True, False, True, True,
True, False, True, True, True, False, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, False, False, True, True, True, True, True, True,
True, True, False, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, False, True, True, True, False, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, False, True, False, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, False, False, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, True, True, True, True, True, True,
False, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
False, False, True, True, True, True, False, False, True,
True, True, True, True, True, False, False, True, True,
True, True, True, True, True, False, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, False, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, False, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, False, False,
True, True, True])
= pd.DataFrame(results, columns=["trim 0.001", "trim 0.01", "trim 0.05", "trim 0.1"])
result_frame result_frame
trim 0.001 | trim 0.01 | trim 0.05 | trim 0.1 | |
---|---|---|---|---|
0 | -0.528917 | -1.484421 | -1.070437 | -0.504971 |
TODO: Look at trimmed and untrimmed distributions, look at distributions of potential outcomes, look at bias.