[3]:
import warnings
warnings.filterwarnings("ignore")
from xai_agg import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
[4]:
from typing import Optional

# `display` and `HTML` are part of the public `IPython.display` API; importing them from
# `IPython.core.display` is the deprecated path.
from IPython.display import display, HTML


def display_side_by_side(dfs: list[pd.DataFrame], captions: Optional[list[str]] = None):
    """Display tables side by side to save vertical space.

    Input:
        dfs: list of pandas.DataFrame to render, left to right.
        captions: list of table captions, matched to `dfs` by position. Tables
            without a matching caption get an empty one. Defaults to no captions.

    Returns:
        None. The combined HTML is sent to the notebook output with `display`.
    """
    # Use None as the default instead of a shared mutable list.
    if captions is None:
        captions = []

    rendered_tables = []
    for i, df in enumerate(dfs):
        caption = captions[i] if i < len(captions) else ""
        styled = (
            df.style
            .set_table_attributes("style='display:inline'")  # keep tables on one row
            .set_caption(f"<strong>{caption}</strong>")
        )
        # Three non-breaking spaces follow every table as a horizontal gap.
        rendered_tables.append(styled._repr_html_() + "\xa0\xa0\xa0")

    display(HTML("".join(rendered_tables)))
Preprocess the data
One-hot-encode categorical variables, making sure the one-hot-encoded column names are in the format “[FEATURE]_[CATEGORY]”
Make sure all column names are valid python identifiers
[5]:
original_data = pd.read_csv('../data/german_credit_data_updated.csv')

# Dataset overview - German Credit Risk (from Kaggle):
#  0. Unnamed: 0 (numeric row index carried over from the CSV)
#  1. Age (numeric)
#  2. Sex (text: male, female)
#  3. Job (numeric: 0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled)
#  4. Housing (text: own, rent, or free)
#  5. Saving accounts (text: little, moderate, quite rich, rich; may be missing)
#  6. Checking account (text: little, moderate, rich; may be missing)
#  7. Credit amount (numeric, in DM - Deutsch Mark)
#  8. Duration (numeric, in month)
#  9. Purpose (text: car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)
# 10. Credit Risk (numeric target, takes the values 1 and 2)
display(original_data.head())
display(original_data.describe())
# `DataFrame.info()` prints its report itself and returns None, so it is called directly;
# wrapping it in display() only adds a stray "None" to the cell output.
original_data.info()

# Display the unique values of the categorical features:
print('Unique values of the categorical features:')
for col in original_data.select_dtypes(include='object'):
    print(f'\t- {col}: {original_data[col].unique()}')
| Unnamed: 0 | Age | Sex | Job | Housing | Saving accounts | Checking account | Credit amount | Duration | Purpose | Credit Risk | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 67 | male | 2 | own | NaN | little | 1169 | 6 | radio/TV | 1 |
| 1 | 1 | 22 | female | 2 | own | little | moderate | 5951 | 48 | radio/TV | 2 |
| 2 | 2 | 49 | male | 1 | own | little | NaN | 2096 | 12 | education | 1 |
| 3 | 3 | 45 | male | 2 | free | little | little | 7882 | 42 | furniture/equipment | 1 |
| 4 | 4 | 53 | male | 2 | free | little | little | 4870 | 24 | car | 2 |
| Unnamed: 0 | Age | Job | Credit amount | Duration | Credit Risk | |
|---|---|---|---|---|---|---|
| count | 954.000000 | 954.000000 | 954.000000 | 954.000000 | 954.000000 | 954.000000 |
| mean | 476.500000 | 35.501048 | 1.909853 | 3279.112159 | 20.780922 | 1.302935 |
| std | 275.540378 | 11.379668 | 0.649681 | 2853.315158 | 12.046483 | 0.459768 |
| min | 0.000000 | 19.000000 | 0.000000 | 250.000000 | 4.000000 | 1.000000 |
| 25% | 238.250000 | 27.000000 | 2.000000 | 1360.250000 | 12.000000 | 1.000000 |
| 50% | 476.500000 | 33.000000 | 2.000000 | 2302.500000 | 18.000000 | 1.000000 |
| 75% | 714.750000 | 42.000000 | 2.000000 | 3975.250000 | 24.000000 | 2.000000 |
| max | 953.000000 | 75.000000 | 3.000000 | 18424.000000 | 72.000000 | 2.000000 |
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 954 non-null int64
1 Age 954 non-null int64
2 Sex 954 non-null object
3 Job 954 non-null int64
4 Housing 954 non-null object
5 Saving accounts 779 non-null object
6 Checking account 576 non-null object
7 Credit amount 954 non-null int64
8 Duration 954 non-null int64
9 Purpose 954 non-null object
10 Credit Risk 954 non-null int64
dtypes: int64(6), object(5)
memory usage: 82.1+ KB
None
Unique values of the categorical features:
- Sex: ['male' 'female']
- Housing: ['own' 'free' 'rent']
- Saving accounts: [nan 'little' 'quite rich' 'rich' 'moderate']
- Checking account: ['little' 'moderate' nan 'rich']
- Purpose: ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
'domestic appliances' 'repairs' 'vacation/others']
[6]:
# Work on a copy so `original_data` stays intact and this cell can be re-run on its own.
preprocessed_data = original_data.copy()

# For savings and checking accounts, we will replace the missing values with 'none'.
# The result is assigned back to the frame: calling `fillna(..., inplace=True)` on a
# column selection is chained assignment, and with pandas Copy-on-Write the fill is
# applied to a temporary Series, so the frame would keep its NaNs.
account_columns = ['Saving accounts', 'Checking account']
preprocessed_data[account_columns] = preprocessed_data[account_columns].fillna('none')

# Dropping index column:
preprocessed_data = preprocessed_data.drop(columns=['Unnamed: 0'])

# "Job" is stored as an integer code; map it to text labels so that it is treated as a
# categorical feature and one-hot-encoded with readable column names.
preprocessed_data["Job"] = preprocessed_data["Job"].map({
    0: 'unskilled_nonresident',
    1: 'unskilled_resident',
    2: 'skilled',
    3: 'highlyskilled',
})

# NOTE(review): `categorical_features` keeps the ORIGINAL feature names (with spaces),
# while the encoded columns are renamed below to use underscores - confirm this is what
# the downstream consumers of `categorical_features` expect.
categorical_features = preprocessed_data.select_dtypes(include='object').columns
numerical_features = preprocessed_data.select_dtypes(include='number').columns.drop('Credit Risk')
print(f'Categorical features: {categorical_features}')
print(f'Numerical features: {numerical_features}')

# Using pd.get_dummies to one-hot-encode the categorical features
# (columns are named "[FEATURE]_[CATEGORY]"):
preprocessed_data = pd.get_dummies(preprocessed_data, columns=categorical_features, dtype='int64')

# Remapping the target variable to 0 and 1:
preprocessed_data['Credit Risk'] = preprocessed_data['Credit Risk'].map({1: 0, 2: 1})

# Make sure all column names are valid python identifiers (important for pd.query() calls).
# `regex=False` makes the replacements literal regardless of the pandas version's default.
preprocessed_data.columns = (
    preprocessed_data.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('/', '_', regex=False)
)

display(preprocessed_data.head())
# `info()` prints its report and returns None; display() would add a stray "None".
preprocessed_data.info()
Categorical features: Index(['Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account',
'Purpose'],
dtype='object')
Numerical features: Index(['Age', 'Credit amount', 'Duration'], dtype='object')
| Age | Credit_amount | Duration | Credit_Risk | Sex_female | Sex_male | Job_highlyskilled | Job_skilled | Job_unskilled_nonresident | Job_unskilled_resident | ... | Checking_account_none | Checking_account_rich | Purpose_business | Purpose_car | Purpose_domestic_appliances | Purpose_education | Purpose_furniture_equipment | Purpose_radio_TV | Purpose_repairs | Purpose_vacation_others | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 67 | 1169 | 6 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 1 | 22 | 5951 | 48 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2 | 49 | 2096 | 12 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | ... | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 3 | 45 | 7882 | 42 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 4 | 53 | 4870 | 24 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 30 columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 30 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 954 non-null int64
1 Credit_amount 954 non-null int64
2 Duration 954 non-null int64
3 Credit_Risk 954 non-null int64
4 Sex_female 954 non-null int64
5 Sex_male 954 non-null int64
6 Job_highlyskilled 954 non-null int64
7 Job_skilled 954 non-null int64
8 Job_unskilled_nonresident 954 non-null int64
9 Job_unskilled_resident 954 non-null int64
10 Housing_free 954 non-null int64
11 Housing_own 954 non-null int64
12 Housing_rent 954 non-null int64
13 Saving_accounts_little 954 non-null int64
14 Saving_accounts_moderate 954 non-null int64
15 Saving_accounts_none 954 non-null int64
16 Saving_accounts_quite_rich 954 non-null int64
17 Saving_accounts_rich 954 non-null int64
18 Checking_account_little 954 non-null int64
19 Checking_account_moderate 954 non-null int64
20 Checking_account_none 954 non-null int64
21 Checking_account_rich 954 non-null int64
22 Purpose_business 954 non-null int64
23 Purpose_car 954 non-null int64
24 Purpose_domestic_appliances 954 non-null int64
25 Purpose_education 954 non-null int64
26 Purpose_furniture_equipment 954 non-null int64
27 Purpose_radio_TV 954 non-null int64
28 Purpose_repairs 954 non-null int64
29 Purpose_vacation_others 954 non-null int64
dtypes: int64(30)
memory usage: 223.7 KB
None
[7]:
# Separate the feature matrix from the binary target column.
X = preprocessed_data.drop(columns='Credit_Risk')
y = preprocessed_data['Credit_Risk']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
[8]:
# Fit the model that the explainers will later be asked to explain.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Hard class labels are the right input for accuracy ...
y_pred = clf.predict(X_test)
# ... but ROC AUC is a ranking metric and needs a continuous score: use the predicted
# probability of the positive class (label 1). Feeding it the 0/1 predictions collapses
# the ROC curve to a single operating point and understates the model's ranking quality.
y_score = clf.predict_proba(X_test)[:, 1]

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'ROC AUC: {roc_auc_score(y_test, y_score)}')
Accuracy: 0.7696335078534031
ROC AUC: 0.6830357142857143
Applying the Aggregate Explainer
[ ]:
# Build the aggregate explainer on top of the trained classifier.
agg_explainer = AggregatedExplainer(
    # Wrapped explainers whose explanations will be aggregated
    explainer_types=[LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper],
    # Model and training data
    model=clf,
    X_train=X_train,
    categorical_feature_names=categorical_features,
    # Metrics to be considered for the aggregation
    metrics=['nrc', 'sensitivity_spearman', 'faithfulness_corr'],
    # Arguments passed to the autoencoder noisy data generator
    noise_gen_args={'encoding_dim': 5, 'epochs': 500},
    # Arguments passed to the evaluator class
    evaluator_args={"debug": False},
)
[11]:
# Explain one test instance with the aggregate explainer, then show the raw importance
# scores next to the ranking derived from them.
sample_idx = 0
agg_explanation = agg_explainer.explain_instance(X_test.iloc[sample_idx])
ranked_explanation = get_ranked_explanation(agg_explanation)
display_side_by_side(
    [agg_explanation, ranked_explanation],
    captions=['Feature importance scores:', 'Feature importance ranking:'],
)
| feature | score | |
|---|---|---|
| 0 | Duration | 0.809114 |
| 1 | Purpose_furniture_equipment | 0.568681 |
| 2 | Checking_account_none | 0.497863 |
| 3 | Age | 0.232698 |
| 4 | Checking_account_little | 0.111593 |
| 5 | Credit_amount | 0.041399 |
| 6 | Checking_account_moderate | 0.039992 |
| 7 | Housing_free | 0.027987 |
| 8 | Sex_female | 0.018932 |
| 9 | Saving_accounts_moderate | 0.015219 |
| 10 | Sex_male | 0.014925 |
| 11 | Job_highlyskilled | 0.009129 |
| 12 | Housing_own | 0.008308 |
| 13 | Purpose_car | 0.008169 |
| 14 | Saving_accounts_little | 0.006912 |
| 15 | Purpose_radio_TV | 0.006733 |
| 16 | Job_skilled | 0.006117 |
| 17 | Saving_accounts_none | 0.006015 |
| 18 | Job_unskilled_resident | 0.005921 |
| 19 | Housing_rent | 0.004658 |
| 20 | Job_unskilled_nonresident | 0.004482 |
| 21 | Saving_accounts_quite_rich | 0.002573 |
| 22 | Purpose_education | 0.002123 |
| 23 | Purpose_repairs | 0.001111 |
| 24 | Purpose_business | 0.000986 |
| 25 | Purpose_vacation_others | 0.000942 |
| 26 | Checking_account_rich | 0.000937 |
| 27 | Saving_accounts_rich | 0.000645 |
| 28 | Purpose_domestic_appliances | 0.000545 |
| feature | rank | |
|---|---|---|
| 0 | Duration | 1 |
| 1 | Purpose_furniture_equipment | 2 |
| 2 | Checking_account_none | 3 |
| 3 | Age | 4 |
| 4 | Checking_account_little | 5 |
| 5 | Credit_amount | 6 |
| 6 | Checking_account_moderate | 6 |
| 7 | Housing_free | 7 |
| 8 | Sex_female | 7 |
| 9 | Saving_accounts_moderate | 8 |
| 10 | Sex_male | 8 |
| 11 | Job_highlyskilled | 8 |
| 12 | Housing_own | 8 |
| 13 | Purpose_car | 8 |
| 14 | Saving_accounts_little | 8 |
| 15 | Purpose_radio_TV | 8 |
| 16 | Job_skilled | 8 |
| 17 | Saving_accounts_none | 8 |
| 18 | Job_unskilled_resident | 8 |
| 19 | Housing_rent | 9 |
| 20 | Job_unskilled_nonresident | 9 |
| 21 | Saving_accounts_quite_rich | 9 |
| 22 | Purpose_education | 9 |
| 23 | Purpose_repairs | 9 |
| 24 | Purpose_business | 9 |
| 25 | Purpose_vacation_others | 9 |
| 26 | Checking_account_rich | 9 |
| 27 | Saving_accounts_rich | 9 |
| 28 | Purpose_domestic_appliances | 9 |
Get information on the aggregate explainer’s last explanation
With the get_last_explanation_info() method, you can get a dataframe that contains each of the aggregated explanation models’ performances on each of the metrics used to evaluate them. You are also given the weight each explanation model got from the MCDM algorithm, which is passed on to the rank aggregation step.
[24]:
# Show, for the aggregate explainer's last explanation, each component explainer's
# metric values together with the weight it was assigned.
agg_explainer.get_last_explanation_info()
[24]:
| nrc | sensitivity_spearman | faithfulness_corr | weight | |
|---|---|---|---|---|
| LimeWrapper | 42.504547 | 0.839113 | 0.110158 | 0.533547 |
| ShapTabularTreeWrapper | 43.531226 | 0.964205 | 0.167030 | 0.438026 |
| AnchorWrapper | 42.491709 | 0.570668 | 0.448899 | 0.585786 |
[25]:
# Show the individual explanations that went into the last aggregated explanation.
component_captions = ['LIME explanation:', 'SHAP explanation:', 'Anchor explanation:']
display_side_by_side(
    agg_explainer.last_explanation_components,
    captions=component_captions,
)
| feature | score | |
|---|---|---|
| 0 | Checking_account_none | 0.060765 |
| 1 | Duration | 0.056665 |
| 2 | Checking_account_little | 0.035393 |
| 3 | Age | 0.028167 |
| 4 | Checking_account_moderate | 0.017343 |
| 5 | Housing_own | 0.013374 |
| 6 | Saving_accounts_little | 0.009622 |
| 7 | Credit_amount | 0.008884 |
| 8 | Housing_rent | 0.008133 |
| 9 | Sex_male | 0.007520 |
| 10 | Purpose_radio_TV | 0.006844 |
| 11 | Purpose_car | 0.006059 |
| 12 | Saving_accounts_none | 0.005942 |
| 13 | Housing_free | 0.005901 |
| 14 | Sex_female | 0.004537 |
| 15 | Saving_accounts_rich | 0.004407 |
| 16 | Purpose_education | 0.003162 |
| 17 | Job_skilled | 0.002704 |
| 18 | Saving_accounts_moderate | 0.002680 |
| 19 | Purpose_vacation_others | 0.002339 |
| 20 | Checking_account_rich | 0.002247 |
| 21 | Job_unskilled_nonresident | 0.001711 |
| 22 | Purpose_repairs | 0.001627 |
| 23 | Purpose_furniture_equipment | 0.001377 |
| 24 | Purpose_domestic_appliances | 0.001172 |
| 25 | Job_highlyskilled | 0.001144 |
| 26 | Saving_accounts_quite_rich | 0.001032 |
| 27 | Job_unskilled_resident | 0.000776 |
| 28 | Purpose_business | 0.000121 |
| feature | score | |
|---|---|---|
| 0 | Duration | 0.051965 |
| 1 | Checking_account_none | 0.048818 |
| 2 | Age | 0.044427 |
| 3 | Checking_account_little | 0.030740 |
| 4 | Checking_account_moderate | 0.025005 |
| 5 | Credit_amount | 0.018809 |
| 6 | Saving_accounts_moderate | 0.011132 |
| 7 | Purpose_furniture_equipment | 0.009065 |
| 8 | Sex_female | 0.007021 |
| 9 | Purpose_car | 0.006882 |
| 10 | Housing_free | 0.006844 |
| 11 | Saving_accounts_none | 0.004482 |
| 12 | Sex_male | 0.004218 |
| 13 | Job_unskilled_resident | 0.004134 |
| 14 | Saving_accounts_little | 0.003987 |
| 15 | Job_highlyskilled | 0.002708 |
| 16 | Saving_accounts_quite_rich | 0.001809 |
| 17 | Purpose_education | 0.001744 |
| 18 | Job_skilled | 0.001720 |
| 19 | Housing_own | 0.001668 |
| 20 | Purpose_repairs | 0.001397 |
| 21 | Purpose_vacation_others | 0.000760 |
| 22 | Purpose_business | 0.000601 |
| 23 | Saving_accounts_rich | 0.000569 |
| 24 | Checking_account_rich | 0.000486 |
| 25 | Purpose_radio_TV | 0.000356 |
| 26 | Housing_rent | 0.000178 |
| 27 | Purpose_domestic_appliances | 0.000000 |
| 28 | Job_unskilled_nonresident | 0.000000 |
| feature | score | |
|---|---|---|
| 0 | Purpose_furniture_equipment | 0.813893 |
| 1 | Age | 0.503277 |
| 2 | Duration | 0.433814 |
| 3 | Sex_female | 0.313237 |
| 4 | Housing_own | 0.296199 |
| 5 | Checking_account_little | 0.283093 |
| 6 | Saving_accounts_rich | 0.000000 |
| 7 | Purpose_repairs | 0.000000 |
| 8 | Purpose_radio_TV | 0.000000 |
| 9 | Purpose_education | 0.000000 |
| 10 | Purpose_domestic_appliances | 0.000000 |
| 11 | Purpose_car | 0.000000 |
| 12 | Purpose_business | 0.000000 |
| 13 | Checking_account_rich | 0.000000 |
| 14 | Checking_account_none | 0.000000 |
| 15 | Checking_account_moderate | 0.000000 |
| 16 | Saving_accounts_none | 0.000000 |
| 17 | Saving_accounts_quite_rich | 0.000000 |
| 18 | Credit_amount | 0.000000 |
| 19 | Saving_accounts_moderate | 0.000000 |
| 20 | Saving_accounts_little | 0.000000 |
| 21 | Housing_rent | 0.000000 |
| 22 | Housing_free | 0.000000 |
| 23 | Job_unskilled_resident | 0.000000 |
| 24 | Job_unskilled_nonresident | 0.000000 |
| 25 | Job_skilled | 0.000000 |
| 26 | Job_highlyskilled | 0.000000 |
| 27 | Sex_male | 0.000000 |
| 28 | Purpose_vacation_others | 0.000000 |
Evaluating the aggregate explainer
The ExplanationModelEvaluator Class
This class holds all definitions for the metrics used to evaluate the explanation models. The aggregate explainer maintains an instance of this class in order to use its evaluations in the aggregation process. It is designed so that it can be used on any explainer that follows the interface and behavior conventions of the explainers.py file.
Using the internal ExplanationModelEvaluator instance
In order to be used, the ExplanationModelEvaluator class must be instantiated and its init() method must be called. This process, however, is somewhat time-consuming, since one of the metrics defined by this class relies on generating a noisy variation of the training data, and, to do that, an autoencoder is trained with tensorflow.
However, this is usually not necessary, since the AggregateExplainer class maintains its own instance of the ExplanationModelEvaluator class, which can be used normally.
[12]:
# Two ways of obtaining an ExplanationModelEvaluator:
#
# ++ Usual instantiation of the ExplanationModelEvaluator class (kept here for reference only):
#
# evaluator = ExplanationModelEvaluator(clf, X_train, categorical_features)
# evaluator.init() # Takes some time to train the autoencoder
#
# ++ Or, grab the one maintained by the AggregatedExplainer (no extra initialization needed):
evaluator = agg_explainer.xai_evaluator
[WORKAROUND] Applying the sensitivity metric to the aggregate explainer:
One of the metrics defined in the ExplanationModelEvaluator class is the sensitivity metric. The way it works requires it to create several new instances of the explanation model being evaluated, since they each need to be fit to a different noisy variation of the training data. This process is very slow, and therefore multiprocessing is used in the sensitivity() function to distribute the workload. This, however, poses an issue when evaluating the sensitivity of the aggregate explainer model, since it may also use the sensitivity metric itself to perform the aggregation, which means a child process would have to create another child process, which usually is not allowed.
As of now, in order to apply the sensitivity metric to the aggregate explainer, you must use a variation of its implementation that does the calculation without multiprocessing. A sequential version of the sensitivity() metric is provided by the _sensitivity_sequential() function.
[ ]:
# Must specify everything the explainer needs to be instantiated.
sequential_sensitivity_params = {
    "explainer_types": [LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper],
    # Remember to reuse the same evaluator instance, otherwise the autoencoder will be
    # retrained for every iteration.
    "evaluator": agg_explainer.xai_evaluator,
}

# Sensitivity of the aggregate explainer, computed without multiprocessing.
evaluator._sensitivity_sequential(
    agg_explainer,
    X_test.iloc[sample_idx],
    extra_explainer_params=sequential_sensitivity_params,
    iterations=3,
)
0.9400656814449916
Full evaluation of the aggregate explainer
Here’s one way of evaluating the aggregate explainer and comparing it to the explainers whose explanations were aggregated. In this example, the aggregate explainer was evaluated with the same metrics it used to internally evaluate each of the component models. The get_last_explanation_info() function was used to retrieve the metrics that were calculated internally, so they aren’t calculated twice.
[15]:
# Score the aggregate explainer itself on the sample instance. The three evaluations run
# in this order, and all of them run before get_last_explanation_info() is read below.
faithfulness = evaluator.faithfullness_correlation(
    agg_explainer,
    X_test.iloc[sample_idx],
)
# sequential version of sensitivity must be used at this time
sensitivity = evaluator._sensitivity_sequential(
    agg_explainer,
    X_test.iloc[sample_idx],
    extra_explainer_params={
        "explainer_types": [LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper],
        "evaluator": agg_explainer.xai_evaluator,
    },
    iterations=10,
)
nrc = evaluator.nrc(agg_explainer, X_test.iloc[sample_idx])

# Start from the component explainers' internally computed metrics (without the weight
# column) and append one row holding the aggregate explainer's own scores.
metrics = agg_explainer.get_last_explanation_info().drop(columns='weight')
aggregate_label = AggregatedExplainer.__name__
for metric_name, metric_value in [
    ('faithfulness_corr', faithfulness),
    ('sensitivity_spearman', sensitivity),
    ('nrc', nrc),
]:
    metrics.at[aggregate_label, metric_name] = metric_value
[16]:
# Component explainers' metrics with the aggregate explainer's row appended, for comparison.
metrics
[16]:
| nrc | sensitivity_spearman | faithfulness_corr | |
|---|---|---|---|
| LimeWrapper | 46.152620 | 0.859212 | 0.355147 |
| ShapTabularTreeWrapper | 42.648201 | 0.954843 | 0.154678 |
| AnchorWrapper | 18.442814 | 0.668667 | 0.079319 |
| AggregatedExplainer | 44.579487 | 0.913744 | 0.320685 |
Using the xai_agg.exp_utils.evaluate_aggregate_explainer() function
Utility function to evaluate the aggregate explainer while varying its settings. For each of the aggregate explainer’s parameters (explainer components, MCDM algorithm, aggregation algorithm, metric set), the function accepts a list of possible values. It iterates over every possible combination of those values, evaluating n_instances instances for each, and returns the results as a list of lists of dataframes: one dataframe per evaluated instance, and one list of dataframes per setting configuration.
[ ]:
from xai_agg.exp_utils import evaluate_aggregate_explainer

# Run the evaluation over every combination of the settings listed below.
results, metadata = evaluate_aggregate_explainer(
    # Model and data
    clf,
    X_train,
    X_test,
    categorical_features,
    # Wrapped explainer sets to be tested
    explainer_components_sets=[[LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper]],
    # MCDM algorithms to be tested
    mcdm_algs=[pymcdm.methods.TOPSIS()],
    # Aggregation algorithms to be tested
    aggregation_algs=["wsum"],
    # Metric sets to be tested
    metrics_sets=[['nrc', 'sensitivity_spearman', 'faithfulness_corr']],
    # Number of instances per setting to run the evaluation on
    n_instances=1,
    # Number of jobs to run in parallel (DECREASE THIS VALUE WHEN LOW RAM IS AVAILABLE)
    mp_jobs=5,
)
[9]:
# Bundle the evaluation output into a single object, then show the results followed by
# the metadata describing the run.
experiment_run = ExperimentRun(metadata, results)
display(experiment_run.results, experiment_run.metadata)
[[ nrc sensitivity_spearman faithfulness_corr
LimeWrapper 45.304455 0.838916 0.182748
ShapTabularTreeWrapper 44.518230 1.000000 0.240986
AnchorWrapper 35.929599 0.616926 0.326659
AggregateExplainer 48.324269 0.881232 0.286450]]
{'indexes': array([110]),
'configs': [{'explainer_components': [xai_agg.explainers.LimeWrapper,
xai_agg.explainers.ShapTabularTreeWrapper,
xai_agg.explainers.AnchorWrapper],
'metrics': ['nrc', 'sensitivity_spearman', 'faithfulness_corr'],
'mcdm_alg': <pymcdm.methods.topsis.TOPSIS at 0x7becc43bf9a0>,
'aggregation_alg': 'wsum'}]}
[10]:
# Get mean results for a specific setting:
# NOTE(review): `desired_setting` presumably indexes the configurations listed under
# metadata['configs'] (only one was run here, so 0 is the only choice) - confirm.
desired_setting = 0
get_expconfig_mean_results(experiment_run, desired_setting)
[10]:
| nrc | sensitivity_spearman | faithfulness_corr | |
|---|---|---|---|
| AggregateExplainer | 48.324269 | 0.881232 | 0.286450 |
| AnchorWrapper | 35.929599 | 0.616926 | 0.326659 |
| LimeWrapper | 45.304455 | 0.838916 | 0.182748 |
| ShapTabularTreeWrapper | 44.518230 | 1.000000 | 0.240986 |