import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
from sklearn import tree
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
import matplotlib.cm as cm
from sklearn.decomposition import PCA
from ELib import *
import warnings
warnings.filterwarnings('ignore')
import ResidualDecomposition as RD
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)
# Load the feature table; the path is relative to the current working directory.
data = pd.read_csv("../../Data/applied/Metallic_Glass_Forming_with_features.csv")
# Preview the first rows (shown as the cell output when run in a notebook).
data.head()
Material compositions | main_element | Trg | Density_composition_average | IsBoron_composition_average | IsDBlock_composition_average | IsTransitionMetal_composition_average | NdValence_composition_average | NValance_composition_average | HeatVaporization_max_value | ... | NdValence_difference | NsUnfilled_difference | valence_difference | Site1_Density | Site1_HeatCapacityMass | Site1_HeatFusion | Site1_IsDBlock | Site1_IsTransitionMetal | Site1_NdValence | Site1_SpecificHeatCapacity | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Ag10Cu35Zr55 | Zr | 0.534 | 0.579069 | -0.5856 | 1.029882 | 1.029882 | 0.589536 | 0.217571 | 0.563915 | ... | 0.473852 | 0.857849 | 0.264503 | 1.091651 | -0.887077 | -0.454471 | 0.718014 | 0.718014 | 1.31768 | -0.884994 |
1 | Ag10Cu50Zr40 | Cu | 0.608 | 0.748401 | -0.5856 | 1.029882 | 1.029882 | 1.176250 | 0.653969 | 0.563915 | ... | 0.473852 | 0.857849 | 0.264503 | 1.091651 | -0.887077 | -0.454471 | 0.718014 | 0.718014 | 1.31768 | -0.884994 |
2 | Ag10Cu55Zr35 | Cu | 0.609 | 0.804845 | -0.5856 | 1.029882 | 1.029882 | 1.371822 | 0.799435 | 0.563915 | ... | 0.473852 | 0.857849 | 0.264503 | 1.091651 | -0.887077 | -0.454471 | 0.718014 | 0.718014 | 1.31768 | -0.884994 |
3 | Ag20Cu40Zr40 | Cu | 0.577 | 0.821973 | -0.5856 | 1.029882 | 1.029882 | 1.176250 | 0.653969 | 0.563915 | ... | 0.473852 | 0.857849 | 0.264503 | 1.091651 | -0.887077 | -0.454471 | 0.718014 | 0.718014 | 1.31768 | -0.884994 |
4 | Ag35Ca65 | Ca | 0.515 | -0.860992 | -0.5856 | -0.998961 | -0.998961 | -0.437214 | -0.613663 | -1.684370 | ... | 1.005911 | 0.857849 | -1.745035 | 1.091651 | -0.887077 | -0.454471 | 0.718014 | 0.718014 | 1.31768 | -0.884994 |
5 rows × 23 columns
# Regression target: the 'Trg' column.
Y = data['Trg']

# Feature matrix: every column from position 4 onward, passed through
# scale_data (a name brought in by the star import from ELib).
# NOTE(review): in the table preview, position 3 looks like
# Density_composition_average, so it is excluded from X - confirm intended.
feature_names = data.columns[4:]
X = scale_data(data.iloc[:, 4:]).values

# Fit a 25-tree random forest on the scaled features.
rf = RandomForestRegressor(n_estimators=25)
rf.fit(X, Y)

# Order features from least to most important; the values are the
# importances re-indexed by that same ordering.
features_args = np.argsort(rf.feature_importances_)
features_vals = rf.feature_importances_[features_args]

# Horizontal bar chart of the ranked importances.
plt.xticks(rotation=90)
plt.barh(feature_names[features_args], features_vals)
plt.title("Feature Importances according to Random Forest")
plt.xlabel("Relative Importance Value")
plt.ylabel("Feature Descriptor")
Text(0, 0.5, 'Feature Descriptor')
# --- Linear-regression matrix -------------------------------------------
# data_lr_composition is an alias of data_lr (same array object, no copy).
data_lr = np.genfromtxt("Data/LR_MGD.csv", delimiter=',')
data_lr_composition = data_lr
# Column sums decide the sign flip: the transposed matrix is multiplied by
# the negated sign of each column sum and transposed back.
# NOTE(review): this broadcast presumably relies on the matrix being
# square - confirm against the files' shape.
summed_composition = np.sum(data_lr_composition, axis=0)
lr_flip = np.negative(np.sign(summed_composition))
data_lr_contribution = np.multiply(data_lr_composition.T, lr_flip).T

# --- Random-forest matrix -----------------------------------------------
# Same treatment; summed_composition is overwritten with the RF column sums.
data_rf = np.genfromtxt("Data/RF_MGD.csv", delimiter=',')
data_rf_composition = data_rf
summed_composition = np.sum(data_rf_composition, axis=0)
rf_flip = np.negative(np.sign(summed_composition))
data_rf_contribution = np.multiply(data_rf_composition.T, rf_flip).T
def plot_group(elements_subgroup, data_d, index_i):
    """Scatter the RF composition/contribution means, highlighting a subgroup.

    Rows of ``data_d`` whose value in the column at position ``index_i`` is
    contained in ``elements_subgroup`` are drawn in red; all remaining rows
    are drawn in blue. Horizontal and vertical reference lines are drawn at
    zero. Everything is drawn on the current matplotlib axes.

    Reads the module-level arrays ``data_rf_composition`` and
    ``data_rf_contribution``.

    Args:
        elements_subgroup: Collection of values to match, passed to
            ``Series.isin``.
        data_d: DataFrame whose rows are classified. Its index labels are
            used directly to index the mean vectors, so it presumably needs
            integer labels that line up with the matrix rows - confirm
            against callers.
        index_i: Integer position of the column in ``data_d`` to test.

    Returns:
        None.
    """
    in_subgroup = data_d.iloc[:, index_i].isin(elements_subgroup)
    g1 = data_d[in_subgroup].index
    g2 = data_d[~in_subgroup].index

    # Compute each mean vector once and reuse it for both groups.
    composition_mean = np.mean(data_rf_composition, axis=0)
    contribution_mean = np.mean(data_rf_contribution, axis=1)

    plt.axhline(0, c='r')
    plt.axvline(0, c='orange')
    # Non-members first so the highlighted subgroup is drawn on top.
    plt.scatter(composition_mean[g2], contribution_mean[g2], c='b', s=10)
    plt.scatter(composition_mean[g1], contribution_mean[g1], c='r', s=10)
# Composition-vs-contribution ("CC") scatter plots, one panel per model,
# with every point coloured by the target Y.
plt.figure(figsize=(14, 10))

# (subplot position, model label for the title, composition, contribution)
cc_panels = (
    (221, "LR", data_lr_composition, data_lr_contribution),
    (222, "RF", data_rf_composition, data_rf_contribution),
)
for subplot_position, model_label, composition, contribution in cc_panels:
    plt.subplot(subplot_position)
    # The title names the model whose matrices are actually plotted here.
    # NOTE(review): the title says "DSD" while the inputs are the *_MGD.csv
    # files - confirm the intended dataset label.
    plt.title("CC-Plot for DSD using {} Colored by Trg".format(model_label))
    plt.xlabel("Composition Mean")
    plt.ylabel("Contribution Mean")
    # Zero reference lines.
    plt.axhline(0, c='r')
    plt.axvline(0, c='orange')
    # x: mean over axis 0 of the composition matrix;
    # y: mean over axis 1 of the contribution matrix.
    plt.scatter(np.mean(composition, axis=0), np.mean(contribution, axis=1), c=Y)
    cbar = plt.colorbar()
    cbar.ax.tick_params()
# Average RF contribution between every pair of main elements.
uniq_elementsA = np.unique(data['main_element'])
uniq_elements_countA = len(uniq_elementsA)

# The matrix must exist before it can be filled by index.
avc_matrix = np.zeros((uniq_elements_countA, uniq_elements_countA))

# Row positions of the samples belonging to each main element, computed once
# instead of on every pass of the nested loop.
element_rows = [
    np.where(data['main_element'] == element)[0] for element in uniq_elementsA
]

for y, a in enumerate(element_rows):
    for z, b in enumerate(element_rows):
        # Mean of the sub-block selected by rows `a` and columns `b`.
        avc_matrix[y, z] = np.mean(data_rf_contribution[a, :][:, b])

# Rescale the two signs independently: positive entries are divided by the
# largest value and negative entries by the magnitude of the smallest, so
# the largest becomes 1 and the smallest becomes -1 (when such entries exist).
avc_matrix[np.where(avc_matrix > 0)] /= np.max(avc_matrix)
avc_matrix[np.where(avc_matrix < 0)] /= -np.min(avc_matrix)
# Tick labels for the heatmap: every second element (2nd, 4th, ...) gets a
# trailing '⟼' appended; the others are used unchanged.
# Presumably this staggers neighbouring labels for readability - confirm.
unique_elements_display = [
    '{}⟼'.format(element) if position % 2 == 0 else element
    for position, element in enumerate(uniq_elementsA, start=1)
]
# Heatmap of the normalised main-element contribution matrix.
plt.figure(figsize=(12, 10))

# One tick per unique main element on both axes, using the display labels.
tick_positions = np.arange(0, uniq_elements_countA)
plt.xticks(ticks=tick_positions, labels=unique_elements_display, fontsize=18, rotation=90)
plt.yticks(ticks=tick_positions, labels=unique_elements_display, fontsize=18)

# The matrix is transposed before display.
plt.imshow(avc_matrix.T)
plt.ylabel("Main Element Contributions", fontsize=24)
plt.xlabel("Main Element Compositions", fontsize=24)

# Set the colorbar tick font size directly, leaving tick placement and
# number formatting to matplotlib instead of overwriting the labels.
cbar = plt.colorbar()
cbar.ax.tick_params(labelsize=18)

# Make sure the output directory exists so savefig does not fail.
os.makedirs("Figures", exist_ok=True)
plt.savefig("Figures/MGD_heatmap.pdf", bbox_inches='tight')