In [31]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys

from sklearn.ensemble import RandomForestRegressor 
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
from sklearn import tree
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
import matplotlib.cm as cm
from sklearn.decomposition import PCA

from ELib import *

import warnings
# Suppress every warning for the rest of the session. Note this also hides
# deprecation and convergence warnings raised by the libraries used below.
warnings.filterwarnings('ignore')
import ResidualDecomposition as RD

# Seed NumPy's global random generator so that code drawing from it is
# repeatable across runs (presumably including the random forest fitted
# below, which is created without an explicit random_state -- confirm).
np.random.seed(0)
In [33]:
# Load the feature table from CSV; the path is relative to the notebook's
# working directory.
data = pd.read_csv(
    filepath_or_buffer="../../Data/applied/Metallic_Glass_Forming_with_features.csv"
)
In [34]:
# Preview the first five rows to sanity-check the column layout.
data.head(n=5)
Out[34]:
Material compositions main_element Trg Density_composition_average IsBoron_composition_average IsDBlock_composition_average IsTransitionMetal_composition_average NdValence_composition_average NValance_composition_average HeatVaporization_max_value ... NdValence_difference NsUnfilled_difference valence_difference Site1_Density Site1_HeatCapacityMass Site1_HeatFusion Site1_IsDBlock Site1_IsTransitionMetal Site1_NdValence Site1_SpecificHeatCapacity
0 Ag10Cu35Zr55 Zr 0.534 0.579069 -0.5856 1.029882 1.029882 0.589536 0.217571 0.563915 ... 0.473852 0.857849 0.264503 1.091651 -0.887077 -0.454471 0.718014 0.718014 1.31768 -0.884994
1 Ag10Cu50Zr40 Cu 0.608 0.748401 -0.5856 1.029882 1.029882 1.176250 0.653969 0.563915 ... 0.473852 0.857849 0.264503 1.091651 -0.887077 -0.454471 0.718014 0.718014 1.31768 -0.884994
2 Ag10Cu55Zr35 Cu 0.609 0.804845 -0.5856 1.029882 1.029882 1.371822 0.799435 0.563915 ... 0.473852 0.857849 0.264503 1.091651 -0.887077 -0.454471 0.718014 0.718014 1.31768 -0.884994
3 Ag20Cu40Zr40 Cu 0.577 0.821973 -0.5856 1.029882 1.029882 1.176250 0.653969 0.563915 ... 0.473852 0.857849 0.264503 1.091651 -0.887077 -0.454471 0.718014 0.718014 1.31768 -0.884994
4 Ag35Ca65 Ca 0.515 -0.860992 -0.5856 -0.998961 -0.998961 -0.437214 -0.613663 -1.684370 ... 1.005911 0.857849 -1.745035 1.091651 -0.887077 -0.454471 0.718014 0.718014 1.31768 -0.884994

5 rows × 23 columns

In [35]:
# Features: every column from position 4 onward, passed through scale_data
# (presumably provided by the `from ELib import *` star import) and converted
# to a plain NumPy array.
# NOTE(review): in the data.head() preview, position 3 is
# Density_composition_average, so that descriptor is excluded from X --
# confirm this is intended and not an off-by-one.
feature_columns = data.iloc[:, 4:]
X = scale_data(feature_columns).values

# Target: the Trg column.
Y = data['Trg']
In [36]:
# Fit a 25-tree random forest on the scaled descriptors; fit() returns the
# estimator itself, so construction and fitting can be chained.
rf = RandomForestRegressor(n_estimators=25).fit(X, Y)

# Rank descriptors by importance, ascending, so the most important one ends
# up at the top of the horizontal bar chart.
features_args = np.argsort(rf.feature_importances_)
features_vals = rf.feature_importances_[features_args]

plt.xticks(rotation=90)
plt.barh(data.columns[4:][features_args], features_vals)
plt.xlabel("Relative Importance Value")
plt.title("Feature Importances according to Random Forest")
plt.ylabel("Feature Descriptor")
Out[36]:
Text(0, 0.5, 'Feature Descriptor')
In [37]:
# Linear-regression matrix: load it, then flip the sign of its entries
# according to the sign of the column sums. The transpose / multiply /
# transpose sequence applies the k-th sign factor to the k-th row.
data_lr = np.genfromtxt("Data/LR_MGD.csv", delimiter=',')
data_lr_composition = data_lr
summed_composition = data_lr_composition.sum(axis=0)
data_lr_contribution = np.transpose(
    np.transpose(data_lr_composition) * -np.sign(summed_composition)
)

# Random-forest matrix: identical treatment. `summed_composition` is reused,
# so after this cell it holds the random-forest column sums.
data_rf = np.genfromtxt("Data/RF_MGD.csv", delimiter=',')
data_rf_composition = data_rf
summed_composition = data_rf_composition.sum(axis=0)
data_rf_contribution = np.transpose(
    np.transpose(data_rf_composition) * -np.sign(summed_composition)
)
In [38]:
def plot_group(elements_subgroup, data_d, index_i):
    """Scatter the RF composition/contribution means, highlighting one subgroup.

    Rows of ``data_d`` whose value in column position ``index_i`` is contained
    in ``elements_subgroup`` are drawn in red; all remaining rows are drawn in
    blue underneath them. Reference lines are drawn at x = 0 and y = 0. No
    axis labels or title are set here.

    The plotted values are read from the notebook-level arrays
    ``data_rf_composition`` (mean over axis 0) and ``data_rf_contribution``
    (mean over axis 1).

    Parameters
    ----------
    elements_subgroup : list-like
        Values to match against the selected column (passed to ``isin``).
    data_d : pandas.DataFrame
        Table whose index labels are used to index the mean arrays
        (assumes a default 0..n-1 integer index aligned with the matrices --
        TODO confirm).
    index_i : int
        Positional index of the column of ``data_d`` to test.
    """
    # Fix: the original body read an undefined global `d` and ignored the
    # `data_d` argument, so it only worked with leftover kernel state.
    in_subgroup = data_d.iloc[:, index_i].isin(elements_subgroup)
    g1 = data_d[in_subgroup].index
    g2 = data_d[~in_subgroup].index

    composition_mean = np.mean(data_rf_composition, axis=0)
    contribution_mean = np.mean(data_rf_contribution, axis=1)

    plt.axhline(0, c='r')
    plt.axvline(0, c='orange')
    # Draw the complement first so the highlighted subgroup stays on top.
    plt.scatter(composition_mean[g2], contribution_mean[g2], c='b', s=10)
    plt.scatter(composition_mean[g1], contribution_mean[g1], c='r', s=10)
In [39]:
# Composition-vs-contribution ("CC") scatter plots for the linear-regression
# and random-forest matrices, side by side, with points coloured by Trg.
plt.figure(figsize=(14,10))

# Fix: the left panel plots the LR matrices but was titled "using RF"
# (copy-paste from the right panel). Each panel now names its own model.
cc_panels = [
    (221, "LR", data_lr_composition, data_lr_contribution),
    (222, "RF", data_rf_composition, data_rf_contribution),
]

for subplot_position, model_name, composition, contribution in cc_panels:
    plt.subplot(subplot_position)

    plt.title("CC-Plot for DSD using {} Colored by Trg".format(model_name))
    plt.xlabel("Composition Mean")
    plt.ylabel("Contribution Mean")
    # Reference lines at zero contribution (horizontal) and zero composition
    # (vertical).
    plt.axhline(0, c='r')
    plt.axvline(0, c='orange')
    plt.scatter(np.mean(composition, axis=0), np.mean(contribution, axis=1), c=Y)

    cbar = plt.colorbar()
    cbar.ax.tick_params()
In [43]:
# Heat map of the mean random-forest contribution between groups of samples
# that share the same main element.
uniq_elementsA = np.unique(data['main_element'])
uniq_elements_countA = len(uniq_elementsA)

# Row positions of the samples belonging to each main element, computed once
# instead of once per (y, z) pair inside the nested loop.
main_element_values = data['main_element'].to_numpy()
element_positions = [
    np.where(main_element_values == element)[0] for element in uniq_elementsA
]

# Fix: the original cell assigned into avc_matrix without ever creating it,
# which raises NameError on a fresh kernel.
avc_matrix = np.zeros((uniq_elements_countA, uniq_elements_countA))

for y, a in enumerate(element_positions):
    for z, b in enumerate(element_positions):
        # Mean over the block: rows of group y, columns of group z.
        avc_matrix[y, z] = np.mean(data_rf_contribution[a, :][:, b])

# Rescale positive entries by the largest value and negative entries by the
# magnitude of the smallest, so both signs span at most [-1, 1]. The masks
# select nothing when there are no entries of that sign, so a zero extreme
# is never used as a divisor.
positive_scale = np.max(avc_matrix)
avc_matrix[avc_matrix > 0] /= positive_scale
negative_scale = -np.min(avc_matrix)
avc_matrix[avc_matrix < 0] /= negative_scale

# Tick labels: every second element gets an arrow suffix appended.
unique_elements_display = [
    '{}⟼'.format(element) if position % 2 == 0 else element
    for position, element in enumerate(uniq_elementsA, start=1)
]

plt.figure(figsize=(12,10))
tick_positions = np.arange(0, uniq_elements_countA)
plt.xticks(ticks=tick_positions, labels=unique_elements_display, fontsize=18, rotation=90)
plt.yticks(ticks=tick_positions, labels=unique_elements_display, fontsize=18)
# The matrix is transposed for display, so its first index runs along x.
plt.imshow(avc_matrix.T)

plt.ylabel("Main Element Contributions", fontsize=24)
plt.xlabel("Main Element Compositions", fontsize=24)

cbar = plt.colorbar()
# Set the colour-bar tick font size directly. The original re-assigned the
# tick labels from get_yticks() without fixing the tick locations, which
# prints raw float reprs and can mislabel ticks.
cbar.ax.tick_params(labelsize=18)

plt.savefig("Figures/MGD_heatmap.pdf", bbox_inches='tight')
In [ ]: