"""
"This script is wrote by Xin Li"
Descriptor calculation script for adsorbates-containing system:
    Automatic identification of adsorbents'ads',automatic identification of adsorption sites'center_index'

    1、The data path in the home directory identifies the VASP structure file
    2、Configure the 'Feature' table file in the home directory
    3、Adsorbates：C H O N
    4、The vacuum layer should be greater than the pbc threshold
    5、The maximum coordination number should be included in the number of substrate-slab floors
    6、The adsorption site and the nearest neighbor ratio for calculating the coordination number are the same.
        ratio = 1.2

"""

import os
from ase.io import vasp
from ase.io.trajectory import Trajectory
import numpy as np
import pandas as pd
from scipy.stats.mstats import gmean
import ase
from ase import *
from ase import neighborlist
from ase.visualize import view
import time
import warnings

t0 = time.time()
warnings.filterwarnings("ignore", category=Warning)

Feature = pd.DataFrame(pd.read_excel('Feature.xlsx'))
Feature.set_index(["AtomicNumber"], inplace=True)
result = pd.DataFrame()

filePath = 'structure'
file = os.listdir(filePath)
path = 'structure\\{}'

ads_atomicnumber = [1, 6, 7, 8]

######main function########
for item in range(len(file)):
    #structure = Trajectory(path.format(file[item]))[-1]
    structure = vasp.read_vasp(path.format(file[item]))
    print(structure)
    #c = xxx
    cell = structure.get_cell()
    # print(cell)
    structure.set_pbc([True, True, True])


    # view(structure)
    # print(structure)

    def height(position):
        n = np.cross(cell[0], cell[1])
        h = abs(np.array(position).dot(n) / (np.sqrt(n.dot(n))))
        return h


    ads = []
    for i in range(len(structure)):
        if structure.get_atomic_numbers()[i] in ads_atomicnumber:
            ads.append(i)


    surface = structure.copy()
    #surface = vasp.read_vasp(path.format(file[item]))
    surface.set_pbc([True, True, True])
    i = 0
    outflag = 0
    while outflag == 0:
        if surface.get_atomic_numbers()[i] in ads_atomicnumber:
            surface.pop(i)
            i = i - 1
        else:
            i = i + 1
        a = list(set(surface.get_atomic_numbers()))
        b = [1, 6, 7, 8]
        inter = [j for j in a if j in b]
        if len(inter) == 0:
            outflag = 1
    surface = surface.repeat([2, 2, 1])
    surface.set_pbc([True, True, False])


    structure_reset = structure.copy()
    #structure_reset = vasp.read_vasp(path.format(file[item]))
    structure_reset.set_pbc([True, True, True])
    i = 0
    outflag = 0
    while outflag == 0:
        if structure_reset.get_atomic_numbers()[i] in ads_atomicnumber:
            structure_reset.pop(i)
            i = i - 1
        else:
            i = i + 1
        a = list(set(structure_reset.get_atomic_numbers()))
        inter = [j for j in a if j in ads_atomicnumber]
        if len(inter) == 0:
            outflag = 1
    structure_reset = structure_reset.repeat([2, 2, 1])

    ads_new = []
    for i in range(len(ads)):
        structure_reset.append(Atom(structure.get_atomic_numbers()[ads[i]], position=structure.get_positions()[ads[i]]))
        ads_new.append(len(structure_reset) - 1)
    structure_reset.set_pbc([True, True, False])


    ads = ads_new
    # print(ads)
    # view(surface)
    #view(structure_reset)


    ads_h = []
    ads_c = []
    for i in range(len(ads)):
        ads_h.append(height(structure_reset.get_positions()[ads[i]]))
    for i in range(len(ads)):
        if ads_h[i] == min(ads_h):
            ads_c.append(ads[i])


    def distance(center):
        distance = []
        for i in range(len(surface)):
            for j in range(len(center)):
                d = structure_reset.get_distance(center[j], i, mic=True)
                distance.append(d)
        for i in range(len(distance)):
            if distance[i] == 0:
                distance[i] = distance[i] + max(distance)
        # print(min(distance))
        return distance


    def neighbor_atom(center, s):
        def cns(neiglist):
            cns = []
            complete_nl = []  # include neighborlist of all atoms
            for i in range(len(s)):
                neighs = neiglist.get_neighbors(i)
                neighs = neighs[0].tolist()
                neighs = [x for x in neighs if x != i]
                # print neighs
                cns.append(len(neighs))
                complete_nl.append(neighs)
            # print final_nl
            return np.array(cns), np.array(complete_nl)  # turn into an array

        ####### New Section ###################
        # update neighborlist and calculate new cns
        outflag = 0
        ratio = 1.2
        # print(min(distance(center)))
        while outflag == 0:
            cutoff = ratio * min(distance(center)) / 2
            # print(cutoff)
            cutoffs = [cutoff] * len(s)
            # print(cutoffs)
            nl = neighborlist.NeighborList(cutoffs, bothways=True, skin=0.0)
            nl.update(s)
            cordns, final_nl = cns(nl)  # final_nl = complete_nl
            if max(cordns) > 12:
                ratio = ratio - 0.02
            else:
                outflag = 1

        # print(cordns[center_index], final_nl[center_index])
        return cordns, final_nl, cordns[center], final_nl[center]

    x1, x2, center_num, center_atoms = neighbor_atom(ads_c, structure_reset)
    center_atoms = center_atoms[0]
    center_index = []
    for i in range(len(center_atoms)):
        if center_atoms[i] not in ads:
            center_index.append(center_atoms[i])


    CN_all, neighbor_all, CN_center, neighbor_center = neighbor_atom(center_index, surface)
    # print(CN_all,neighbor_all,CN_center,neighbor_center)
    nc = np.hstack((neighbor_center[i] for i in range(len(center_index))))
    nc = list(tuple(set(nc)))
    nearest_neighbor = []
    for i in range(len(nc)):
        if nc[i] not in center_index:
            nearest_neighbor.append(nc[i])


    local_structure = []
    local_structure = local_structure + nearest_neighbor
    local_structure = local_structure + center_index
    local_structure = list(tuple(set(local_structure)))

    print('The indexes of adsorption atoms are: ', center_index)
    print('The indexes of nearest neighbor atoms are: ', nearest_neighbor)
    print('The indexes of local structure atoms are:', local_structure)



    def electronic_descriptor(local_electronic_structure_index, local_electronic_structure):
        for i in range(len(local_electronic_structure_index)):
            #if local_electronic_structure.index[i] == 29:
            #    local_electronic_structure.loc[29, 'χ'] = 1.9 ** 0.5
            if local_electronic_structure.index[i] == 47:
                local_electronic_structure.loc[47, 'χ'] = 1.93 ** 0.5
            if local_electronic_structure.index[i] == 79:
                local_electronic_structure.loc[79, 'χ'] = 2.54 ** 0.5

        Sv1 = np.round(gmean(local_electronic_structure['Sv'].tolist()), 6)
        e1 = np.round(gmean(local_electronic_structure['χ'].tolist()), 6)
        psi1 = np.round(Sv1 ** 2 / e1, 6)
        Ecoh = 0 - np.round(gmean(list(map(abs,local_electronic_structure['Ecoh'].tolist()))), 6)
        An = np.round(gmean(local_electronic_structure.index.tolist()), 6)

        return Sv1, e1, psi1,Ecoh,An



    center_atoms = []
    site_ele = []
    for k in range(len(center_index)):
        an = structure_reset.get_atomic_numbers()[center_index[k]]
        center_atoms.append(Feature.loc[an, :])
        ele = structure_reset.get_chemical_symbols()[center_index[k]]
        site_ele.append(ele)
    center_atoms = pd.DataFrame(center_atoms)
    Sv0, e0, psi0, Ecoh0, An0 = electronic_descriptor(center_index, center_atoms)


    nearest_atoms = []
    for k in range(len(nearest_neighbor)):
        an = structure_reset.get_atomic_numbers()[nearest_neighbor[k]]
        nearest_atoms.append(Feature.loc[an, :])
    nearest_atoms = pd.DataFrame(nearest_atoms)
    Sv1, e1, psi1, Ecoh1,An1 = electronic_descriptor(nearest_neighbor, nearest_atoms)


    local_atoms = []
    for k in range(len(local_structure)):
        an = structure_reset.get_atomic_numbers()[local_structure[k]]
        local_atoms.append(Feature.loc[an, :])
    local_atoms = pd.DataFrame(local_atoms)
    Sv, e, psi, Ecoh,An = electronic_descriptor(local_structure, local_atoms)

    psi_gra = psi0*psi0/psi

    Sv3 = (Sv0 ** 3 * Sv1)**( 1/4 )
    e3 = (e0 ** 3 * e1) ** (1 / 4)
    psi3 = (psi0 ** 3 * psi1) ** (1 / 4)


    CN_s = len(nearest_neighbor)


    CN_m = []
    for i in range(len(surface)):
        nc2 = np.hstack([neighbor_all[i]])
        nc2 = list(tuple(set(nc2)))
        CN_m.append(len(nc2))
        if len(center_index) > 1:
            for j in range(len(surface)):
                if j in neighbor_all[i]:
                    nc2 = np.hstack([neighbor_all[i], neighbor_all[j]])
                    nc2 = list(tuple(set(nc2)))
                    nc2.remove(i)
                    nc2.remove(j)
                    CN_m.append(len(nc2))
                    if len(center_index) > 2:
                        for k in range(len(surface)):
                            if k in neighbor_all[i] and k in neighbor_all[j]:
                                nc2 = np.hstack([neighbor_all[i], neighbor_all[j], neighbor_all[k]])
                                nc2 = list(tuple(set(nc2)))
                                nc2.remove(i)
                                nc2.remove(j)
                                nc2.remove(k)
                                CN_m.append(len(nc2))
                                if len(center_index) > 3:
                                    for l in range(len(surface)):
                                        if l in neighbor_all[i] and l in neighbor_all[j] and l in neighbor_all[k]:
                                            nc2 = np.hstack(
                                                [neighbor_all[i], neighbor_all[j], neighbor_all[k], neighbor_all[l]])
                                            nc2 = list(tuple(set(nc2)))
                                            nc2.remove(i)
                                            nc2.remove(j)
                                            nc2.remove(k)
                                            nc2.remove(l)
                                            CN_m.append(len(nc2))

    GCN_s = []
    GCN1 = 0
    for i in range(CN_s):
        GCN1 = GCN1 + CN_all[nearest_neighbor[i]]
    GCN_s = GCN1 / max(CN_m)
    # print(GCN_s)
    # print(CN_s)


    Ng = gmean(Feature.loc[surface.get_atomic_numbers(), "Ng"])
    Np = gmean(Feature.loc[surface.get_atomic_numbers(), "Np"])


    result = result.append(
        pd.DataFrame({'Structure': file[item],
                      'psi0': psi0,
                      'psi': psi,
                      'psi’_ad': psi_gra,
                      'Ecoh': Ecoh0,
                      'GCN': GCN_s, 'CN': CN_s,
                      'site_elements':str(site_ele),
                      'site_atoms_no': len(center_index), 'site_atoms': str(center_index),
                      'local_atoms_no': len(local_structure), 'local_atoms': str(local_structure)
                      }, index=[item]))

    print(item + 1)

result.to_excel('result.xlsx')

print('time = {:3f}'.format(time.time() - t0))