{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn import metrics\n", "from sklearn.manifold import TSNE\n", "from sklearn.preprocessing import StandardScaler\n", "import ase.io\n", "import chemiscope\n", "from pymatgen.core import Structure, Lattice, Composition\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = pd.read_json('topoclass.json.gz')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDsubsetMAT_TYPETQC_TYPEstructureMP_IDICSD_IDAtomicOrbitals|HOMO_characterAtomicOrbitals|HOMO_elementAtomicOrbitals|HOMO_energy...BondFractions|Ho - La bond frac.BondFractions|Au - Pd bond frac.BondFractions|Dy - Hg bond frac.BondFractions|Er - Yb bond frac.BondFractions|Hg - Tm bond frac.BondFractions|Er - Hg bond frac.BondFractions|Be - Sc bond frac.BondFractions|Hg - Tb bond frac.BondFractions|Be - Lu bond frac.BondFractions|Er - Y bond frac.
0MAT00026906M/TTINone{'@module': 'pymatgen.core.structure', '@class...mp-10008NaN120-0.141411...0000000.000.00
1MAT00020270M/TTrINone{'@module': 'pymatgen.core.structure', '@class...mp-1001604NaN216-0.261676...0000000.000.00
2MAT00015322M/THSLSMNone{'@module': 'pymatgen.core.structure', '@class...mp-1001605NaN459-0.155138...0000000.000.00
3MAT00025094M/TTCINone{'@module': 'pymatgen.core.structure', '@class...mp-1001844NaN27-0.266297...0000000.000.00
4MAT00025479M/THSLSMNone{'@module': 'pymatgen.core.structure', '@class...mp-1002220NaN251-0.185623...0000000.000.00
..................................................................
35603MAT00038674M&TTrILCEBR{'@module': 'pymatgen.core.structure', '@class...mp-69673628541.029-0.415606...0000000.000.00
35604MAT00038675M&TTrILCEBR{'@module': 'pymatgen.core.structure', '@class...mp-70727620730.027-0.266297...0000000.000.00
35605MAT00038676M&TTrILCEBR{'@module': 'pymatgen.core.structure', '@class...mp-70733467509.028-0.338381...0000000.000.00
35606MAT00038677M&TTrILCEBR{'@module': 'pymatgen.core.structure', '@class...mp-70789759327.028-0.338381...0000000.000.00
35607MAT00038678M&TTrILCEBR{'@module': 'pymatgen.core.structure', '@class...mp-919879005.0215-0.206080...0000000.000.00
\n", "

35608 rows × 4717 columns

\n", "
" ], "text/plain": [ " ID subset MAT_TYPE TQC_TYPE \\\n", "0 MAT00026906 M/T TI None \n", "1 MAT00020270 M/T TrI None \n", "2 MAT00015322 M/T HSLSM None \n", "3 MAT00025094 M/T TCI None \n", "4 MAT00025479 M/T HSLSM None \n", "... ... ... ... ... \n", "35603 MAT00038674 M&T TrI LCEBR \n", "35604 MAT00038675 M&T TrI LCEBR \n", "35605 MAT00038676 M&T TrI LCEBR \n", "35606 MAT00038677 M&T TrI LCEBR \n", "35607 MAT00038678 M&T TrI LCEBR \n", "\n", " structure MP_ID ICSD_ID \\\n", "0 {'@module': 'pymatgen.core.structure', '@class... mp-10008 NaN \n", "1 {'@module': 'pymatgen.core.structure', '@class... mp-1001604 NaN \n", "2 {'@module': 'pymatgen.core.structure', '@class... mp-1001605 NaN \n", "3 {'@module': 'pymatgen.core.structure', '@class... mp-1001844 NaN \n", "4 {'@module': 'pymatgen.core.structure', '@class... mp-1002220 NaN \n", "... ... ... ... \n", "35603 {'@module': 'pymatgen.core.structure', '@class... mp-696736 28541.0 \n", "35604 {'@module': 'pymatgen.core.structure', '@class... mp-707276 20730.0 \n", "35605 {'@module': 'pymatgen.core.structure', '@class... mp-707334 67509.0 \n", "35606 {'@module': 'pymatgen.core.structure', '@class... mp-707897 59327.0 \n", "35607 {'@module': 'pymatgen.core.structure', '@class... mp-9198 79005.0 \n", "\n", " AtomicOrbitals|HOMO_character AtomicOrbitals|HOMO_element \\\n", "0 1 20 \n", "1 2 16 \n", "2 4 59 \n", "3 2 7 \n", "4 2 51 \n", "... ... ... \n", "35603 2 9 \n", "35604 2 7 \n", "35605 2 8 \n", "35606 2 8 \n", "35607 2 15 \n", "\n", " AtomicOrbitals|HOMO_energy ... BondFractions|Ho - La bond frac. \\\n", "0 -0.141411 ... 0 \n", "1 -0.261676 ... 0 \n", "2 -0.155138 ... 0 \n", "3 -0.266297 ... 0 \n", "4 -0.185623 ... 0 \n", "... ... ... ... \n", "35603 -0.415606 ... 0 \n", "35604 -0.266297 ... 0 \n", "35605 -0.338381 ... 0 \n", "35606 -0.338381 ... 0 \n", "35607 -0.206080 ... 0 \n", "\n", " BondFractions|Au - Pd bond frac. BondFractions|Dy - Hg bond frac. 
\\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "35603 0 0 \n", "35604 0 0 \n", "35605 0 0 \n", "35606 0 0 \n", "35607 0 0 \n", "\n", " BondFractions|Er - Yb bond frac. BondFractions|Hg - Tm bond frac. \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "35603 0 0 \n", "35604 0 0 \n", "35605 0 0 \n", "35606 0 0 \n", "35607 0 0 \n", "\n", " BondFractions|Er - Hg bond frac. BondFractions|Be - Sc bond frac. \\\n", "0 0 0.0 \n", "1 0 0.0 \n", "2 0 0.0 \n", "3 0 0.0 \n", "4 0 0.0 \n", "... ... ... \n", "35603 0 0.0 \n", "35604 0 0.0 \n", "35605 0 0.0 \n", "35606 0 0.0 \n", "35607 0 0.0 \n", "\n", " BondFractions|Hg - Tb bond frac. BondFractions|Be - Lu bond frac. \\\n", "0 0 0.0 \n", "1 0 0.0 \n", "2 0 0.0 \n", "3 0 0.0 \n", "4 0 0.0 \n", "... ... ... \n", "35603 0 0.0 \n", "35604 0 0.0 \n", "35605 0 0.0 \n", "35606 0 0.0 \n", "35607 0 0.0 \n", "\n", " BondFractions|Er - Y bond frac. \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "... ... 
\n", "35603 0 \n", "35604 0 \n", "35605 0 \n", "35606 0 \n", "35607 0 \n", "\n", "[35608 rows x 4717 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Integer code for each MAT_TYPE label; code 0 (TrI) is the positive class of the binary split used below.\n", "classes = {\"TrI\": 0, \"NTM\": 1, \"TI\": 2, \"TCI\": 3, \"HSPSM\": 4, \"HSLSM\": 5}\n", "# Names of the dataframe columns referenced throughout the notebook.\n", "mpe = \"MaximumPackingEfficiency|max packing efficiency\"\n", "fpv = \"ValenceOrbital|frac p valence electrons\"\n", "deltaH = \"Miedema|Miedema_deltaH_inter\"\n", "mat_type = \"MAT_TYPE\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Standardize the two descriptors, then embed them in 2D with t-SNE (random_state is pinned to 0).\n", "X = df[[mpe, fpv]].to_numpy()\n", "X_scaled = StandardScaler().fit_transform(X)\n", "tsne = TSNE(n_components=2, init='pca', random_state=0)\n", "X_tsne = tsne.fit_transform(X_scaled)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Store both embedding coordinates on df; later cells threshold tSNE_1 and export both columns.\n", "df[\"tSNE_1\"] = X_tsne[:, 0]\n", "df[\"tSNE_2\"] = X_tsne[:, 1]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Binary task: actual is True for rows labelled TrI; predicted is True where the first t-SNE coordinate is positive.\n", "actual = (df[mat_type] == \"TrI\")\n", "predicted = (df[\"tSNE_1\"] > 0)\n", "# Pin the class order to [False, True] so the matrix is always 2x2 and lines up with display_labels.\n", "confusion_matrix = metrics.confusion_matrix(actual, predicted, labels=[False, True])\n", "# Rows are actual classes and columns are predicted classes, so:\n", "# TN = confusion_matrix[0, 0], FP = confusion_matrix[0, 1]\n", "# FN = confusion_matrix[1, 0], TP = confusion_matrix[1, 1]\n", "# The False class (every MAT_TYPE other than TrI) is displayed under the NTM label.\n", "metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = [\"NTM\", \"TrI\"]).plot(values_format='')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.7575544821388452\n", "Precision: 0.9218279209909608\n", "Recall: 0.6955331958068455\n", "F1: 0.7928494301139772\n" ] } ], "source": [ "print(\"Accuracy: \", metrics.accuracy_score(actual, predicted))\n", "print(\"Precision:\", metrics.precision_score(actual, predicted))\n", "print(\"Recall: \", metrics.recall_score(actual, predicted))\n", "print(\"F1: \", metrics.f1_score(actual, predicted))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Build one ASE Atoms object per material, carrying the properties to export in atoms.info.\n", "# iterrows() creates a Series per row, so iterate only over the columns read below\n", "# instead of the full feature table.\n", "frame_columns = [\"structure\", mpe, fpv, deltaH, \"tSNE_1\", \"tSNE_2\", mat_type]\n", "frames = []\n", "for ind, row in df[frame_columns].iterrows():\n", "    struc = Structure.from_dict(row[\"structure\"])\n", "    atoms = struc.to_ase_atoms()\n", "    mat_class = classes[row[mat_type]]  # raises KeyError for a label missing from classes\n", "    atoms.info = {\n", "        \"mat_id\": ind,\n", "        \"mpe\": row[mpe],\n", "        \"fpv\": row[fpv],\n", "        \"deltaH\": row[deltaH],\n", "        # NOTE(review): tSNE_1 is negated only in the exported frames; df and the\n", "        # predicted threshold use the un-negated value. Presumably for plot orientation; confirm.\n", "        \"tSNE_1\": -row[\"tSNE_1\"],\n", "        \"tSNE_2\": row[\"tSNE_2\"],\n", "        \"mat_type_2\": int(mat_class > 0),  # 0 for TrI, 1 for every other class\n", "        \"mat_type_5\": mat_class\n", "    }\n", "    frames.append(atoms)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "12a880bd4e41427b96a35cd5e0876bbc", "version_major": 2, "version_minor": 0 }, "text/html": [ "" ], "text/plain": [ "