{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "9391dbcb",
"metadata": {},
"outputs": [],
"source": [
"import ase\n",
"from ase import io\n",
"import numpy as np\n",
"import chemiscope\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "10fb366f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" refcode | \n",
" metal | \n",
" total_charge | \n",
" spin_multiplicity | \n",
" elem_nr | \n",
" m_ox | \n",
" d_elec | \n",
" CN | \n",
" geometry | \n",
" rel_m | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" KIPLIH | \n",
" Cr | \n",
" 0 | \n",
" 1 | \n",
" 24 | \n",
" 0 | \n",
" 5 | \n",
" 5 | \n",
" Square pyramidal | \n",
" 0.836 | \n",
"
\n",
" \n",
" 1 | \n",
" AYIZIS | \n",
" Cr | \n",
" 0 | \n",
" 1 | \n",
" 24 | \n",
" 0 | \n",
" 5 | \n",
" 6 | \n",
" Octahedral | \n",
" 0.848 | \n",
"
\n",
" \n",
" 2 | \n",
" MISLAB | \n",
" Cr | \n",
" 0 | \n",
" 1 | \n",
" 24 | \n",
" 0 | \n",
" 5 | \n",
" 6 | \n",
" Octahedral | \n",
" 0.859 | \n",
"
\n",
" \n",
" 3 | \n",
" KOQBAT | \n",
" Cr | \n",
" 0 | \n",
" 1 | \n",
" 24 | \n",
" 0 | \n",
" 5 | \n",
" 6 | \n",
" Octahedral | \n",
" 0.860 | \n",
"
\n",
" \n",
" 4 | \n",
" VOWRAA | \n",
" Cr | \n",
" -1 | \n",
" 1 | \n",
" 24 | \n",
" 0 | \n",
" 5 | \n",
" 6 | \n",
" Octahedral | \n",
" 0.856 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" refcode metal total_charge spin_multiplicity elem_nr m_ox d_elec CN \\\n",
"0 KIPLIH Cr 0 1 24 0 5 5 \n",
"1 AYIZIS Cr 0 1 24 0 5 6 \n",
"2 MISLAB Cr 0 1 24 0 5 6 \n",
"3 KOQBAT Cr 0 1 24 0 5 6 \n",
"4 VOWRAA Cr -1 1 24 0 5 6 \n",
"\n",
" geometry rel_m \n",
"0 Square pyramidal 0.836 \n",
"1 Octahedral 0.848 \n",
"2 Octahedral 0.859 \n",
"3 Octahedral 0.860 \n",
"4 Octahedral 0.856 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_1 = pd.read_csv(\"property_2063.txt\",sep='\\t')\n",
"df_1.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "64c2e198",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ruben/anaconda3/lib/python3.8/site-packages/chemiscope/structures.py:278: UserWarning: the following structure properties properties are only defined for a subset of frames: ['-1', '-2', '-3', '-4', '-5', '0', '1', '2', '3', '4', '5', '6']; they will be ignored\n",
" warnings.warn(\n"
]
}
],
"source": [
"mols_1 = []\n",
"for structure in df_1[\"refcode\"]:\n",
" mols_1.append(ase.io.read(f\"Ground_state_spin_dataset/{structure}.xyz\"))\n",
"\n",
"properties_1 = {}\n",
"for key in df_1.keys():\n",
" #print(key)\n",
" units = None\n",
" if key == \"refcode\" or key == \"name\":\n",
" continue\n",
"\n",
" if \"total_charge\" in key:\n",
" units = \"\"\n",
"\n",
" if \"spin_multiplicity\" in key:\n",
" units = \"\"\n",
"\n",
" if \"elem_nr\" in key:\n",
" units = \"\"\n",
"\n",
" if key == \"m_ox\":\n",
" units = \"\"\n",
"\n",
" if key == \"d_elec\":\n",
" units = \"electrons\"\n",
" \n",
" if key == \"CN\":\n",
" units == \"\"\n",
" \n",
" if key == \"rel_m\":\n",
" units == \"\"\n",
" \n",
" if units is not None:\n",
" keydict = {\n",
" \"target\": \"structure\",\n",
" \"values\": df_1[f\"{key}\"].to_list(),\n",
" \"units\": f\"{units}\",\n",
" }\n",
" else:\n",
" continue\n",
" properties_1[f\"{key}\"] = keydict\n",
"\n",
"chemiscope.write_input(\n",
" path=f\"Ground_state_spin_dataset_chemiscope.json.gz\",\n",
" frames=mols_1,\n",
" properties=properties_1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b3086ad5",
"metadata": {},
"outputs": [],
"source": [
"widget = chemiscope.show(mols_1, properties_1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "098c19d3",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0518e2ae986c45c5a2bb39b279944cd7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"ChemiscopeWidget(value=None, data='{\"meta\": {\"name\": \" \"}, \"structures\": [{\"size\": 59, \"names\": [\"Cr\", \"O\", \"O…"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"widget"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b707bb27",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" refcode | \n",
" metal | \n",
" total_charge | \n",
" spin_multiplicity | \n",
" elem_nr | \n",
" m_ox | \n",
" d_elec | \n",
" hapticity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" DIDSEQ | \n",
" Sc | \n",
" 0 | \n",
" 1 | \n",
" 21 | \n",
" 3 | \n",
" 0 | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" EZUYUW | \n",
" Sc | \n",
" 0 | \n",
" 1 | \n",
" 21 | \n",
" 3 | \n",
" 0 | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" NURLAQ | \n",
" Sc | \n",
" 0 | \n",
" 1 | \n",
" 21 | \n",
" 3 | \n",
" 0 | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" ZIGKOS | \n",
" Sc | \n",
" 0 | \n",
" 1 | \n",
" 21 | \n",
" 3 | \n",
" 0 | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" ACOJAD | \n",
" Ti | \n",
" 0 | \n",
" 3 | \n",
" 22 | \n",
" 2 | \n",
" 2 | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" refcode metal total_charge spin_multiplicity elem_nr m_ox d_elec \\\n",
"0 DIDSEQ Sc 0 1 21 3 0 \n",
"1 EZUYUW Sc 0 1 21 3 0 \n",
"2 NURLAQ Sc 0 1 21 3 0 \n",
"3 ZIGKOS Sc 0 1 21 3 0 \n",
"4 ACOJAD Ti 0 3 22 2 2 \n",
"\n",
" hapticity \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_2 = pd.read_csv(\"property_1838.txt\",sep='\\t')\n",
"df_2.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "d5b4015f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ruben/anaconda3/lib/python3.8/site-packages/chemiscope/structures.py:278: UserWarning: the following structure properties properties are only defined for a subset of frames: ['-1', '-2', '-3', '-4', '0', '1', '2', '3', '4', '5', '6']; they will be ignored\n",
" warnings.warn(\n"
]
}
],
"source": [
"mols_2 = []\n",
"for structure in df_2[\"refcode\"]:\n",
" mols_2.append(ase.io.read(f\"Supplementary_dataset/{structure}.xyz\"))\n",
"\n",
"properties_2 = {}\n",
"for key in df_2.keys():\n",
" #print(key)\n",
" units = None\n",
" if key == \"refcode\" or key == \"name\":\n",
" continue\n",
"\n",
" if \"total_charge\" in key:\n",
" units = \"\"\n",
"\n",
" if \"spin_multiplicity\" in key:\n",
" units = \"\"\n",
"\n",
" if \"elem_nr\" in key:\n",
" units = \"\"\n",
"\n",
" if key == \"m_ox\":\n",
" units = \"\"\n",
"\n",
" if key == \"d_elec\":\n",
" units = \"electrons\"\n",
" \n",
" if key == \"CN\":\n",
" units == \"\"\n",
" \n",
" if key == \"rel_m\":\n",
" units == \"\"\n",
" \n",
" if key == \"hapticity\":\n",
" units == \"\"\n",
" \n",
" if key == \"hapttype\":\n",
" units == \"\"\n",
" \n",
" if units is not None:\n",
" keydict = {\n",
" \"target\": \"structure\",\n",
" \"values\": df_2[f\"{key}\"].to_list(),\n",
" \"units\": f\"{units}\",\n",
" }\n",
" else:\n",
" continue\n",
" properties_2[f\"{key}\"] = keydict\n",
"\n",
"chemiscope.write_input(\n",
" path=f\"Supplementary_dataset_chemiscope.json.gz\",\n",
" frames=mols_2,\n",
" properties=properties_2,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cb2d443",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7ffde49",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa7105fc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "447712c8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8226984d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5db74c86",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdbdec69",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e3956fe",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a61b141f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8458cc7f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dad0267d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "02ad7df5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "22b2a8c0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3f6c7c0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6818d8c3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a387fbed",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b5e7e8a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf083db1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}