{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Excel to/from SBML\n", "Peter Schubert (2020-11-15)\n", "

" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import sbmlxdf" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Ecoli models\n", "\n", "xlsx_dir = 'xlsx_models/ecoli'\n", "sbml_dir = 'SBML_models/ecoli'\n", "\n", "# model_name = 'iJO1366_ccFBA'\n", "# model_name = 'iJO1366_MOMENT'\n", "# model_name = 'iJO1366_MOMENTmr'\n", "model_name = 'iJO1366_GECKO'\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Yeast models\n", "xlsx_dir = 'xlsx_models/yeast'\n", "sbml_dir = 'SBML_models/yeast'\n", "\n", "model_name = 'ecYeastGEM_batch_8_3_4'" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Synechocystis models\n", "xlsx_dir = 'xlsx_models/synechocystis'\n", "sbml_dir = 'SBML_models/synechocystis'\n", "\n", "model_name = 'iJN678_predicted_GECKO'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/peter/Documents/CCB/work/GECKO\n" ] } ], "source": [ "print(os.getcwd())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## convert SBML model to xlsx model" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SBML_models/synechocystis/iJN678_predicted_GECKO.xml imported\n", "last modified: Sun Oct 27 12:48:01 2024\n" ] } ], "source": [ "sbml_name = os.path.join(sbml_dir, model_name) + '.xml'\n", "if os.path.exists(sbml_name):\n", " model = sbmlxdf.Model(sbml_name)\n", " print(sbml_name, 'imported')\n", " print(\"last modified: %s\" % time.ctime(os.path.getmtime(sbml_name)))\n", "else:\n", " print(f'ERROR, not found!!! {sbml_name}')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.validate_sbml()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "xlsx_models/synechocystis/iJN678_predicted_GECKO.xlsx exported\n" ] } ], "source": [ "xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n", "model.to_excel(xlsx_name)\n", "print(xlsx_name, 'exported')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## convert Excel to SBML model" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "xlsx_models/eciJO1366_ccfba.xlsx imported\n", "last modified: Sat Jun 24 15:55:15 2023\n" ] } ], "source": [ "xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n", "if os.path.exists(xlsx_name):\n", " model = sbmlxdf.Model(xlsx_name)\n", " print(xlsx_name, 'imported')\n", " print(\"last modified: %s\" % time.ctime(os.path.getmtime(xlsx_name)))\n", "else:\n", " print(f'ERROR, not found!!! {xlsx_name}')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.validate_sbml()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SBML_models/eciJO1366_ccfba.xml exported\n" ] } ], "source": [ "sbml_name = os.path.join(sbml_dir, model_name) + '.xml'\n", "model.export_sbml(sbml_name)\n", "print(sbml_name, 'exported')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## convert reactants - products table to reactants string" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import re" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n", "with pd.ExcelFile(xlsx_name) as xlsx:\n", " df_reactions = pd.read_excel(xlsx, sheet_name = 'reactions')" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "for idx, row in df_reactions.iterrows():\n", " new_srefs = []\n", " for sref in row['reactants'].split(';'):\n", " params = sbmlxdf.extract_params(sref)\n", " if 'stoic' not in params:\n", " params['stoic'] = '1.0'\n", " params['const'] = True\n", " new_srefs.append(', '.join([f'{key}={val}' for key, val in params.items()]))\n", " df_reactions.at[idx, 'new_reactants'] = '; '.join(new_srefs) \n", " new_srefs = []\n", " for sref in row['products'].split(';'):\n", " params = sbmlxdf.extract_params(sref)\n", " if 'stoic' not in params:\n", " params['stoic'] = '1.0'\n", " params['const'] = True\n", " new_srefs.append(', '.join([f'{key}={val}' for key, val in params.items()]))\n", " df_reactions.at[idx, 'new_products'] = '; '.join(new_srefs) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
label
id
G_Q0045Q0045
G_Q0250Q0250
G_Q0275Q0275
G_YDL067CYDL067C
G_YEL039CYEL039C
\n", "
" ], "text/plain": [ " label\n", "id \n", "G_Q0045 Q0045\n", "G_Q0250 Q0250\n", "G_Q0275 Q0275\n", "G_YDL067C YDL067C\n", "G_YEL039C YEL039C" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_gps = pd.DataFrame(gp2label.values(), index=gp2label, columns=['label'])\n", "df_gps.index.name='id'\n", "df_gps.head()" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "xlsx_name = 'tmp.xlsx'\n", "with pd.ExcelWriter('tmp.xlsx') as writer: \n", " df_reactions.to_excel(writer, sheet_name='fbcGeneProducts')\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "m_dict = model.to_df()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "value 0.0\n", "units mmol_per_gDW_per_hr\n", "constant True\n", "Name: fbc_0_bound, dtype: object" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "m_dict['parameters'].iloc[0]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "value 0.1\n", "units mmol_per_gDW_per_hr\n", "constant True\n", "Name: fbc_r_1001_lb, dtype: object" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s_bound = pd.Series([0.1, 'mmol_per_gDW_per_hr', True], index=['value', 'units', 'constant'])\n", "s_bound.name = 'fbc_r_1001_lb'\n", "s_bound" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.11 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" } }, "nbformat": 4, "nbformat_minor": 2 }