{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Excel to/from SBML\n",
"Peter Schubert (2020-11-15)\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"import sbmlxdf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ecoli models\n",
"\n",
"xlsx_dir = 'xlsx_models/ecoli'\n",
"sbml_dir = 'SBML_models/ecoli'\n",
"\n",
"# model_name = 'iJO1366_ccFBA'\n",
"# model_name = 'iJO1366_MOMENT'\n",
"# model_name = 'iJO1366_MOMENTmr'\n",
"model_name = 'iJO1366_GECKO'\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Yeast models\n",
"xlsx_dir = 'xlsx_models/yeast'\n",
"sbml_dir = 'SBML_models/yeast'\n",
"\n",
"model_name = 'ecYeastGEM_batch_8_3_4'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Synechocystis models\n",
"xlsx_dir = 'xlsx_models/synechocystis'\n",
"sbml_dir = 'SBML_models/synechocystis'\n",
"\n",
"model_name = 'iJN678_predicted_GECKO'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/peter/Documents/CCB/work/GECKO\n"
]
}
],
"source": [
"print(os.getcwd())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## convert SBML model to xlsx model"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SBML_models/synechocystis/iJN678_predicted_GECKO.xml imported\n",
"last modified: Sun Oct 27 12:48:01 2024\n"
]
}
],
"source": [
"sbml_name = os.path.join(sbml_dir, model_name) + '.xml'\n",
"if os.path.exists(sbml_name):\n",
" model = sbmlxdf.Model(sbml_name)\n",
" print(sbml_name, 'imported')\n",
" print(\"last modified: %s\" % time.ctime(os.path.getmtime(sbml_name)))\n",
"else:\n",
" print(f'ERROR, not found!!! {sbml_name}')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.validate_sbml()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"xlsx_models/synechocystis/iJN678_predicted_GECKO.xlsx exported\n"
]
}
],
"source": [
"xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n",
"model.to_excel(xlsx_name)\n",
"print(xlsx_name, 'exported')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## convert Excel to SBML model"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"xlsx_models/eciJO1366_ccfba.xlsx imported\n",
"last modified: Sat Jun 24 15:55:15 2023\n"
]
}
],
"source": [
"xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n",
"if os.path.exists(xlsx_name):\n",
" model = sbmlxdf.Model(xlsx_name)\n",
" print(xlsx_name, 'imported')\n",
" print(\"last modified: %s\" % time.ctime(os.path.getmtime(xlsx_name)))\n",
"else:\n",
" print(f'ERROR, not found!!! {xlsx_name}')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{}"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.validate_sbml()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SBML_models/eciJO1366_ccfba.xml exported\n"
]
}
],
"source": [
"sbml_name = os.path.join(sbml_dir, model_name) + '.xml'\n",
"model.export_sbml(sbml_name)\n",
"print(sbml_name, 'exported')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## convert reactants - products table to reactants string"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n",
"with pd.ExcelFile(xlsx_name) as xlsx:\n",
" df_reactions = pd.read_excel(xlsx, sheet_name = 'reactions')"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"for idx, row in df_reactions.iterrows():\n",
" new_srefs = []\n",
" for sref in row['reactants'].split(';'):\n",
" params = sbmlxdf.extract_params(sref)\n",
" if 'stoic' not in params:\n",
" params['stoic'] = '1.0'\n",
" params['const'] = True\n",
" new_srefs.append(', '.join([f'{key}={val}' for key, val in params.items()]))\n",
" df_reactions.at[idx, 'new_reactants'] = '; '.join(new_srefs) \n",
" new_srefs = []\n",
" for sref in row['products'].split(';'):\n",
" params = sbmlxdf.extract_params(sref)\n",
" if 'stoic' not in params:\n",
" params['stoic'] = '1.0'\n",
" params['const'] = True\n",
" new_srefs.append(', '.join([f'{key}={val}' for key, val in params.items()]))\n",
" df_reactions.at[idx, 'new_products'] = '; '.join(new_srefs) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
| \n", " | label | \n", "
|---|---|
| id | \n", "\n", " |
| G_Q0045 | \n", "Q0045 | \n", "
| G_Q0250 | \n", "Q0250 | \n", "
| G_Q0275 | \n", "Q0275 | \n", "
| G_YDL067C | \n", "YDL067C | \n", "
| G_YEL039C | \n", "YEL039C | \n", "