{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Excel to/from SBML\n",
    "Peter Schubert (2020-11-15)\n",
    "<br> <br>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "import sbmlxdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ecoli models\n",
    "\n",
    "xlsx_dir = 'xlsx_models/ecoli'\n",
    "sbml_dir = 'SBML_models/ecoli'\n",
    "\n",
    "# model_name = 'iJO1366_ccFBA'\n",
    "# model_name = 'iJO1366_MOMENT'\n",
    "# model_name = 'iJO1366_MOMENTmr'\n",
    "model_name = 'iJO1366_GECKO'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Yeast models\n",
    "xlsx_dir = 'xlsx_models/yeast'\n",
    "sbml_dir = 'SBML_models/yeast'\n",
    "\n",
    "model_name = 'ecYeastGEM_batch_8_3_4'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Synechocystis models\n",
    "xlsx_dir = 'xlsx_models/synechocystis'\n",
    "sbml_dir = 'SBML_models/synechocystis'\n",
    "\n",
    "model_name = 'iJN678_predicted_GECKO'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/peter/Documents/CCB/work/GECKO\n"
     ]
    }
   ],
   "source": [
    "print(os.getcwd())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## convert SBML model to xlsx model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SBML_models/synechocystis/iJN678_predicted_GECKO.xml imported\n",
      "last modified: Sun Oct 27 12:48:01 2024\n"
     ]
    }
   ],
   "source": [
    "sbml_name = os.path.join(sbml_dir, model_name) + '.xml'\n",
    "if os.path.exists(sbml_name):\n",
    "    model = sbmlxdf.Model(sbml_name)\n",
    "    print(sbml_name, 'imported')\n",
    "    print(\"last modified: %s\" % time.ctime(os.path.getmtime(sbml_name)))\n",
    "else:\n",
    "     print(f'ERROR, not found!!! {sbml_name}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.validate_sbml()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "xlsx_models/synechocystis/iJN678_predicted_GECKO.xlsx exported\n"
     ]
    }
   ],
   "source": [
    "xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n",
    "model.to_excel(xlsx_name)\n",
    "print(xlsx_name, 'exported')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## convert Excel to SBML model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "xlsx_models/eciJO1366_ccfba.xlsx imported\n",
      "last modified: Sat Jun 24 15:55:15 2023\n"
     ]
    }
   ],
   "source": [
    "xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n",
    "if os.path.exists(xlsx_name):\n",
    "    model = sbmlxdf.Model(xlsx_name)\n",
    "    print(xlsx_name, 'imported')\n",
    "    print(\"last modified: %s\" % time.ctime(os.path.getmtime(xlsx_name)))\n",
    "else:\n",
    "     print(f'ERROR, not found!!! {xlsx_name}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.validate_sbml()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SBML_models/eciJO1366_ccfba.xml exported\n"
     ]
    }
   ],
   "source": [
    "sbml_name = os.path.join(sbml_dir, model_name) + '.xml'\n",
    "model.export_sbml(sbml_name)\n",
    "print(sbml_name, 'exported')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## convert reactants - products table to reactants string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "xlsx_name = os.path.join(xlsx_dir, model_name) + '.xlsx'\n",
    "with pd.ExcelFile(xlsx_name) as xlsx:\n",
    "    df_reactions = pd.read_excel(xlsx, sheet_name = 'reactions')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "for idx, row in df_reactions.iterrows():\n",
    "    new_srefs = []\n",
    "    for sref in row['reactants'].split(';'):\n",
    "        params = sbmlxdf.extract_params(sref)\n",
    "        if 'stoic' not in params:\n",
    "            params['stoic'] = '1.0'\n",
    "        params['const'] = True\n",
    "        new_srefs.append(', '.join([f'{key}={val}' for key, val in params.items()]))\n",
    "    df_reactions.at[idx, 'new_reactants'] = '; '.join(new_srefs)  \n",
    "    new_srefs = []\n",
    "    for sref in row['products'].split(';'):\n",
    "        params = sbmlxdf.extract_params(sref)\n",
    "        if 'stoic' not in params:\n",
    "            params['stoic'] = '1.0'\n",
    "        params['const'] = True\n",
    "        new_srefs.append(', '.join([f'{key}={val}' for key, val in params.items()]))\n",
    "    df_reactions.at[idx, 'new_products'] = '; '.join(new_srefs)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>G_Q0045</th>\n",
       "      <td>Q0045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>G_Q0250</th>\n",
       "      <td>Q0250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>G_Q0275</th>\n",
       "      <td>Q0275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>G_YDL067C</th>\n",
       "      <td>YDL067C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>G_YEL039C</th>\n",
       "      <td>YEL039C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             label\n",
       "id                \n",
       "G_Q0045      Q0045\n",
       "G_Q0250      Q0250\n",
       "G_Q0275      Q0275\n",
       "G_YDL067C  YDL067C\n",
       "G_YEL039C  YEL039C"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_gps = pd.DataFrame(gp2label.values(), index=gp2label, columns=['label'])\n",
    "df_gps.index.name='id'\n",
    "df_gps.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "xlsx_name = 'tmp.xlsx'\n",
    "with pd.ExcelWriter('tmp.xlsx') as writer:  \n",
    "    df_reactions.to_excel(writer, sheet_name='fbcGeneProducts')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "m_dict = model.to_df()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "value                       0.0\n",
       "units       mmol_per_gDW_per_hr\n",
       "constant                   True\n",
       "Name: fbc_0_bound, dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "m_dict['parameters'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "value                       0.1\n",
       "units       mmol_per_gDW_per_hr\n",
       "constant                   True\n",
       "Name: fbc_r_1001_lb, dtype: object"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s_bound = pd.Series([0.1, 'mmol_per_gDW_per_hr', True], index=['value', 'units', 'constant'])\n",
    "s_bound.name = 'fbc_r_1001_lb'\n",
    "s_bound"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.11 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}