Spaces:
Sleeping
Sleeping
| # AI-Powered Drug Discovery Pipeline Streamlit Application | |
| # This script integrates four phases of drug discovery into a single, interactive web app. | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import requests | |
| import io | |
| import re | |
| from PIL import Image | |
| import base64 | |
| # RDKit and BioPython imports | |
| from rdkit import Chem | |
| from rdkit.Chem import Draw, AllChem, Descriptors | |
| from Bio import SeqIO | |
| # Scikit-learn for ML models | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| # 3D Visualization | |
| import py3Dmol | |
| # Bokeh plotting | |
| from bokeh.plotting import figure | |
| from bokeh.models import ColumnDataSource, HoverTool | |
| from bokeh.layouts import gridplot | |
| from bokeh.transform import factor_cmap, cumsum | |
| from math import pi | |
| # Suppress warnings for cleaner output | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
# --- Page Configuration ---
# Browser-tab title/icon plus a wide layout with the sidebar folded away.
st.set_page_config(
    **{
        "page_title": "AI Drug Discovery Pipeline",
        "page_icon": "π¬",
        "layout": "wide",
        "initial_sidebar_state": "collapsed",
    }
)
# Custom CSS for a professional, dark theme
def apply_custom_styling():
    """Inject the app-wide dark-theme stylesheet into the page via ``st.markdown``."""
    theme_css = """
        <style>
            @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
            html, body, [class*="st-"] {
                font-family: 'Roboto', sans-serif;
            }
            .stApp {
                background-color: rgb(28, 28, 28);
                color: white;
            }
            /* Tab styles */
            .stTabs [data-baseweb="tab-list"] {
                gap: 24px;
            }
            .stTabs [data-baseweb="tab"] {
                height: 50px;
                white-space: pre-wrap;
                background: none;
                border-radius: 0px;
                border-bottom: 2px solid #333;
                padding: 10px 4px;
                color: #AAA;
            }
            .stTabs [data-baseweb="tab"]:hover {
                background: #222;
                color: #FFF;
            }
            .stTabs [aria-selected="true"] {
                border-bottom: 2px solid #00A0FF; /* Highlight color for active tab */
                color: #FFF;
            }
            /* Button styles */
            .stButton>button {
                border-color: #00A0FF;
                color: #00A0FF;
            }
            .stButton>button:hover {
                border-color: #FFF;
                color: #FFF;
                background-color: #00A0FF;
            }
            /* Ensure headers are white */
            h1, h2, h3, h4, h5, h6 {
                color: white !important;
            }
        </style>
        """
    # The payload is raw <style> markup, so HTML rendering has to be enabled.
    st.markdown(theme_css, unsafe_allow_html=True)


apply_custom_styling()
| # --- 2. Core Functions from All Phases --- | |
| # These functions are adapted from the provided Python scripts. | |
| # ===== Phase 1 Functions ===== | |
def fetch_pdb_structure(pdb_id: str):
    """
    Fetch a PDB file from files.rcsb.org and return its content.

    Args:
        pdb_id: PDB identifier. Surrounding whitespace is ignored; the ID must
            consist of letters, digits or underscores only.

    Returns:
        A ``(pdb_text, log)`` tuple. ``pdb_text`` is the response body when the
        server answers with HTTP 200 and ``None`` otherwise (invalid ID,
        non-200 status, or a request failure). ``log`` is a status message.
    """
    clean_id = pdb_id.strip() if isinstance(pdb_id, str) else ""
    # Reject anything that could alter the URL path before going to the network.
    if not re.fullmatch(r"[A-Za-z0-9_]+", clean_id):
        return None, (
            f"Invalid PDB ID {pdb_id!r}: expected a non-empty identifier made of "
            "letters, digits or underscores.\n"
        )

    url = f"https://files.rcsb.org/download/{clean_id}.pdb"
    try:
        response = requests.get(url, timeout=20)
    except requests.RequestException as e:
        # Only transport-level failures are expected here; anything else is a bug.
        return None, f"β An error occurred while fetching PDB data: {e}\n"

    if response.status_code == 200:
        return response.text, f"β Successfully fetched PDB data for {clean_id}.\n"
    return None, (
        f"β οΈ Failed to fetch PDB file for {clean_id} (Status: {response.status_code}). "
        "Please check the PDB ID and try again.\n"
    )
def fetch_fasta_sequence(protein_id: str):
    """
    Fetch a protein's FASTA sequence from the NCBI E-utilities ``efetch`` endpoint.

    Args:
        protein_id: Protein identifier passed as the ``id`` query parameter.
            Surrounding whitespace is ignored.

    Returns:
        A log string. On success it contains the record ID, description,
        sequence length and the first 60 residues; otherwise it describes
        the failure (empty ID, non-200 status, request or parse error).
    """
    clean_id = protein_id.strip() if isinstance(protein_id, str) else ""
    if not clean_id:
        return "Invalid protein ID: expected a non-empty identifier.\n"

    try:
        # Passing the query via ``params`` lets requests URL-encode the ID.
        response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
            params={"db": "protein", "id": clean_id, "rettype": "fasta", "retmode": "text"},
            timeout=20,
        )
        if response.status_code != 200:
            return f"β οΈ Failed to fetch FASTA file (Status: {response.status_code}).\n"
        # SeqIO.read raises ValueError unless the payload holds exactly one record.
        parsed_fasta = SeqIO.read(io.StringIO(response.text), "fasta")
    except (requests.RequestException, ValueError) as e:
        return f"β An error occurred while fetching FASTA data: {e}\n"

    return (
        f"β Successfully fetched FASTA sequence for {clean_id}.\n\n"
        "--- Protein Sequence Information ---\n"
        f"ID: {parsed_fasta.id}\n"
        f"Description: {parsed_fasta.description}\n"
        f"Sequence Length: {len(parsed_fasta.seq)}\n"
        f"Sequence Preview: {parsed_fasta.seq[:60]}...\n"
    )
def visualize_protein_3d(pdb_data: str, title="Protein 3D Structure"):
    """
    Build an interactive py3Dmol scene for a protein and return it as HTML.

    Returns a ``(html, log)`` tuple; ``html`` is ``None`` when no PDB data was
    supplied or when building the scene raised.
    """
    if not pdb_data:
        return None, "Cannot generate 3D view: No PDB data provided."

    try:
        scene = py3Dmol.view(width='100%', height=600)
        scene.setBackgroundColor('#1C1C1C')
        scene.addModel(pdb_data, "pdb")
        # Rainbow cartoon with a translucent van der Waals surface on top.
        cartoon_style = {'cartoon': {'color': 'spectrum', 'thickness': 0.8}}
        surface_style = {'opacity': 0.3, 'color': 'lightblue'}
        scene.setStyle(cartoon_style)
        scene.addSurface(py3Dmol.VDW, surface_style)
        scene.zoomTo()
        markup = scene._make_html()
        return markup, f"β Generated 3D visualization for {title}."
    except Exception as err:
        return None, f"β 3D visualization error: {err}"
def create_sample_molecules():
    """
    Return the Phase 1 demo compounds as a ``{display name: SMILES}`` dict.

    Insertion order follows the listing below.
    """
    named_smiles = (
        ("Oseltamivir (Influenza)", "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C"),
        ("Zanamivir (Influenza)", "C[C@H](N)C(=O)N[C@H]1[C@@H](O)C=C(O[C@H]1[C@@H](O)[C@H](O)CO)C(O)=O"),
        ("Aspirin (Pain/Inflammation)", "CC(=O)OC1=CC=CC=C1C(=O)O"),
        ("Ibuprofen (Pain/Inflammation)", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"),
        # Lipitor
        ("Atorvastatin (Cholesterol)", "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1"),
        ("Metformin (Diabetes)", "CN(C)C(=N)N=C(N)N"),
        ("Loratadine (Antihistamine)", "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1"),
        # Complex structure, tyrosine kinase inhibitor
        ("Imatinib (Gleevec - Cancer)", "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1"),
        # Calcium channel blocker
        ("Amlodipine (Hypertension)", "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1"),
        # Statin
        ("Rosuvastatin (Cholesterol)", "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C"),
    )
    return dict(named_smiles)
def calculate_molecular_properties(smiles_dict: dict):
    """
    Compute RDKit descriptors (MW, LogP, HBD, HBA, TPSA, rotatable bonds) per molecule.

    Entries whose SMILES RDKit cannot parse are left out of the table and
    reported in the log. Returns ``(DataFrame rounded to 2 decimals, log)``.
    """
    rows = []
    notes = []
    for label, smi in smiles_dict.items():
        parsed = Chem.MolFromSmiles(smi)
        if not parsed:
            notes.append(f"β οΈ Invalid SMILES string skipped for {label}: {smi}\n")
            continue
        rows.append(
            {
                'Molecule': label,
                'SMILES': smi,
                'MW': Descriptors.MolWt(parsed),
                'LogP': Descriptors.MolLogP(parsed),
                'HBD': Descriptors.NumHDonors(parsed),
                'HBA': Descriptors.NumHAcceptors(parsed),
                'TPSA': Descriptors.TPSA(parsed),
                'RotBonds': Descriptors.NumRotatableBonds(parsed),
            }
        )

    table = pd.DataFrame(rows).round(2)
    notes.append(f"β Calculated properties for {len(table)} valid molecules.\n")
    return table, "".join(notes)
def assess_drug_likeness(df: pd.DataFrame):
    """
    Score each row against Lipinski's Rule of Five.

    Returns ``(analysis_df, display_df, log)``. ``analysis_df`` carries the
    per-rule boolean flags, the violation count and a boolean ``Drug_Like``
    (at most one violation); ``display_df`` carries the violation count and a
    text ``Drug_Like`` marker. The input frame is not modified. An empty input
    yields two empty frames and an explanatory message.
    """
    if df.empty:
        return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."

    # Upper bound per descriptor; dict order fixes the order of the *_OK columns.
    ceilings = {'MW': 500, 'LogP': 5, 'HBD': 5, 'HBA': 10}

    scored = df.copy()
    for column, ceiling in ceilings.items():
        scored[f'{column}_OK'] = scored[column] <= ceiling
    flag_columns = [f'{column}_OK' for column in ceilings]
    scored['Lipinski_Violations'] = (~scored[flag_columns]).sum(axis=1)
    scored['Drug_Like'] = scored['Lipinski_Violations'] <= 1

    shown = df.copy()
    shown['Lipinski_Violations'] = scored['Lipinski_Violations']
    shown['Drug_Like'] = ['β Yes' if passed else 'β No' for passed in scored['Drug_Like']]

    return scored, shown, "β Assessed drug-likeness using Lipinski's Rule of Five.\n"
def plot_properties_dashboard(df: pd.DataFrame):
    """
    Create a 2x2 Bokeh dashboard of molecular-property plots.

    Panels: MW vs LogP, HBD vs HBA, TPSA vs rotatable bonds (each with dashed
    threshold lines) and a donut showing the Drug-Like / Non-Drug-Like split.

    Args:
        df: Analysis table with a boolean ``Drug_Like`` column plus the
            ``Molecule``, ``MW``, ``LogP``, ``HBD``, ``HBA``, ``TPSA`` and
            ``RotBonds`` columns. The frame is not modified.

    Returns:
        ``(grid, log)``. ``grid`` is ``None`` when ``df`` is empty, lacks
        ``Drug_Like``, or ``Drug_Like`` is not boolean.
    """
    from math import pi

    if df.empty or 'Drug_Like' not in df.columns:
        return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
    if df['Drug_Like'].dtype != bool:
        return None, f"Cannot plot: 'Drug_Like' column must be boolean, but it is {df['Drug_Like'].dtype}."

    # Work on a copy so the helper 'Category' column never leaks into the caller's frame.
    df = df.copy()
    df['Category'] = df['Drug_Like'].apply(lambda x: 'Drug-Like' if x else 'Non-Drug-Like')
    source = ColumnDataSource(df)
    colors = ['#00D4AA', '#FF6B6B']
    color_mapper = factor_cmap('Category', palette=colors, factors=["Drug-Like", "Non-Drug-Like"])
    scatter_hover = HoverTool(tooltips=[
        ("Compound", "@Molecule"), ("MW", "@MW{0.0} Da"), ("LogP", "@LogP{0.00}"),
        ("HBD", "@HBD"), ("HBA", "@HBA"), ("TPSA", "@TPSA{0.0} Γ Β²"), ("Category", "@Category")
    ])
    plot_config = {
        'sizing_mode': 'scale_width', 'aspect_ratio': 1,
        'background_fill_color': None, 'border_fill_color': None,
        'outline_line_color': '#333333', 'min_border_left': 50,
        'min_border_right': 50, 'min_border_top': 50, 'min_border_bottom': 50
    }

    def style_plot(p, x_label, y_label, title):
        """Apply the shared dark-theme styling (title, axes, grid, legend) to a figure."""
        p.title.text = title
        p.title.text_color = '#FFFFFF'
        p.title.text_font_size = '14pt'
        p.title.text_font_style = 'bold'
        p.xaxis.axis_label = x_label
        p.yaxis.axis_label = y_label
        p.axis.axis_label_text_color = '#CCCCCC'
        p.axis.axis_label_text_font_size = '11pt'
        p.axis.major_label_text_color = '#AAAAAA'
        p.axis.major_label_text_font_size = '10pt'
        p.grid.grid_line_color = '#2A2A2A'
        p.grid.grid_line_alpha = 0.3
        if p.legend:
            p.legend.location = "top_right"
            p.legend.background_fill_color = '#1A1A1A'
            p.legend.background_fill_alpha = 0.8
            p.legend.border_line_color = '#444444'
            p.legend.label_text_color = '#FFFFFF'
            p.legend.click_policy = "mute"
        return p

    def threshold_panel(x_col, y_col, x_limit, y_limit, x_pad, y_pad, x_label, y_label, title):
        """Scatter ``x_col`` vs ``y_col`` and overlay dashed lines at both limits."""
        p = figure(title=title, tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
        p.scatter(x_col, y_col, source=source, legend_group='Category',
                  color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
        dashed = dict(line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7)
        # Vertical line at the x threshold, spanning the padded y data range.
        p.line([x_limit, x_limit], [df[y_col].min() - y_pad, df[y_col].max() + y_pad],
               legend_label=f"{x_col} β€ {x_limit}", **dashed)
        # Horizontal line at the y threshold, spanning the padded x data range.
        p.line([df[x_col].min() - x_pad, df[x_col].max() + x_pad], [y_limit, y_limit],
               legend_label=f"{y_col} β€ {y_limit}", **dashed)
        return style_plot(p, x_label, y_label, title)

    p1 = threshold_panel('MW', 'LogP', 500, 5, 50, 0.5,
                         "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
    p2 = threshold_panel('HBD', 'HBA', 5, 10, 1, 1,
                         "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
    p3 = threshold_panel('TPSA', 'RotBonds', 140, 10, 10, 1,
                         "Topological Polar Surface Area (Γ Β²)", "Rotatable Bonds", "Drug Permeability Indicators")

    p4_config = plot_config.copy()
    # No default tools: the only hover on the donut is the wedge-specific one added below.
    p4_config['tools'] = ""
    p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
    p4 = figure(title="Drug-Likeness Distribution", **p4_config)

    # Per-category counts, wedge angles and percentages for the donut.
    counts = df['Category'].value_counts()
    data = pd.DataFrame({'category': counts.index, 'value': counts.values})
    data['angle'] = data['value'] / data['value'].sum() * 2 * pi
    data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
    data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)

    # Overall drug-like share for the centre label.
    total_compounds = len(df)
    drug_like_count = df['Drug_Like'].sum()
    drug_like_percentage = (drug_like_count / total_compounds * 100) if total_compounds > 0 else 0

    wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
                                      start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                                      line_color="white", line_width=3, fill_color='color',
                                      legend_field='category', source=data)
    # "{0.0}" is the default numeric tooltip format; printf-style "{%0.1f}" would
    # need an explicit printf formatter, and "%" needs no escaping in tooltips.
    donut_hover = HoverTool(tooltips=[
        ("Category", "@category"),
        ("Count", "@value"),
        ("Percentage", "@percentage{0.0}%")
    ], renderers=[wedge_renderer])
    p4.add_tools(donut_hover)

    p4.text([0], [0], text=[f"{total_compounds}\nCompounds\n({drug_like_percentage:.1f}% Drug-Like)"],
            text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
    style_plot(p4, "", "", "Compound Classification")
    p4.axis.visible = False
    p4.grid.visible = False

    grid = gridplot([[p1, p2], [p3, p4]], sizing_mode='scale_width', toolbar_location='right', merge_tools=True)
    return grid, "β Generated enhanced molecular properties dashboard."
| # ===== Phase 2 Functions ===== | |
def get_phase2_molecules():
    """
    Return the Phase 2 virtual-screening library as a ``{display name: SMILES}`` dict.

    Each entry is a well-known drug labelled with its therapeutic area;
    insertion order follows the listing below.
    """
    screening_library = (
        ('Paracetamol (Analgesic)', 'CC(=O)Nc1ccc(O)cc1'),
        ('Ibuprofen (Pain/Inflammation)', 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'),
        ('Aspirin (Pain/Antiplatelet)', 'CC(=O)Oc1ccccc1C(=O)O'),
        ('Naproxen (Pain/Inflammation)', 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1'),
        ('Diazepam (Anxiolytic)', 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12'),
        ('Metformin (Diabetes)', 'CN(C)C(=N)N=C(N)N'),
        ('Loratadine (Antihistamine)', 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1'),
        ('Morphine (Opioid Analgesic)', 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5'),
        ('Cetirizine (Antihistamine)', 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO'),
        ('Fluoxetine (Antidepressant)', 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1'),
        ('Amoxicillin (Antibiotic)', 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C'),
        ('Atorvastatin (Cholesterol)', 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1'),
        ('Ciprofloxacin (Antibiotic)', 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12'),
        ('Warfarin (Anticoagulant)', 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C'),
        ('Furosemide (Diuretic)', 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N'),
        ('Sildenafil (Erectile Dysfunction)', 'CCCC1=NN(C)C(=NC1=O)c1cc(N2CCN(C)CC2)c(OC)cc1S(=O)(=O)C'),
        # Proton pump inhibitor
        ('Omeprazole (GERD)', 'COc1ccc(C)c(c1NC(=O)c1cn(Cc2ccc(OC)cc2)cn1)OC'),
        # Angiotensin Receptor Blocker
        ('Losartan (Hypertension)', 'Cc1cnc(n1C)c1ccc(cc1)-c1ccccc1COC(=O)c1ccccc1'),
    )
    return dict(screening_library)
def simulate_virtual_screening(smiles_dict: dict):
    """
    Assign each molecule a reproducible pseudo-random binding-affinity score and rank them.

    Scores are drawn uniformly from [2.0, 9.8) with a fixed seed of 42, in the
    dict's iteration order, and rounded to 2 decimals. They are simulated
    values, not the output of a docking computation.

    Args:
        smiles_dict: Mapping of molecule name to SMILES string.

    Returns:
        ``(df, log)``. ``df`` has the columns ``Molecule``, ``SMILES``,
        ``Predicted_Binding_Affinity`` and ``Ranking`` (1 = highest score),
        sorted by score descending. An empty mapping yields an empty frame
        with those columns.
    """
    # A private generator yields the same stream as np.random.seed(42) without
    # resetting the process-wide NumPy RNG for unrelated callers.
    rng = np.random.RandomState(42)
    scores = rng.uniform(2.0, 9.8, len(smiles_dict))
    results = [
        {'Molecule': name, 'SMILES': smiles, 'Predicted_Binding_Affinity': round(score, 2)}
        for (name, smiles), score in zip(smiles_dict.items(), scores)
    ]
    # Explicit columns keep sort_values working when there are no rows.
    df = pd.DataFrame(results, columns=['Molecule', 'SMILES', 'Predicted_Binding_Affinity'])
    df = df.sort_values('Predicted_Binding_Affinity', ascending=False).reset_index(drop=True)
    df['Ranking'] = df.index + 1
    return df, f"β Simulated virtual screening for {len(df)} molecules.\n"
def predict_admet_properties(smiles_dict: dict):
    """
    Build an ADMET-style table: RDKit descriptors plus simulated solubility and toxicity.

    MW, LogP, HBD, HBA and the Lipinski violation count come from RDKit.
    ``Solubility (logS)`` and ``Toxicity Risk`` are pseudo-random placeholders
    seeded with ``42 + position`` so each molecule gets a stable value.

    Args:
        smiles_dict: Mapping of molecule name to SMILES string.

    Returns:
        ``(df, log)``. Entries whose SMILES cannot be parsed are left out of
        ``df`` and reported in ``log``.
    """
    admet_data = []
    log = ""
    for i, (name, smiles) in enumerate(smiles_dict.items()):
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            # Report the skip, as calculate_molecular_properties does.
            log += f"Invalid SMILES string skipped for {name}: {smiles}\n"
            continue
        mw = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = Descriptors.NumHDonors(mol)
        hba = Descriptors.NumHAcceptors(mol)
        # Private generator: same values as np.random.seed(42 + i), no global reseed.
        rng = np.random.RandomState(42 + i)
        admet_data.append({
            'Molecule': name, 'MW': round(mw, 2), 'LogP': round(logp, 2), 'HBD': hbd, 'HBA': hba,
            'Solubility (logS)': round(rng.uniform(-4, -1), 2),
            'Toxicity Risk': round(rng.uniform(0.05, 0.4), 3),
            'Lipinski Violations': sum([mw > 500, logp > 5, hbd > 5, hba > 10]),
        })
    df = pd.DataFrame(admet_data)
    log += f"β Predicted ADMET properties for {len(df)} molecules.\n"
    return df, log
def _recolor_svg_for_dark_theme(svg: str) -> str:
    """Turn every pure-black colour in an SVG string white and force white fill on <text> tags."""
    # Any spelling of pure black becomes white. The short hex form is matched
    # only when no further hex digit follows, so "#0000FF" and similar survive.
    svg = re.sub(r'\bblack\b', 'white', svg)
    svg = svg.replace('#000000', '#FFFFFF')
    svg = re.sub(r'#000(?![0-9A-Fa-f])', '#FFF', svg)
    svg = re.sub(r'rgb\(0,\s*0,\s*0\)', 'rgb(255,255,255)', svg)
    # Replace an existing fill attribute on <text> tags with a white one ...
    svg = re.sub(r'<text([^>]*?)\s+fill="[^"]*"([^>]*?)>', r'<text\1\2 fill="white">', svg)
    # ... and add one to <text> tags that do not already end with it.
    svg = re.sub(r'<text([^>]*?)(?<!fill="white")>', r'<text\1 fill="white">', svg)
    # Collapse duplicates the two steps above may have produced.
    svg = re.sub(r'fill="white"\s+fill="white"', 'fill="white"', svg)
    return svg


def visualize_molecule_2d_3d(smiles: str, name: str):
    """
    Generate a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule.

    Args:
        smiles: SMILES string of the molecule.
        name: Display name used in the panel headings and log messages.

    Returns:
        ``(html, log)``. For an unparsable SMILES, or when anything raises,
        ``html`` is a short ``<p>`` message instead of the combined view. When
        3D embedding fails, the 2D panel is still shown and the 3D panel holds
        a notice.
    """
    log = ""
    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return f"<p>Invalid SMILES for {name}</p>", f"β Invalid SMILES for {name}"

        drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
        options = drawer.drawOptions()
        # Dark-theme drawing options for the 2D depiction.
        options.clearBackground = False
        options.addStereoAnnotation = True
        options.baseFontSize = 0.8
        options.circleAtoms = False
        options.highlightColour = (1, 0.5, 0)  # Orange for highlights
        options.backgroundColour = (0.11, 0.11, 0.11)  # Dark background
        options.symbolColour = (1, 1, 1)  # White symbols
        options.defaultColour = (1, 1, 1)  # White default color
        try:
            # Best effort: may help with (R)/(S) labels where the option exists.
            options.annotationColour = (1, 1, 1)  # White annotations
        except Exception:
            pass
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
        svg_2d = _recolor_svg_for_dark_theme(drawer.GetDrawingText().replace('svg:', ''))
        # Embed the SVG within a div with a dark background for consistency
        svg_2d = f'<div style="background-color: #1C1C1C; padding: 10px; border-radius: 5px;">{svg_2d}</div>'

        mol_3d = Chem.AddHs(mol)
        # EmbedMolecule reports failure as -1 instead of raising; without a
        # conformer there is nothing to optimise or render, so keep the 2D panel.
        if AllChem.EmbedMolecule(mol_3d, randomSeed=42) == -1:
            html_3d = '<p style="color: white;">3D conformer could not be generated for this molecule.</p>'
            log += f"3D embedding failed for {name}; showing the 2D structure only.\n"
        else:
            AllChem.MMFFOptimizeMolecule(mol_3d)
            sdf_data = Chem.MolToMolBlock(mol_3d)
            viewer = py3Dmol.view(width=400, height=300)
            viewer.setBackgroundColor('#1C1C1C')
            viewer.addModel(sdf_data, "sdf")
            viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
            viewer.zoomTo()
            html_3d = viewer._make_html()
            log += f"β Generated 2D/3D view for {name}.\n"

        combined_html = f"""
        <div style="display: flex; flex-direction: row; align-items: center; justify-content: space-around; border: 1px solid #444; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #2b2b2b;">
            <div style="text-align: center;">
                <h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (2D Structure)</h4>
                {svg_2d}
            </div>
            <div style="text-align: center;">
                <h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (3D Interactive)</h4>
                {html_3d}
            </div>
        </div>
        """
        return combined_html, log
    except Exception as e:
        return f"<p>Error visualizing {name}: {e}</p>", f"β Error visualizing {name}: {e}"
def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
    """
    Render a protein as a cartoon and, when ``ligand_resn`` is given, highlight that residue name.

    Returns ``(html, log)``; ``html`` is ``None`` when no PDB data was supplied
    or when building the scene raised.
    """
    if not pdb_data:
        return None, "Cannot generate interaction view: No PDB data provided."

    try:
        scene = py3Dmol.view(width='100%', height=650)
        scene.setBackgroundColor('#1C1C1C')
        # Protein backbone as a light-blue cartoon.
        scene.addModel(pdb_data, "pdb")
        scene.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})

        focus = {}
        if ligand_resn:
            focus = {'resn': ligand_resn}
            # Ligand as green-carbon sticks and spheres with a faint surface around it.
            scene.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
            scene.addStyle({'resn': ligand_resn}, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
            scene.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, {'resn': ligand_resn})

        # Zoom to the ligand when one was named, otherwise to the whole model.
        scene.zoomTo(focus)
        markup = scene._make_html()
        return markup, f"β Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
    except Exception as err:
        return None, f"β Interaction visualization error: {err}"
| # ===== Phase 3 Functions ===== | |
def get_phase3_molecules():
    """
    Return the Phase 3 lead-optimisation compounds as a ``{display name: SMILES}`` dict.

    Insertion order follows the listing below.
    """
    lead_compounds = (
        ('Oseltamivir (Influenza)', 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C'),
        ('Aspirin (Pain/Antiplatelet)', 'CC(=O)OC1=CC=CC=C1C(=O)O'),
        ('Remdesivir (Antiviral)', 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4'),
        ('Penicillin G (Antibiotic)', 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C'),
        ("Imatinib (Gleevec - Cancer)", "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1"),
        # Multi-kinase inhibitor for cancer
        ("Sorafenib (Kinase Inhibitor)", "Clc1cccc(Cl)c1OC(=O)Nc1ccc(nc1)NC(=O)C(C)(C)C"),
        # CORRECTED SMILES for Venetoclax
        ("Venetoclax (BCL-2 Inhibitor)", "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C"),
        # Multi-kinase inhibitor for leukemia
        ("Dasatinib (Kinase Inhibitor)", "CC1=NC(=NC=C1SC2=NC=C(C=N2)C(=O)NC3=CC=CC(=C3)N)C(=O)O"),
    )
    return dict(lead_compounds)
def calculate_comprehensive_properties(smiles_dict: dict):
    """
    Compute an extended RDKit descriptor table for lead compounds.

    Per compound: molecular weight, LogP, HBD, HBA, TPSA, rotatable bonds,
    aromatic rings, the Lipinski violation count and a text ``Drug_Like``
    marker (at most one violation).

    Args:
        smiles_dict: Mapping of compound name to SMILES string.

    Returns:
        ``(df, log)`` with numeric columns rounded to 2 decimals. Entries whose
        SMILES cannot be parsed are left out of ``df`` and reported in ``log``.
    """
    analysis = []
    log = ""
    for name, smiles in smiles_dict.items():
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            # Report the skip, as calculate_molecular_properties does.
            log += f"Invalid SMILES string skipped for {name}: {smiles}\n"
            continue
        mw = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = Descriptors.NumHDonors(mol)
        hba = Descriptors.NumHAcceptors(mol)
        violations = sum([mw > 500, logp > 5, hbd > 5, hba > 10])
        analysis.append({
            'Compound': name, 'Molecular_Weight': mw, 'LogP': logp, 'HBD': hbd, 'HBA': hba,
            'TPSA': Descriptors.TPSA(mol), 'Rotatable_Bonds': Descriptors.NumRotatableBonds(mol),
            'Aromatic_Rings': Descriptors.NumAromaticRings(mol),
            'Lipinski_Violations': violations,
            'Drug_Like': 'β Yes' if violations <= 1 else 'β No',
        })
    df = pd.DataFrame(analysis).round(2)
    log += f"β Calculated comprehensive properties for {len(df)} compounds.\n"
    return df, log
def predict_toxicity(properties_df: pd.DataFrame):
    """
    Score compounds with a random forest trained on synthetic data.

    The 500-row training set is generated inside this function from a fixed
    seed, and its labels follow a hand-written rule (MW > 550, |LogP| > 4.5,
    plus noise). The probabilities are therefore a demonstration, not a
    validated toxicity estimate.

    Args:
        properties_df: Table with the columns ``Compound``,
            ``Molecular_Weight``, ``LogP``, ``TPSA``, ``Rotatable_Bonds`` and
            ``Aromatic_Rings``. It is not modified.

    Returns:
        ``(results_df, log)``. ``results_df`` has ``Compound``,
        ``Toxicity_Probability`` (3 decimals) and ``Predicted_Risk``
        (LOW below 0.3, MODERATE below 0.7, otherwise HIGH). An empty input
        yields an empty frame and an explanatory message.
    """
    if properties_df.empty:
        return pd.DataFrame(), "Cannot predict toxicity: No properties data."

    # Private generator: same draws as np.random.seed(42) followed by the
    # module-level functions, without resetting the process-wide NumPy RNG.
    rng = np.random.RandomState(42)
    n_compounds = 500
    training_data = pd.DataFrame({
        'molecular_weight': rng.normal(400, 100, n_compounds),
        'logp': rng.normal(2.5, 1.5, n_compounds),
        'tpsa': rng.normal(80, 30, n_compounds),
        'rotatable_bonds': rng.randint(0, 15, n_compounds),
        'aromatic_rings': rng.randint(0, 5, n_compounds),
    })
    toxicity_score = (
        (training_data['molecular_weight'] > 550) * 0.4
        + (abs(training_data['logp']) > 4.5) * 0.4
        + rng.random_sample(n_compounds) * 0.2
    )
    training_data['toxic'] = (toxicity_score > 0.5).astype(int)

    features = ['molecular_weight', 'logp', 'tpsa', 'rotatable_bonds', 'aromatic_rings']
    rf_model = RandomForestClassifier(n_estimators=50, random_state=42)
    rf_model.fit(training_data[features], training_data['toxic'])

    # Map the caller's column names onto the training feature names on a fresh
    # frame instead of assigning to ``.columns`` of a selection.
    source_columns = ['Molecular_Weight', 'LogP', 'TPSA', 'Rotatable_Bonds', 'Aromatic_Rings']
    X_pred = properties_df[source_columns].rename(columns=dict(zip(source_columns, features)))
    toxicity_prob = rf_model.predict_proba(X_pred)[:, 1]

    results_df = properties_df[['Compound']].copy()
    results_df['Toxicity_Probability'] = np.round(toxicity_prob, 3)
    results_df['Predicted_Risk'] = ["π’ LOW" if p < 0.3 else "π‘ MODERATE" if p < 0.7 else "π΄ HIGH" for p in toxicity_prob]
    return results_df, "β Predicted toxicity using a pre-trained simulation model.\n"
| # ===== Phase 4 Functions ===== | |
def get_regulatory_summary():
    """Return the static AI/ML documentation summary as ``(DataFrame, log)``."""
    rows = [
        ('Data Governance',
         'Data sourced from ChEMBL, PDB, GISAID. Bias assessed via geographic distribution analysis.'),
        ('Model Architecture',
         'Graph Convolutional Network (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).'),
        ('Model Validation',
         'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.'),
        ('Interpretability',
         'SHAP used for patient stratification model outputs.'),
    ]
    table = pd.DataFrame(rows, columns=['Component', 'Description'])
    return table, "β Generated AI/ML documentation summary."
def simulate_rwd_analysis(adverse_event_text):
    """
    Combine simulated adverse-event reports with user text and chart the ten most frequent terms.

    One hundred baseline events are sampled with a fixed seed from a weighted
    list of ten event names. Every alphabetic word of three or more letters in
    ``adverse_event_text`` is lower-cased and counted as one more report.

    Args:
        adverse_event_text: Free text; ``None`` or any non-string value is
            treated as empty text.

    Returns:
        ``(results_df, plot, log)``. ``results_df`` has the columns
        ``Adverse_Event`` and ``Frequency`` (at most ten rows, most frequent
        first); ``plot`` is a horizontal Bokeh bar chart of that table.
    """
    # Private generator: same sample as np.random.seed(42) + np.random.choice,
    # without resetting the process-wide NumPy RNG.
    rng = np.random.RandomState(42)
    base_events = list(rng.choice(
        ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea', 'constipation', 'insomnia', 'muscle pain'],
        100,
        p=[0.2, 0.15, 0.12, 0.12, 0.1, 0.08, 0.08, 0.05, 0.05, 0.05]
    ))
    # re.findall raises TypeError on non-strings, so fall back to empty text.
    text = adverse_event_text if isinstance(adverse_event_text, str) else ""
    user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', text)]
    all_events = base_events + user_terms
    events_df = pd.DataFrame(all_events, columns=['Adverse_Event'])
    event_counts = events_df['Adverse_Event'].value_counts().nlargest(10).sort_values(ascending=False)
    results_df = event_counts.reset_index()
    results_df.columns = ['Adverse_Event', 'Frequency']
    log = f"β Analyzed {len(all_events)} total event reports. Identified {len(event_counts)} unique adverse events for plotting.\n"

    # Create Bokeh Plot
    source = ColumnDataSource(results_df)
    # Reversed so the most frequent event is drawn at the top of the chart.
    y_range = results_df['Adverse_Event'].tolist()[::-1]
    hover = HoverTool(tooltips=[("Event", "@Adverse_Event"), ("Frequency", "@Frequency")])
    p = figure(
        y_range=y_range, height=450, title="Top 10 Reported Adverse Events",
        sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save",
    )
    p.add_tools(hover)
    p.hbar(
        y='Adverse_Event', right='Frequency', source=source, height=0.7,
        color='#00A0FF', line_color='white', legend_label="Event Frequency"
    )

    # Style the plot for a dark theme
    p.background_fill_color = "#1C1C1C"
    p.border_fill_color = "#1C1C1C"
    p.outline_line_color = '#333333'
    p.title.text_color = "white"
    p.title.text_font_size = '16pt'
    p.title.align = "center"
    p.xaxis.axis_label = "Frequency Count"
    p.yaxis.axis_label = "Adverse Event"
    p.axis.axis_label_text_color = "#CCCCCC"
    p.axis.axis_label_text_font_size = "12pt"
    p.axis.major_label_text_color = "#AAAAAA"
    p.axis.major_label_text_font_size = "10pt"
    p.grid.grid_line_alpha = 0.3
    p.grid.grid_line_color = "#444444"
    p.x_range.start = 0
    p.legend.location = "top_right"
    p.legend.background_fill_color = "#2A2A2A"
    p.legend.background_fill_alpha = 0.7
    p.legend.border_line_color = "#444444"
    p.legend.label_text_color = "white"
    return results_df, p, log
def get_ethical_framework():
    """Return the static Ethical AI Framework table as ``(DataFrame, log)``."""
    rows = [
        ('Beneficence',
         'AI models prioritize patient outcomes and clinical efficacy.'),
        ('Non-maleficence',
         'Toxicity prediction and pharmacovigilance models aim to minimize patient harm.'),
        ('Fairness',
         'Algorithms are audited for demographic bias in training data and predictions.'),
        ('Transparency',
         'Model cards and SHAP values are provided for key decision-making processes.'),
    ]
    table = pd.DataFrame(rows, columns=['Principle', 'Implementation Strategy'])
    return table, "β Generated Ethical AI Framework summary."
| # --- 3. Streamlit UI Layout --- | |
# Initialize session state variables
# A key is only seeded when it is absent, so values already stored are kept.
if 'active_tab' not in st.session_state:
    st.session_state['active_tab'] = "Phase 1: Target Identification"

for _phase in ('p1', 'p2', 'p3', 'p4'):
    _log_key = f'log_{_phase}'
    if _log_key not in st.session_state:
        st.session_state[_log_key] = "Status logs will appear here."

for _phase in ('p1', 'p2', 'p3', 'p4'):
    _results_key = f'results_{_phase}'
    if _results_key not in st.session_state:
        # A fresh dict per phase; the phases must not share one container.
        st.session_state[_results_key] = {}
# --- Header ---
st.title("π¬ AI-Powered Drug Discovery Pipeline")
st.markdown(
    "An integrated application demonstrating a four-phase computational drug discovery workflow."
)

# --- Main Tabs for Each Phase ---
# One top-level tab per pipeline phase; the tab objects are unpacked in the
# same order as the labels.
phase_tab_labels = [
    "**Phase 1:** Target Identification",
    "**Phase 2:** Hit Discovery & ADMET",
    "**Phase 3:** Lead Optimization",
    "**Phase 4:** Pre-clinical & RWE",
]
tab1, tab2, tab3, tab4 = st.tabs(phase_tab_labels)
# --- Phase 1: Target Identification ---
# Lets the user pick a PDB structure, a protein accession and a set of sample
# compounds, runs the Phase 1 helpers on button click, stores their outputs in
# st.session_state.results_p1, and renders whatever is stored there.
with tab1:
    st.header("Phase 1: Target Identification & Initial Analysis")
    st.markdown("""
    In this initial phase, we identify and analyze a biological target (e.g., a protein) implicated in a disease.
    We fetch its 3D structure and sequence data, then evaluate a set of initial compounds for their drug-like properties.
    """)
    st.subheader("Inputs & Controls")
    # Updated PDB ID options (display label -> PDB ID)
    pdb_options = {
        "Neuraminidase (Influenza - 2HU4)": "2HU4",
        "KRAS G12D (Oncogenic Target - 7XKJ)": "7XKJ",  # Bound to MRTX-1133
        "SARS-CoV-2 Mpro (Antiviral Target - 8HUR)": "8HUR",  # Bound to Ensitrelvir
        "EGFR Kinase (Cancer Target - 1M17)": "1M17",  # Bound to Erlotinib
    }
    selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
    pdb_id_input = pdb_options[selected_pdb_name]
    # Updated NCBI Protein ID options (display label -> accession)
    protein_options = {
        "Neuraminidase (P03468)": "P03468",  # Influenza A virus (A/PR/8/34)
        "KRAS (P01116)": "P01116",  # Human KRAS
        "SARS-CoV-2 Main Protease (P0DTD1)": "P0DTD1",  # SARS-CoV-2 Mpro
        "EGFR (P00533)": "P00533",  # Human Epidermal Growth Factor Receptor
    }
    selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
    protein_id_input = protein_options[selected_protein_name]
    st.markdown("---")
    st.write("**Analyze Sample Compounds:**")
    sample_molecules = create_sample_molecules()
    preferred_defaults = [
        "Oseltamivir (Influenza)",
        "Aspirin (Pain/Inflammation)",
        "Imatinib (Gleevec - Cancer)",
    ]
    selected_molecules = st.multiselect(
        "Select from known drugs:",
        options=list(sample_molecules.keys()),
        # Keep only defaults that are actually offered, so a renamed or removed
        # sample compound cannot make the widget reject its default list.
        default=[name for name in preferred_defaults if name in sample_molecules],
    )
    if st.button("π Run Phase 1 Analysis", key="run_p1"):
        with st.spinner("Fetching data and calculating properties..."):
            full_log = "--- Phase 1 Analysis Started ---\n"
            pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
            full_log += log_pdb
            # NOTE(review): unlike the other helpers, the return value is used
            # directly as log text - confirm fetch_fasta_sequence returns a
            # plain string rather than a (data, log) tuple.
            log_fasta = fetch_fasta_sequence(protein_id_input)
            full_log += log_fasta
            smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
            properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
            full_log += log_props
            analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
            full_log += log_likeness
            protein_view_html, log_3d = visualize_protein_3d(pdb_data, title=f"PDB: {pdb_id_input}")
            full_log += log_3d
            dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
            full_log += log_dash
            full_log += "\n--- Phase 1 Analysis Complete ---"
            st.session_state.log_p1 = full_log
            st.session_state.results_p1 = {
                # Remember which structure these results belong to, so the
                # results heading stays correct if the selectbox changes later.
                'pdb_id': pdb_id_input,
                'pdb_data': pdb_data,
                'protein_view': protein_view_html,
                'properties_df': display_df,
                'dashboard': dashboard_plot,
            }
    st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")
    st.subheader("Results")
    if not st.session_state.results_p1:
        st.info("Click 'Run Phase 1 Analysis' to generate and display results.")
    else:
        res1 = st.session_state.results_p1
        # Label the view with the PDB ID that was analysed; fall back to the
        # current selection for results stored without a 'pdb_id' entry.
        analysed_pdb_id = res1.get('pdb_id', pdb_id_input)
        p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
        with p1_tabs[0]:
            st.subheader(f"3D Structure for PDB ID: {analysed_pdb_id}")
            if res1.get('protein_view'):
                st.components.v1.html(res1['protein_view'], height=600, scrolling=False)
            else:
                st.warning("Could not display 3D structure. Check PDB ID and logs.")
        with p1_tabs[1]:
            st.subheader("Physicochemical Properties Analysis")
            # The data table is displayed *before* the dashboard.
            st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            if res1.get('dashboard'):
                st.bokeh_chart(res1['dashboard'], use_container_width=True)
# --- Phase 2: Hit Discovery & ADMET ---
# Runs the simulated virtual screening and ADMET helpers on the Phase 2
# compound library, fetches the selected protein-ligand structure, stores the
# outputs in st.session_state.results_p2, and renders whatever is stored there.
with tab2:
    st.header("Phase 2: Virtual Screening & Early ADMET")
    st.markdown("""
    This phase simulates a virtual screening process to identify 'hits' from a larger library of compounds.
    We predict their binding affinity to the target and assess their basic ADMET (Absorption, Distribution,
    Metabolism, Excretion, Toxicity) profiles.
    """)
    st.subheader("Inputs & Controls")
    p2_molecules = get_phase2_molecules()
    st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
    # Updated PDB ID for Interaction options
    # (display label -> PDB ID and ligand residue name).
    # NOTE(review): the ligand residue names were not verified against the
    # HETATM records of each PDB entry - confirm every code before relying on
    # the interaction view.
    interaction_pdb_options = {
        "Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"},
        # Label uses G12D to agree with the Phase 1 entry for the same PDB ID
        # (7XKJ, bound to MRTX-1133); it previously read "G12C".
        "KRAS G12D + MRTX-1133 (7XKJ)": {"pdb": "7XKJ", "ligand": "M13"},
        "SARS-CoV-2 Mpro + Ensitrelvir (8HUR)": {"pdb": "8HUR", "ligand": "X77"},
        "EGFR + Erlotinib (1M17)": {"pdb": "1M17", "ligand": "ERL"},
    }
    selected_interaction_pdb_name = st.selectbox(
        "Select PDB ID for Interaction:",
        options=list(interaction_pdb_options.keys()),
        index=0,  # Default to Neuraminidase
    )
    p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
    p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]
    st.write(f"Selected PDB: `{p2_pdb_id}`, Selected Ligand Residue Name: `{p2_ligand_resn}`")
    if st.button("π Run Phase 2 Analysis", key="run_p2"):
        with st.spinner("Running virtual screening and ADMET predictions..."):
            full_log = "--- Phase 2 Analysis Started ---\n"
            screening_df, log_screen = simulate_virtual_screening(p2_molecules)
            full_log += log_screen
            admet_df, log_admet = predict_admet_properties(p2_molecules)
            full_log += log_admet
            # Combine screening and ADMET rows for the same molecule.
            merged_df = pd.merge(screening_df, admet_df, on="Molecule")
            pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
            full_log += log_pdb_p2
            interaction_view, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
            full_log += log_interact
            full_log += "\n--- Phase 2 Analysis Complete ---"
            st.session_state.log_p2 = full_log
            st.session_state.results_p2 = {
                'merged_df': merged_df,
                'interaction_view': interaction_view,
                # Remember which complex these results belong to, so the
                # results heading stays correct if the selectbox changes later.
                'pdb_id': p2_pdb_id,
                'ligand_resn': p2_ligand_resn,
            }
    st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")
    st.subheader("Results")
    if not st.session_state.results_p2:
        st.info("Click 'Run Phase 2 Analysis' to generate and display results.")
    else:
        res2 = st.session_state.results_p2
        # Label the view with the complex that was analysed; fall back to the
        # current selection for results stored without these entries.
        shown_pdb_id = res2.get('pdb_id', p2_pdb_id)
        shown_ligand_resn = res2.get('ligand_resn', p2_ligand_resn)
        p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
        with p2_tabs[0]:
            st.subheader("Virtual Screening & Early ADMET Predictions")
            st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        with p2_tabs[1]:
            st.subheader(f"Simulated Interaction for PDB {shown_pdb_id} with Ligand {shown_ligand_resn}")
            if res2.get('interaction_view'):
                st.components.v1.html(res2['interaction_view'], height=700, scrolling=False)
            else:
                st.warning("Could not display interaction view. Check inputs and logs.")
# --- Phase 3: Lead Optimization ---
# Lets the user pick lead compounds, runs the property and toxicity helpers on
# button click, builds one 2D/3D view per compound, stores everything in
# st.session_state.results_p3, and renders whatever is stored there.
with tab3:
    st.header("Phase 3: Lead Compound Optimization")
    st.markdown("""
    In lead optimization, promising 'hit' compounds are refined to improve their efficacy and safety.
    Here, we analyze a few selected lead candidates, perform more detailed property calculations,
    and predict their toxicity risk using a simulated machine learning model.
    """)
    st.subheader("Inputs & Controls")
    p3_molecules = get_phase3_molecules()
    preferred_leads = [
        'Oseltamivir (Influenza)',
        'Remdesivir (Antiviral)',
        'Imatinib (Gleevec - Cancer)',
    ]
    selected_leads = st.multiselect(
        "Select lead compounds to optimize:",
        options=list(p3_molecules.keys()),
        # Keep only defaults that are actually offered, so a renamed or removed
        # compound cannot make the widget reject its default list.
        default=[name for name in preferred_leads if name in p3_molecules],
    )
    if st.button("π Run Phase 3 Analysis", key="run_p3"):
        if not selected_leads:
            # With nothing selected there is no "Compound" data to merge on,
            # so stop here instead of running the helpers on empty input.
            st.warning("Select at least one lead compound before running the analysis.")
        else:
            with st.spinner("Analyzing lead compounds and predicting toxicity..."):
                full_log = "--- Phase 3 Analysis Started ---\n"
                smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}
                comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
                full_log += log_comp
                toxicity_df, log_tox = predict_toxicity(comp_props_df)
                full_log += log_tox
                # Attach the toxicity prediction to each compound's properties.
                final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
                visuals = {}
                for name, smiles in smiles_to_analyze_p3.items():
                    html_view, log_vis = visualize_molecule_2d_3d(smiles, name)
                    visuals[name] = html_view
                    full_log += log_vis
                full_log += "\n--- Phase 3 Analysis Complete ---"
                st.session_state.log_p3 = full_log
                st.session_state.results_p3 = {
                    'final_df': final_df,
                    'visuals': visuals,
                }
    st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")
    st.subheader("Results")
    if not st.session_state.results_p3:
        st.info("Click 'Run Phase 3 Analysis' to generate and display results.")
    else:
        res3 = st.session_state.results_p3
        st.subheader("Lead Compound Analysis & Toxicity Prediction")
        st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        st.subheader("2D & 3D Molecular Structures")
        for name, visual_html in res3.get('visuals', {}).items():
            # Same guard as the Phase 1 and Phase 2 views: only embed a view
            # that was actually produced.
            if visual_html:
                st.components.v1.html(visual_html, height=430, scrolling=False)
            else:
                st.warning(f"Could not display structure for {name}. Check logs.")
# --- Phase 4: Pre-clinical & RWE ---
# Takes free-text adverse event reports, runs the regulatory, ethical and
# real-world-data helpers on button click, stores their outputs in
# st.session_state.results_p4, and renders whatever is stored there.
with tab4:
    st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
    st.markdown("""
    This final phase simulates post-market analysis. We analyze text data for adverse events (pharmacovigilance)
    and present documentation related to the AI models and ethical frameworks that would be required for regulatory submission.
    """)
    st.subheader("Inputs & Controls")
    rwd_input = st.text_area(
        "Enter simulated adverse event report text:",
        "Patient reports include instances of headache, severe nausea, and occasional skin rash. Some noted dizziness after taking the medication.",
        height=150,
    )

    run_phase4 = st.button("π Run Phase 4 Analysis", key="run_p4")
    if run_phase4:
        with st.spinner("Analyzing real-world data and generating reports..."):
            # Collect log fragments in call order and join them once.
            log_parts = ["--- Phase 4 Analysis Started ---\n"]
            regulatory_table, regulatory_log = get_regulatory_summary()
            log_parts.append(regulatory_log)
            ethics_table, ethics_log = get_ethical_framework()
            log_parts.append(ethics_log)
            events_table, events_chart, events_log = simulate_rwd_analysis(rwd_input)
            log_parts.append(events_log)
            log_parts.append("\n--- Phase 4 Analysis Complete ---")
            st.session_state.log_p4 = "".join(log_parts)
            st.session_state.results_p4 = dict(
                rwd_df=events_table,
                plot_bar=events_chart,
                reg_df=regulatory_table,
                eth_df=ethics_table,
            )

    st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")
    st.subheader("Results")
    if st.session_state.results_p4:
        res4 = st.session_state.results_p4
        pharmacovigilance_tab, frameworks_tab = st.tabs(
            ["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"]
        )
        with pharmacovigilance_tab:
            st.subheader("Simulated Adverse Event Analysis")
            # The chart is optional; the table is always rendered.
            stored_chart = res4.get('plot_bar')
            if stored_chart:
                st.bokeh_chart(stored_chart, use_container_width=True)
            st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        with frameworks_tab:
            st.subheader("AI/ML Model Regulatory Summary")
            st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            st.subheader("Ethical AI Framework")
            st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
    else:
        st.info("Click 'Run Phase 4 Analysis' to generate and display results.")