diff --git a/Makefile b/Makefile
index 3835fb4..373e155 100644
--- a/Makefile
+++ b/Makefile
@@ -80,6 +80,11 @@ benchmark: requirements
 	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.benchmark main $(MPNN_FLAG) $(DEVICE_FLAG)
 	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.benchmark test $(DEVICE_FLAG)
 
+## Evaluate the baseline method on the public test splits (compares test.csv with preds.csv in cv_*)
+.PHONY: baseline
+baseline: requirements
+	$(PYTHON_INTERPRETER) scripts/evaluate_external.py
+
 #################################################################################
 # TRAINING (Nested CV + Optuna)                                                #
 #################################################################################
diff --git a/lnp_ml/features.py b/lnp_ml/features.py
deleted file mode 100644
index f103db7..0000000
--- a/lnp_ml/features.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from pathlib import Path
-
-from loguru import logger
-from tqdm import tqdm
-import typer
-
-from lnp_ml.config import PROCESSED_DATA_DIR
-
-app = typer.Typer()
-
-
-@app.command()
-def main(
-    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
-    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
-    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
-    # -----------------------------------------
-):
-    # ---- REPLACE THIS WITH YOUR OWN CODE ----
-    logger.info("Generating features from dataset...")
-    for i in tqdm(range(10), total=10):
-        if i == 5:
-            logger.info("Something happened for iteration 5.")
-    logger.success("Features generation complete.")
-    # -----------------------------------------
-
-
-if __name__ == "__main__":
-    app()
diff --git a/lnp_ml/plots.py b/lnp_ml/plots.py
deleted file mode 100644
index 34628ab..0000000
--- a/lnp_ml/plots.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from pathlib import Path
-
-from loguru import logger
-from tqdm import tqdm
-import typer
-
-from lnp_ml.config import FIGURES_DIR, PROCESSED_DATA_DIR
-
-app = typer.Typer()
-
-
-@app.command()
-def main(
-    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
-    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
-    output_path: Path = FIGURES_DIR / "plot.png",
-    # -----------------------------------------
-):
-    # ---- REPLACE THIS WITH YOUR OWN CODE ----
-    logger.info("Generating plot from data...")
-    for i in tqdm(range(10), total=10):
-        if i == 5:
-            logger.info("Something happened for iteration 5.")
-    logger.success("Plot generation complete.")
-    # -----------------------------------------
-
-
-if __name__ == "__main__":
-    app()
diff --git a/scripts/data_cleaning.py b/scripts/data_cleaning.py
deleted file mode 100644
index 8858626..0000000
--- a/scripts/data_cleaning.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""数据清洗脚本：修正原始数据中的问题"""
-
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import typer
-from loguru import logger
-
-from lnp_ml.config import RAW_DATA_DIR, INTERIM_DATA_DIR
-
-
-app = typer.Typer()
-
-
-@app.command()
-def main(
-    input_path: Path = RAW_DATA_DIR / "internal_deleted_uncorrected.xlsx",
-    output_path: Path = INTERIM_DATA_DIR / "internal_corrected.csv",
-):
-    """
-    清洗原始数据，修正已知问题。
-    
-    修正内容：
-    1. 修正肌肉注射组 Biodistribution_muscle=0.7745 的数据
-    2. 修复阳性对照组 (Amine="Crtl") 的数据
-    3. 按给药途径分组进行 z-score 标准化
-    4. 对 size 列取 log
-    """
-    logger.info(f"Loading data from {input_path}")
-    df = pd.read_excel(input_path, header=2)
-    logger.info(f"Loaded {len(df)} samples")
-
-    # 修正肌肉注射组 0.7745 的数据
-    logger.info("Correcting Biodistribution_muscle=0.7745 rows...")
-    rows_to_correct = df[df["Biodistribution_muscle"] == 0.7745]
-    for index, row in rows_to_correct.iterrows():
-        total_biodistribution = pd.to_numeric(row[[
-            "Biodistribution_lymph_nodes",
-            "Biodistribution_heart",
-            "Biodistribution_liver",
-            "Biodistribution_spleen",
-            "Biodistribution_lung",
-            "Biodistribution_kidney",
-            "Biodistribution_muscle"
-        ]]).sum()
-        df.at[index, "Biodistribution_lymph_nodes"] = pd.to_numeric(row["Biodistribution_lymph_nodes"]) / total_biodistribution
-        df.at[index, "Biodistribution_heart"] = pd.to_numeric(row["Biodistribution_heart"]) / total_biodistribution
-        df.at[index, "Biodistribution_liver"] = pd.to_numeric(row["Biodistribution_liver"]) / total_biodistribution
-        df.at[index, "Biodistribution_spleen"] = pd.to_numeric(row["Biodistribution_spleen"]) / total_biodistribution
-        df.at[index, "Biodistribution_lung"] = pd.to_numeric(row["Biodistribution_lung"]) / total_biodistribution
-        df.at[index, "Biodistribution_kidney"] = pd.to_numeric(row["Biodistribution_kidney"]) / total_biodistribution
-        df.at[index, "Biodistribution_muscle"] = pd.to_numeric(row["Biodistribution_muscle"]) / total_biodistribution
-        df.at[index, "quantified_total_luminescence"] = pd.to_numeric(row["quantified_total_luminescence"]) / (1 - 0.7745)
-        df.at[index, "unnormalized_delivery"] = df.at[index, "quantified_total_luminescence"]
-    logger.info(f"  Corrected {len(rows_to_correct)} rows")
-
-    # 修复阳性对照组的数据
-    logger.info("Fixing control group (Amine='Crtl')...")
-    rows_to_override = df["Amine"] == "Crtl"
-    df.loc[rows_to_override, "quantified_total_luminescence"] = 1
-    df.loc[rows_to_override, "unnormalized_delivery"] = 1
-    logger.info(f"  Fixed {rows_to_override.sum()} rows")
-
-    # 分别对肌肉注射组和静脉注射组重新进行 z-score 标准化
-    logger.info("Z-score normalizing delivery by Route_of_administration...")
-    df["unnormalized_delivery"] = pd.to_numeric(df["unnormalized_delivery"], errors="coerce")
-    df["quantified_delivery"] = (
-        df.groupby("Route_of_administration")["unnormalized_delivery"]
-          .transform(lambda x: (x - x.mean()) / x.std())
-    )
-
-    # 对 size 列取 log
-    logger.info("Log-transforming size column...")
-    df["size"] = pd.to_numeric(df["size"], errors="coerce")
-    df["size"] = np.log(df["size"].replace(0, np.nan))  # 避免 log(0)
-
-    # 保存
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    df.to_csv(output_path, index=False)
-    logger.success(f"Saved cleaned data to {output_path}")
-
-
-if __name__ == "__main__":
-    app()