In [1]:
# ガラス物性の多出力回帰

import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.compose import TransformedTargetRegressor

# ============= User settings =============
# Input CSV and the names of the columns to predict (regression targets).
DATA_PATH = "./densityRT-TgTypical-refractivedline.csv"
TARGET_COLS = ["Density", "Tg", "RI"]

# Hold-out split: fraction of rows reserved for testing, and the split seed.
TEST_SIZE, RANDOM_STATE = 0.3, 0

# Values handed to MLPRegressor as hidden_layer_sizes, max_iter and alpha.
HIDDEN_SIZES = (80, 80)
MAX_ITER = 3_000
ALPHA = 1e-5

# ============= Data loading and preprocessing =============
df = pd.read_csv(DATA_PATH)

# A missing target must not be replaced by 0: that would invent a measurement
# (e.g. Density = 0) and the model would be trained and scored on it.
# Rows lacking any target value are discarded instead.
n_rows_read = len(df)
df = df.dropna(subset=TARGET_COLS)
n_rows_dropped = n_rows_read - len(df)
if n_rows_dropped:
    print(f"[INFO] dropped {n_rows_dropped} rows with missing target values")

# X = every numeric column except the targets. Missing feature values are
# filled with 0 (presumably "component absent" -- confirm against the dataset).
num_df = df.select_dtypes(include=[np.number])
X = num_df.drop(columns=TARGET_COLS, errors="ignore").fillna(0).values
# Y = the target columns, in TARGET_COLS order.
Y = df[TARGET_COLS].values

print("[INFO] X shape:", X.shape, "Y shape:", Y.shape)

# ============= Training and evaluation =============
# Reserve a TEST_SIZE fraction of the rows for evaluation; the split is made
# reproducible by seeding it with RANDOM_STATE.
split_arrays = train_test_split(
    X,
    Y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)
X_train, X_test, Y_train, Y_test = split_arrays

# MLP configuration.
# NOTE: the network seed is fixed at 50 here, independently of RANDOM_STATE
# (which only seeds the train/test split).
mlp_params = dict(
    hidden_layer_sizes=HIDDEN_SIZES,
    random_state=50,
    max_iter=MAX_ITER,
    alpha=ALPHA,
    # Early stopping: 10% of the training data is set aside for validation,
    # with n_iter_no_change=25 as the patience setting.
    early_stopping=True,
    n_iter_no_change=25,
    validation_fraction=0.1,
)
base_mlp = MLPRegressor(**mlp_params)

# Wrap the MLP so that Y is standardized for fitting and predictions are
# mapped back to the original units.
target_scaled_mlp = TransformedTargetRegressor(
    regressor=base_mlp,
    transformer=StandardScaler(),
)

# Full model: standardize X, then the target-standardizing MLP.
model = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("mlp", target_scaled_mlp),
])

# Fit on the training split and predict the held-out rows
# (a 70% / 30% split with TEST_SIZE = 0.3).
Y_pred = model.fit(X_train, Y_train).predict(X_test)

print("\n===== R² scores (with Y standardized) =====")
# Overall score: unweighted mean of the per-target R² values.
overall_r2 = r2_score(Y_test, Y_pred, multioutput="uniform_average")
print("Total:", overall_r2)
# Per-target scores, one column of Y at a time, in TARGET_COLS order.
for i, tgt in enumerate(TARGET_COLS):
    target_r2 = r2_score(Y_test[:, i], Y_pred[:, i])
    print(f"{tgt}:", target_r2)

# ============= Plot ranges =============
# Axis limits (min, max) per target; the same limits are applied to both the
# actual and the predicted axis of each parity plot.
plot_ranges = dict(
    Density=(0.0, 10.0),
    Tg=(0, 1000),
    RI=(1.0, 2.5),
)

# ============= Visualization (predicted vs. actual) =============
# One parity plot per target, written to figs/pred_vs_actual_<target>.png.
os.makedirs("figs", exist_ok=True)
for i, tgt in enumerate(TARGET_COLS):
    actual, predicted = Y_test[:, i], Y_pred[:, i]

    if tgt in plot_ranges:
        # Use the configured axis range for this target.
        xmin, xmax = plot_ranges[tgt]
    else:
        # TARGET_COLS is user-configurable, so a target may have no entry in
        # plot_ranges. Derive a padded range from the data instead of
        # failing with KeyError.
        values = np.concatenate([actual, predicted])
        xmin, xmax = float(values.min()), float(values.max())
        margin = 0.05 * (xmax - xmin) or 1.0  # avoid a zero-width axis
        xmin, xmax = xmin - margin, xmax + margin

    plt.figure()
    plt.scatter(actual, predicted, s=16)

    # Dashed y = x reference line; identical limits on both axes.
    plt.plot([xmin, xmax], [xmin, xmax], "--")
    plt.xlim(xmin, xmax)
    plt.ylim(xmin, xmax)

    plt.xlabel(f"Actual {tgt}")
    plt.ylabel(f"Predicted {tgt}")
    plt.title(f"Pred vs Actual ({tgt})")
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(f"figs/pred_vs_actual_{tgt}.png", dpi=150)
    # Close the figure so open figures do not accumulate across iterations.
    plt.close()

print("\n[INFO] 図を figs/ に保存しました。")



[INFO] X shape: (4900, 88) Y shape: (4900, 3)

===== R² scores (with Y standardized) =====
Total: 0.8257248787419984
Density: 0.7832146793319618
Tg: 0.8646976288274539
RI: 0.8292623280665796

[INFO] 図を figs/ に保存しました。
