"""
Neural Network Models for Tabular and Sequential Data
=====================================================
Overview
--------
This module defines two neural network architectures tailored for different data modalities:
- **RealisticLSTM**: A bidirectional LSTM model with attention over time, designed for sequential input (e.g., time series).
- **TabularMLP**: A simple multi-layer perceptron (MLP) for static, tabular input.
Both models are suitable for regression or binary classification tasks, and can be used in conjunction with
feature attribution or SHAP-based interpretability methods.
Model Summaries
^^^^^^^^^^^^^^^^
- **RealisticLSTM**:
- Bidirectional LSTM with configurable depth and dropout.
- Temporal attention mechanism over LSTM outputs.
- Feedforward head for final prediction.
- Input shape: ``(batch_size, seq_len, input_dim)``
- **TabularMLP**:
- Lightweight MLP with one hidden layer and ReLU activation.
- Input shape: ``(batch_size, input_dim)``
Use Case
--------
These models are intended for:
- Interpretable machine learning pipelines.
- SHAP explanation experiments on sequential and tabular benchmarks.
- General-purpose regression or binary classification.
Example
-------
.. code-block:: python
model_seq = RealisticLSTM(input_dim=5, hidden_dim=32)
model_tab = TabularMLP(input_dim=10)
y_pred_seq = model_seq(x_seq) # x_seq: shape (B, T, 5)
y_pred_tab = model_tab(x_tab) # x_tab: shape (B, 10)
"""
import torch
import torch.nn as nn
__all__ = ["RealisticLSTM", "TabularMLP"]


class RealisticLSTM(nn.Module):
    r"""
    Bidirectional LSTM model with temporal attention for sequential input.

    This model processes sequence data with a bidirectional LSTM and computes an attention-weighted
    context vector across time. The resulting vector is passed through a feedforward head for the final prediction.

    .. note::
        Designed for regression or binary classification tasks on temporal data such as time series.

    :param int input_dim: Number of input features at each timestep.
    :param int hidden_dim: Hidden size for the LSTM and intermediate layers (default: 32).
    :param int num_layers: Number of LSTM layers (default: 2).
    :param int output_dim: Output dimension, e.g., 1 for scalar prediction (default: 1).
    :param float dropout: Dropout rate applied between LSTM layers and after the attention context (default: 0.2).
    :return: Output tensor of shape ``(batch_size, output_dim)``.
    :rtype: torch.Tensor
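
    Example (a minimal usage sketch; the batch size and sequence length shown are illustrative):

    .. code-block:: python

        model = RealisticLSTM(input_dim=5, hidden_dim=32)
        x = torch.randn(8, 20, 5)   # (batch_size, seq_len, input_dim)
        y = model(x)                # (8, 1)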
"""
    def __init__(
        self, input_dim, hidden_dim=32, num_layers=2, output_dim=1, dropout=0.2
    ):
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(dropout)
        self.attn_fc = nn.Linear(hidden_dim * 2, 1)  # attention over time
        self.head = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )
    def forward(self, x):
        out, _ = self.lstm(x)  # (B, T, 2*H)
        attn_weights = torch.softmax(self.attn_fc(out).squeeze(-1), dim=1)  # (B, T)
        context = torch.sum(out * attn_weights.unsqueeze(-1), dim=1)  # (B, 2*H)
        context = self.dropout(context)
        return self.head(context)


class TabularMLP(nn.Module):
    r"""
    Lightweight multilayer perceptron (MLP) for tabular regression or classification tasks.

    Consists of a single hidden layer with ReLU activation and a linear output layer.
    Suitable for benchmarking SHAP explanations on tabular data.

    :param int input_dim: Number of input features.
    :param int hidden_dim: Number of hidden units (default: 16).
    :param int output_dim: Output dimension (default: 1).
    :return: Output tensor of shape ``(batch_size, output_dim)``.
    :rtype: torch.Tensor
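
    Example (a minimal usage sketch; the batch size shown is illustrative):

    .. code-block:: python

        model = TabularMLP(input_dim=10)
        x = torch.randn(8, 10)   # (batch_size, input_dim)
        y = model(x)             # (8, 1)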
"""
    def __init__(self, input_dim, hidden_dim=16, output_dim=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )
    def forward(self, x):
        return self.net(x)
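

if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch, not part of the public API):
    # run both models on random tensors and print the output shapes. The
    # batch size, sequence length, and feature counts below are arbitrary
    # example values chosen to match the shapes documented above.
    batch_size, seq_len = 8, 20

    model_seq = RealisticLSTM(input_dim=5, hidden_dim=32)
    x_seq = torch.randn(batch_size, seq_len, 5)   # (B, T, input_dim)
    y_seq = model_seq(x_seq)
    print("RealisticLSTM output shape:", tuple(y_seq.shape))  # (8, 1)

    model_tab = TabularMLP(input_dim=10)
    x_tab = torch.randn(batch_size, 10)           # (B, input_dim)
    y_tab = model_tab(x_tab)
    print("TabularMLP output shape:", tuple(y_tab.shape))     # (8, 1)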