Examples
Example 1: Training Pipeline With Imbalance Handling
from pipeline_forge import FeaturePipelineBuilder
# X_train and y_train are not defined in this snippet; they must already exist
# in the caller's session.
# The builder is created with the label column name and a fixed seed (42).
train_pipe = FeaturePipelineBuilder(target_col="target", random_seed=42)
# The parentheses only allow one step per line. The value of the whole chain
# is discarded, so the steps are presumably recorded on `train_pipe` itself.
# NOTE(review): confirm that every step method returns the same builder.
(
train_pipe
.validate_schema(required_cols=["city", "channel", "hour", "age", "balance"], strict=False)
.group_rare_categories(categorical_cols=["city"], min_freq=0.02)
.target_encode(categorical_cols=["city", "channel"], smoothing=20.0, drop_original=True)
.encode_cyclical_time(period_map={"hour": 24})
.add_interaction_features(numeric_cols=["age", "balance"])
.clip_outliers(numeric_cols=["balance"], lower_quantile=0.01, upper_quantile=0.99)
.transform_distribution(numeric_cols=["balance"], method="yeo-johnson")
.scaler("robust")
.monitor_drift(numeric_cols=["age", "balance"])
# The resampling step is declared last in this chain.
.random_oversampling(sampling_strategy=1.0)
)
# build_pipeline() is called once, after all steps are declared and before
# any fit/transform call.
train_pipe.build_pipeline()
# Both calls below receive the same raw X_train / y_train.
# The result of fit_transform_features is bound to a single name.
X_train_ft = train_pipe.fit_transform_features(X_train, y_train)
# The result of fit_resample_features is unpacked into two names
# (features and labels).
# NOTE(review): presumably only this call applies the oversampling step -- confirm.
X_train_bal, y_train_bal = train_pipe.fit_resample_features(X_train, y_train)
Example 2: Scoring Pipeline (No Resampling)
# X_train, y_train and X_test are not defined in this snippet; they must
# already exist in the caller's session.
score_pipe = FeaturePipelineBuilder(target_col="target", random_seed=42)
# Same leading steps and arguments as the Example 1 chain, but without the
# clip_outliers, transform_distribution, monitor_drift and
# random_oversampling steps.
# The chain's value is discarded, so the steps are presumably recorded on
# `score_pipe` in place -- NOTE(review): confirm against the builder.
(
score_pipe
.validate_schema(required_cols=["city", "channel", "hour", "age", "balance"], strict=False)
.group_rare_categories(categorical_cols=["city"], min_freq=0.02)
.target_encode(categorical_cols=["city", "channel"], smoothing=20.0, drop_original=True)
.encode_cyclical_time(period_map={"hour": 24})
.add_interaction_features(numeric_cols=["age", "balance"])
.scaler("robust")
)
score_pipe.build_pipeline()
# Fitting and transforming are separate calls here: fit_features receives the
# training features and labels, and its return value is not used.
score_pipe.fit_features(X_train, y_train)
# transform_features is given X_test only; no labels are passed.
X_test_ft = score_pipe.transform_features(X_test)
Example 3: Text + Tabular Pipeline
# X_train and y_train are not defined in this snippet; they must already
# exist in the caller's session.
# No random_seed is passed here, unlike the other examples in this section.
pipe = FeaturePipelineBuilder(target_col="target")
# The chain's value is discarded, so the steps are presumably recorded on
# `pipe` in place -- NOTE(review): confirm against the builder.
(
pipe
.validate_schema(required_cols=["description", "amount", "country"], strict=False)
.group_rare_categories(categorical_cols=["country"], min_freq=0.01)
# No `smoothing` argument is passed here, so the builder's default applies.
.target_encode(categorical_cols=["country"], drop_original=True)
# The "description" column is the only text column handled in this chain.
# NOTE(review): presumably `prefix` names the generated columns -- confirm.
.vectorize_text(text_col="description", max_features=300, prefix="DESC", drop_original=True)
.scaler("standard")
)
pipe.build_pipeline()
# A single call fits on the training data and returns the transformed features.
X_ft = pipe.fit_transform_features(X_train, y_train)
Example 4: Selection-Focused Pipeline
# X_train and y_train are not defined in this snippet; X_train is indexed by
# column name and exposes `.columns`, so it is presumably a pandas DataFrame
# -- NOTE(review): confirm.
# `pipe` is re-bound here; if the examples run in one session, this replaces
# the builder from the previous example.
pipe = FeaturePipelineBuilder(target_col="target", random_seed=42)
# The chain's value is discarded, so the steps are presumably recorded on
# `pipe` in place -- NOTE(review): confirm against the builder.
(
pipe
# Every column currently in X_train is listed as required.
.validate_schema(required_cols=list(X_train.columns), strict=False)
# The median is evaluated immediately, while the chain is being built, so a
# plain number computed from X_train is what gets passed to fill_na.
.fill_na({"age": X_train["age"].median()})
.select_by_correlation(drop_thresh=0.97)
.select_by_random_forest(importance_thresh=0.85)
)
pipe.build_pipeline()
X_selected = pipe.fit_transform_features(X_train, y_train)
Example 5: Explainability Report Hook
from sklearn.ensemble import RandomForestClassifier
# X_train_ft and y_train are not defined in this snippet; the name X_train_ft
# is assigned in Example 1 (output of fit_transform_features).
estimator = RandomForestClassifier(random_state=42)
# The estimator is fitted before it is handed to the builder.
estimator.fit(X_train_ft, y_train)
pipe = FeaturePipelineBuilder(target_col="target", random_seed=42)
# The explainability step is the only step declared on this builder.
pipe.explain_with_permutation_importance(model=estimator, scoring="roc_auc", n_repeats=10)
pipe.build_pipeline()
# The return value is deliberately discarded; the call is made for the
# results recorded by the explainability step.
_ = pipe.fit_transform_features(X_train_ft, y_train)
# The explainability results are stored on the explainability transformer
# instance inside the built pipeline, not returned by the call above.
# How to reach that instance depends on the sequence of steps you built.