The following example shows how to bin an age variable into groups, along with some simple missing-value imputation procedures.
There is also an example of transforming a timestamp variable into day-of-week and hour information.
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin


# utility functions

# Inclusive upper bound of each age band paired with its label, in ascending
# order. Ages above the last bound fall into '65+'.
_AGE_BANDS = (
    (20, '16-20'),
    (24, '21-24'),
    (34, '25-34'),
    (44, '35-44'),
    (54, '45-54'),
    (64, '55-64'),
)


def age_input(age):
    """Map a single age value to its age-group label.

    Args:
        age: A scalar age. Null values (as detected by ``pd.isnull``) are
            allowed; anything else must be convertible with ``int()``.

    Returns:
        ``'missing'`` for null input, otherwise one of ``'16-20'``,
        ``'21-24'``, ``'25-34'``, ``'35-44'``, ``'45-54'``, ``'55-64'`` or
        ``'65+'``. Every age at or below 20 (including ages under 16) is
        labelled ``'16-20'``.

    Raises:
        ValueError / TypeError: If a non-null ``age`` cannot be converted
            by ``int()``.
    """
    if pd.isnull(age):
        return 'missing'
    age = int(age)  # truncates fractional ages, e.g. 20.9 -> 20
    for upper, label in _AGE_BANDS:
        if age <= upper:
            return label
    return '65+'


# missing value handling or imputation in dataframe
def missing_handle(df):
    """Impute missing values column by column, modifying ``df`` in place.

    Rules, chosen by column dtype:

    * object / string columns: nulls are replaced by the string
      ``'missing'``.
    * boolean columns: a ``<col>_null`` indicator column (1 where the value
      was null, else 0) is added, then nulls are replaced by the column's
      most frequent value. If the column has no non-null value there is no
      mode, so the column itself is left unchanged.
    * all other columns: nulls are replaced by ``-999``.

    Args:
        df: The DataFrame to impute. It is mutated; pass a copy to keep the
            original intact.

    Returns:
        The same ``df`` object, after imputation.
    """
    is_object = pd.api.types.is_object_dtype
    is_string = pd.api.types.is_string_dtype
    is_bool = pd.api.types.is_bool_dtype

    # Snapshot the column names: indicator columns are added inside the loop
    # and must not be visited themselves.
    for col in list(df.columns):
        column = df[col]
        if is_object(column) or is_string(column):
            df[col] = column.fillna('missing')
        elif is_bool(column):
            # Record where values were missing before they are overwritten.
            df[col + '_null'] = column.isna().astype(int)
            modes = column.mode()
            if not modes.empty:
                df[col] = column.fillna(modes.iloc[0])
        else:
            df[col] = column.fillna(-999)
    return df


class dayandhour_Transformer(BaseEstimator, TransformerMixin):
    """Stateless transformer deriving day-of-week, hour and age-group features.

    ``transform`` expects a DataFrame with a ``'sentat'`` column (parseable
    by ``pd.to_datetime``) and an ``'age'`` column. Both source columns are
    dropped from the output and replaced by ``'dayofweek'``, ``'hour'`` and
    ``'age_group'``; remaining nulls are then imputed with
    ``missing_handle``.
    """

    # Class Constructor
    def __init__(self):
        print('initialized')

    # Return self, nothing else to do here
    def fit(self, X, y=None):
        """No-op fit: the transformer learns nothing from the data."""
        return self

    # Customized transformer method
    def transform(self, X_, y=None):
        """Return a transformed copy of ``X_``; the input is not modified.

        Args:
            X_: DataFrame containing at least ``'sentat'`` and ``'age'``.
            y: Ignored; present for scikit-learn API compatibility.

        Returns:
            A new DataFrame without ``'sentat'`` and ``'age'`` and with
            ``'dayofweek'``, ``'hour'`` and ``'age_group'`` added, after
            missing-value imputation.
        """
        X = X_.copy()

        # Parse the timestamp once and reuse it for both derived features.
        sent_at = pd.to_datetime(X['sentat'])
        X['dayofweek'] = sent_at.dt.dayofweek
        X['hour'] = sent_at.dt.hour
        X = X.drop('sentat', axis=1)

        # apply age group function here
        X['age_group'] = X['age'].apply(age_input)
        X = X.drop('age', axis=1)

        # apply missing handling here (X is already a private copy)
        return missing_handle(X)


# define the transformer
dayandhour_transformer = dayandhour_Transformer()


if __name__ == "__main__":
    # usage example on a small illustrative frame
    df = pd.DataFrame(
        {
            'sentat': ['2021-03-01 08:15:00', '2021-03-06 21:40:00', None],
            'age': [19, 42, None],
            'country': ['US', None, 'DE'],
        }
    )
    df_new = dayandhour_transformer.transform(df)
    print(df_new)