2795529-Python One Hot Encoding

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 2

import pandas as pd

from sklearn.preprocessing import StandardScaler, OneHotEncoder


from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Sample dataset: five records mixing numeric and categorical columns.
# The None entries in 'Age' and 'Income' act as missing values.
data = dict(
    Age=[25, 30, 35, None, 40],
    Gender=['Male', 'Female', 'Male', 'Male', 'Female'],
    Income=[50000, 60000, None, 70000, 80000],
    Education=['Bachelor', 'Master', 'PhD', 'Bachelor', 'Master'],
)

# Tabular view of the sample records, one column per key of `data`.
df = pd.DataFrame(data=data)

# Define preprocessing steps for numerical and categorical features

# Column names must match the DataFrame's columns exactly: a name that is
# not a column of `df` cannot be selected, so the numeric list uses 'Income'.
numeric_features = ['Age', 'Income']
categorical_features = ['Gender', 'Education']

# Numeric columns: fill gaps with the column mean, then standardize.
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
])

# Categorical columns: fill gaps with the most frequent value, then
# one-hot encode the result.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder()),
])

# Combine the transformers using ColumnTransformer: each entry is a
# (name, transformer, columns) triple that routes a column group to its
# own pipeline.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
])

# Fit the preprocessing steps on the dataset and transform it in one call.
transformed_data = preprocessor.fit_transform(df)

# Show the transformed data.
print(transformed_data)

#------------------------------------------------------------------------------------------------------------------

# import pandas as pd
# from sklearn.preprocessing import StandardScaler, OneHotEncoder
# from sklearn.compose import ColumnTransformer
# from sklearn.pipeline import Pipeline
# from sklearn.impute import SimpleImputer
#
# # Sample dataset
# data = {
# 'Age': [25, 30, 35, None, 40],
# 'Gender': ['Male', 'Female', 'Male', 'Male', 'Female'],
# 'Income': [50000, 60000, None, 70000, 80000],
# 'Education': ['Bachelor', 'Master', 'PhD', 'Bachelor', 'Master']
# }
#
# df = pd.DataFrame(data)
#
# # Define preprocessing steps for numerical and categorical features
# numeric_features = ['Age', 'Income']
# numeric_transformer = Pipeline(steps=[
# ('imputer', SimpleImputer(strategy='mean')),
# ('scaler', StandardScaler())
# ])
#
# categorical_features = ['Gender', 'Education']
# categorical_transformer = Pipeline(steps=[
# ('imputer', SimpleImputer(strategy='most_frequent')),
# ('onehot', OneHotEncoder())
# ])
#
# # Combine the transformers using ColumnTransformer
# preprocessor = ColumnTransformer(
# transformers=[
# ('num', numeric_transformer, numeric_features),
# ('cat', categorical_transformer, categorical_features)
# ])
#
# # Apply preprocessing to the dataset
# transformed_data = preprocessor.fit_transform(df)
#
# # Concatenate the transformed data back into the original DataFrame
# transformed_df = pd.DataFrame(transformed_data, columns=['Age', 'Income', 'Female', 'Male', 'Bachelor', 'Master', 'PhD'])
# final_df = pd.concat([df.drop(columns=categorical_features), transformed_df], axis=1)
#
# # Print the final DataFrame
# print(final_df)

You might also like