# Data-preprocessing script: load Data.csv, impute missing numeric values,
# encode the categorical feature and the target, split into training and
# test sets, then standardize the features.

import numpy as np
import matplotlib.pyplot as plt  # not used in this section; kept in case later code relies on it
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# --- Load the dataset -------------------------------------------------------
dataset = pd.read_csv("Data.csv")
# Features are every column except the last one; the target is the last
# column. Using -1 (instead of a hard-coded 3) keeps X and Y consistent, so
# the target can never also appear among the features.
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values

# --- Handle missing data ----------------------------------------------------
# Replace NaN entries in feature columns 1 and 2 with the mean of the
# respective column.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])

# --- Encode categorical data ------------------------------------------------
# Map the categories of feature column 0 to integer codes.
# NOTE(review): OneHotEncoder below can presumably consume the raw category
# values directly, which would make this step redundant -- confirm before
# removing, since `labelencoder_X` may be used elsewhere.
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])

# One-hot encode column 0 and pass the remaining columns through unchanged.
# sparse_threshold=0 forces a dense result so that the np.array(...,
# dtype=float) conversion below works regardless of how many categories the
# column has.
ct = ColumnTransformer(
    transformers=[
        ('col-0', OneHotEncoder(), [0]),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X), dtype=float)

# Encode the target labels as integers.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)

# --- Split into training and test sets --------------------------------------
# 20% of the rows go to the test set; random_state=0 makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# --- Feature scaling --------------------------------------------------------
# Fit the scaler on the training set only, then apply the same transform to
# the test set so no test-set statistics influence the scaling.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

print(X_train)