"""Cluster the rows of ``Mall_Customers.csv`` with K-Means and plot the result.

The script reads ``Mall_Customers.csv`` from the current working directory,
takes the two columns at positions 3 and 4 as features (labelled on the plot
as annual income and spending score), fits a K-Means model with ``K``
clusters, prints the cluster index assigned to every row, and shows a scatter
plot of the clusters together with their centroids.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

# Number of clusters. The original note records that K = 5 was the outcome of
# the elbow analysis (see ``plot_elbow`` below).
K = 5

# One colour per cluster, in cluster-index order.
CLUSTER_COLORS = ['red', 'blue', 'green', 'cyan', 'magenta']


def plot_elbow(features, max_clusters=10):
    """Plot the within-cluster sum of squares (WCSS) for 1..max_clusters.

    This is the exploratory step that was used to pick ``K``; it is not run
    by the script itself. Call it manually when the cluster count needs to be
    re-evaluated.

    Args:
        features: 2-D array of samples passed to ``KMeans.fit``.
        max_clusters: Largest number of clusters to try (inclusive).
    """
    candidate_counts = range(1, max_clusters + 1)
    wcss = []
    for n_clusters in candidate_counts:
        model = KMeans(n_clusters=n_clusters, max_iter=300, n_init=10,
                       init='k-means++', random_state=0)
        model.fit(features)
        wcss.append(model.inertia_)

    plt.plot(candidate_counts, wcss)
    plt.title('The Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()


# Data preprocessing.
dataset = pd.read_csv("Mall_Customers.csv")
# Unsupervised learning: only features are needed, no dependent variable.
X = dataset.iloc[:, 3:5].values

# Run the analysis. random_state is fixed so repeated runs give the same labels.
kmeans = KMeans(n_clusters=K, max_iter=300, n_init=10, init='k-means++',
                random_state=0)
Y_kmeans = kmeans.fit_predict(X)
print(Y_kmeans)

# Visualisation: one scatter series per cluster, then the centroids on top.
for cluster_idx in range(K):
    members = Y_kmeans == cluster_idx
    plt.scatter(
        X[members, 0],
        X[members, 1],
        s=100,
        # Wrap around so a larger K still plots every cluster.
        c=CLUSTER_COLORS[cluster_idx % len(CLUSTER_COLORS)],
        label=f'Cluster {cluster_idx + 1}',
    )
plt.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    s=300,
    c='yellow',
    label='Centroids',
)
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()