# Numerical, plotting, and scikit-learn dependencies for the
# random-forest classification example.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
次に、仮想的なデータセットを作成します。
ここでは、年齢とタンパク質レベルの2つの特徴量を持つ1000人の患者データを生成します。
また、各患者がアルツハイマー病であるかどうかをランダムに決定します。
1 2 3
# Fix the global NumPy seed so the synthetic data set is reproducible.
np.random.seed(0)
# 1000 patients x 2 integer features (age, protein level), each drawn
# uniformly from [60, 100).
X = np.random.randint(60, 100, (1000, 2))  # Age and Protein level
# Labels are drawn independently of X, so they carry no real signal.
y = np.random.choice([0, 1], 1000)  # 0: Healthy, 1: Alzheimer's
データを訓練セットとテストセットに分割します。
1
# Hold out 20% of the samples as the test set; random_state pins the
# shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Dependencies for the breast-cancer PCA example.
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# Load the dataset
data = load_breast_cancer()
X = data.data  # feature matrix
y = data.target  # class labels
# Dependencies for the loan-repayment logistic-regression example.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import seaborn as sns
# Build a synthetic dataset
# Seed the global NumPy RNG so the generated columns are reproducible.
np.random.seed(0)
# 1000 integer samples per column; the upper bound is exclusive.
age = np.random.randint(20, 70, 1000)
income = np.random.randint(20000, 100000, 1000)
# Binary target (0 or 1), drawn independently of age and income.
will_pay_back = np.random.randint(0, 2, 1000)
# Dependencies for the iris decision-tree cross-validation example.
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import numpy as np
# Load the dataset
iris = load_iris()
# Split the loaded bunch into the feature matrix and the target labels.
X, y = iris.data, iris.target
# Plot the held-out observations as black points and overlay the model's
# predictions as a blue line.
# NOTE(review): X_test, y_test and y_pred are defined outside this excerpt;
# presumably X_test is the single RM feature column — confirm.
plt.scatter(X_test, y_test, color='black')
plt.plot(X_test, y_pred, color='blue', linewidth=3)
plt.xlabel('Number of Rooms (RM)')
plt.ylabel('Median value of owner-occupied homes in $1000s (MEDV)')
plt.title('Linear Regression on Boston Housing Data')
plt.show()
# Dependencies for the random-forest example on make_classification data.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score