使用 Keras 和 Tensorflow
Evaluate 可以轻松集成到您的 Keras 和 Tensorflow 工作流程中。我们将演示两种将 Evaluate 集成到模型训练中的方法,使用 Fashion MNIST 示例数据集。我们将训练一个标准分类器来预测该数据集中的两个类别,并展示如何在训练期间或之后使用指标作为回调进行评估。
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import evaluate
# We pull example code from Keras.io's guide on classifying with MNIST
# Located here: https://keras.org.cn/examples/vision/mnist_convnet/
# Model / data parameters
input_shape = (28, 28, 1)
# Load the data and split it between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
# Only select tshirts/tops and trousers, classes 0 and 1
def get_tshirts_tops_and_trouser(x_vals, y_vals):
mask = np.where((y_vals == 0) | (y_vals == 1))
return x_vals[mask], y_vals[mask]
x_train, y_train = get_tshirts_tops_and_trouser(x_train, y_train)
x_test, y_test = get_tshirts_tops_and_trouser(x_test, y_test)
# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
model = keras.Sequential(
[
keras.Input(shape=input_shape),
layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
layers.MaxPooling2D(pool_size=(2, 2)),
layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
layers.MaxPooling2D(pool_size=(2, 2)),
layers.Flatten(),
layers.Dropout(0.5),
layers.Dense(1, activation="sigmoid"),
]
)
回调
假设我们希望在模型训练期间跟踪模型指标。我们可以使用回调来在训练结束后计算每个 epoch 结束后的指标。
我们将在此定义一个回调,它将接收一个指标名称和我们的训练数据,并在 epoch 结束后计算指标。
class MetricsCallback(keras.callbacks.Callback):
def __init__(self, metric_name, x_data, y_data) -> None:
super(MetricsCallback, self).__init__()
self.x_data = x_data
self.y_data = y_data
self.metric_name = metric_name
self.metric = evaluate.load(metric_name)
def on_epoch_end(self, epoch, logs=dict()):
m = self.model
# Ensure we get labels of "1" or "0"
training_preds = np.round(m.predict(self.x_data))
training_labels = self.y_data
# Compute score and save
score = self.metric.compute(predictions = training_preds, references = training_labels)
logs.update(score)
我们可以将此类传递给 callbacks
关键字参数,以便在训练期间使用它。
batch_size = 128
epochs = 2
model.compile(loss="binary_crossentropy", optimizer="adam")
model_history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1,
callbacks = [MetricsCallback(x_data = x_train, y_data = y_train, metric_name = "accuracy")])
使用 Evaluate 指标进行...评估!
我们也可以在模型训练后使用相同的指标!在这里,我们展示了如何在测试集上训练后检查模型的准确性。
acc = evaluate.load("accuracy")
# Round the predictions to turn them into "0" or "1" labels
test_preds = np.round(model.predict(x_test))
test_labels = y_test
print("Test accuracy is : ", acc.compute(predictions = test_preds, references = test_labels))
# Test accuracy is : 0.9855