ACTL3143 & ACTL5111 Deep Learning for Actuaries
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Source: Paras Patidar (2019), Tensors — Representation of Data In Neural Networks, Medium article.
Source: Kim et al (2021), Data Hiding Method for Color AMBTC Compressed Images Using Color Difference, Applied Sciences.
'Shapes are: (16, 16, 3), (16, 16, 3), (16, 16, 3), (16, 16, 3).'
[Four 16×16×3 uint8 arrays of raw pixel values, one per image, abridged here. The first three are mostly [0, 0, 0] (black) with [255, 255, 0] (yellow) pixels tracing out each shape; the fourth also uses [255, 163, 177], [255, 255, 255] and [51, 0, 255] pixels.]
Each pixel’s colour intensity is stored in one byte.
One byte is 8 bits, so in binary that is 00000000 to 11111111.
The largest unsigned number this can be is 2^8-1 = 255.
If you had signed numbers, this would go from -128 to 127.
Alternatively, hexadecimal numbers are used. E.g. 10100001 is split into 1010 0001, and 1010 = A, 0001 = 1, so combined it is 0xA1.
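As a quick sanity check of these byte facts, here is a small Python sketch (not from the lecture code):

import numpy as np

x = np.uint8(0b10100001)        # the byte 1010 0001
print(int(x), hex(x))           # 161 0xa1
print(np.iinfo(np.uint8).min, np.iinfo(np.uint8).max)   # 0 255 (unsigned)
print(np.iinfo(np.int8).min, np.iinfo(np.int8).max)     # -128 127 (signed)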
Take a look at https://setosa.io/ev/image-kernels/.
Source: Stanford’s deep learning tutorial via Stack Exchange.
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Say X_1, X_2 \sim f_X are i.i.d., and we look at S = X_1 + X_2.
The density for S is then
f_S(s) = \int_{x_1=-\infty}^{\infty} f_X(x_1) \, f_X(s-x_1) \,\mathrm{d}x_1 .
This is the convolution operation, f_S = f_X \star f_X.
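A tiny numerical illustration of the same idea in the discrete case (my own sketch, not from the lecture code): convolving the probabilities of two fair dice gives the distribution of their sum.

import numpy as np

die = np.full(6, 1 / 6)              # P(X = 1), ..., P(X = 6)
sum_probs = np.convolve(die, die)    # P(S = 2), ..., P(S = 12)
print(sum_probs.round(3))            # peaks at P(S = 7) = 6/36 ≈ 0.167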
Height, width, and number of channels.
Grayscale image has 1 channel. RGB image has 3 channels.
Example: Yellow = Red + Green.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Apply a neuron to each pixel in the image.
If red/green \nearrow or blue \searrow then yellowness \nearrow.
Set RGB weights to 1, 1, -1.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
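For instance, a toy sketch (my own example, not the book's code) of that "yellowness" neuron applied to every pixel of an RGB image:

import numpy as np

image = np.random.rand(16, 16, 3)        # a random RGB image with values in [0, 1]
weights = np.array([1.0, 1.0, -1.0])     # red and green push yellowness up, blue pushes it down
yellowness = image @ weights             # apply the same neuron at every pixel
print(yellowness.shape)                  # (16, 16): one output per pixel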
The output is produced by sweeping the neuron over the input. This is called convolution.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
The neuron, or rather its weights, is called a filter. We convolve the image with a filter, i.e. a convolutional filter.
When a filter’s footprint is > 1 pixel, it is a spatial filter.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Need \# \text{ Channels in Input} = \# \text{ Channels in Filter}.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Add a border of extra elements around the input, called padding. Normally we place zeros in all the new elements, called zero padding.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
In the image:
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
We don’t have to go one pixel across/down at a time.
Dimension of output will be smaller than input.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
When a filter scans the input step by step, it can process the same input elements multiple times. Even with larger strides, this can still happen (left image).
If we want to save time, we can choose strides that prevent input elements from being used more than once. Example (right image): a 3×3 filter with a stride of 3 in both directions.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
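In general (a rule of thumb worth noting, not from the slide itself): sweeping a filter of size k with stride s over an input of size n, padded by p on each side, gives an output of size \lfloor (n + 2p - k)/s \rfloor + 1 along that dimension. E.g. a 3 \times 3 filter with stride 3 over a 9 \times 9 input (no padding) gives a 3 \times 3 output.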
Need to choose: the number of filters, the kernel (filter) size, the padding, the strides, and the activation function; see the sketch below.
All the filter weights are learned during training.
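A minimal sketch of how these options appear in Keras (the particular numbers here are just for illustration):

from keras.layers import Conv2D

conv = Conv2D(
    filters=16,           # number of filters, i.e. output channels
    kernel_size=(3, 3),   # spatial footprint of each filter
    strides=(2, 2),       # step size across and down
    padding="same",       # zero-pad so the output spatial size is ceil(input size / stride)
    activation="relu",
)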
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
A neural network that uses convolution layers is called a convolutional neural network.
Source: Randall Munroe (2019), xkcd #2173: Trained a Neural Net.
Source: Aurélien Géron (2019), Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition, Figure 14-11.
Source: MathWorks, Introducing Deep Learning with MATLAB, Ebook.
Pooling, or downsampling, is a technique to blur a tensor.
(a): Input tensor (b): Subdivide input tensor into 2x2 blocks (c): Average pooling (d): Max pooling (e): Icon for a pooling layer
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
Source: Glassner (2021), Deep Learning: A Visual Approach, Chapter 16.
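A small illustrative sketch (not from the lecture code) of the shape change a pooling layer produces:

import numpy as np
from keras.layers import MaxPooling2D, AveragePooling2D

x = np.random.rand(1, 8, 8, 3)                       # a batch of one 8x8 RGB tensor
print(MaxPooling2D(pool_size=(2, 2))(x).shape)       # (1, 4, 4, 3)
print(AveragePooling2D(pool_size=(2, 2))(x).shape)   # (1, 4, 4, 3)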
Why? Pooling reduces the size of tensors, which reduces memory usage and execution time (recall that a 1×1 convolution reduces the number of channels in a tensor).
Why not?
Source: Hinton, Reddit AMA.
Source: Distill article, Feature Visualization.
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Source: Wikipedia, MNIST database.
Dataset source: Institute of Automation of Chinese Academy of Sciences (CASIA)
Source: Liu et al. (2011), CASIA online and offline Chinese handwriting databases, 2011 International Conference on Document Analysis and Recognition.
Pulling out 55 characters to experiment with.
人从众大夫天口太因鱼犬吠哭火炎啖木林森本竹羊美羔山出女囡鸟日东月朋明肉肤工白虎门闪问闲水牛马吗妈玉王国主川舟虫
Inspect directory structure
CASIA-Dataset/
├── Test/
│ ├── 东/
│ │ ├── 1.png
│ │ ├── 10.png
│ │ ├── 100.png
│ │ ├── 101.png
│ │ ├── 102.png
│ │ ├── 103.png
│ │ ├── 104.png
│ │ ├── 105.png
│ │ ├── 106.png
...
├── 97.png
├── 98.png
└── 99.png
from pathlib import Path

def count_images_in_folders(root_folder):
    # Count the .png images inside each character's subfolder.
    counts = {}
    for folder in root_folder.iterdir():
        counts[folder.name] = len(list(folder.glob("*.png")))
    return counts
train_counts = count_images_in_folders(Path("CASIA-Dataset/Train"))
test_counts = count_images_in_folders(Path("CASIA-Dataset/Test"))
print(train_counts)
print(test_counts)
{'太': 596, '朋': 595, '羊': 600, '哭': 584, '囡': 240, '明': 596, '川': 593, '马': 597, '羔': 597, '天': 598, '吠': 601, '肉': 598, '夫': 599, '水': 597, '火': 599, '玉': 602, '妈': 595, '鸟': 598, '工': 600, '从': 598, '竹': 600, '王': 601, '人': 597, '美': 591, '众': 600, '因': 603, '东': 601, '大': 603, '吗': 596, '虫': 602, '日': 597, '门': 597, '啖': 240, '林': 598, '牛': 599, '舟': 601, '本': 604, '鱼': 602, '闪': 597, '山': 598, '口': 597, '主': 599, '炎': 602, '国': 600, '闲': 598, '问': 601, '犬': 598, '白': 604, '虎': 597, '出': 602, '森': 598, '肤': 601, '女': 597, '月': 604, '木': 598}
{'太': 143, '朋': 144, '羊': 144, '哭': 138, '囡': 59, '明': 144, '川': 142, '马': 144, '羔': 141, '天': 143, '吠': 141, '肉': 143, '夫': 141, '水': 143, '火': 142, '玉': 142, '妈': 142, '鸟': 143, '工': 141, '从': 142, '竹': 142, '王': 145, '人': 144, '美': 144, '众': 143, '因': 144, '东': 142, '大': 144, '吗': 143, '虫': 144, '日': 143, '门': 144, '啖': 60, '林': 143, '牛': 144, '舟': 143, '本': 143, '鱼': 143, '闪': 143, '山': 144, '口': 143, '主': 141, '炎': 143, '国': 142, '闲': 142, '问': 143, '犬': 141, '白': 141, '虎': 143, '出': 142, '森': 144, '肤': 140, '女': 144, '月': 144, '木': 144}
The counts differ a little, but there are roughly 600 training and 140 test images per character. A couple of characters have far fewer of both, though.
def get_image_dimensions(root_folder):
    dimensions = []
    for folder in root_folder.iterdir():
        for image in folder.glob("*.png"):
            img = imread(image)
            dimensions.append(img.shape)
    return dimensions
train_dimensions = get_image_dimensions(Path("CASIA-Dataset/Train"))
test_dimensions = get_image_dimensions(Path("CASIA-Dataset/Test"))
train_heights = [d[0] for d in train_dimensions]
train_widths = [d[1] for d in train_dimensions]
test_heights = [d[0] for d in test_dimensions]
test_widths = [d[1] for d in test_dimensions]
The images are taller than they are wide. We have more training images than test images.
plt.hist(train_heights, bins=30, alpha=0.5, label="Train Heights", density=True)
plt.hist(train_widths, bins=30, alpha=0.5, label="Train Widths", density=True)
plt.hist(test_heights, bins=30, alpha=0.5, label="Test Heights", density=True)
plt.hist(test_widths, bins=30, alpha=0.5, label="Test Widths", density=True)
plt.legend();
The distributions of image dimensions are pretty similar between the training and test sets.
from keras.utils import image_dataset_from_directory
data_dir = "CASIA-Dataset"
batch_size = 32
img_height = 80
img_width = 60
img_size = (img_height, img_width)
train_ds = image_dataset_from_directory(
    data_dir + "/Train",
    image_size=img_size,
    batch_size=batch_size,
    shuffle=False,
    color_mode='grayscale')
test_ds = image_dataset_from_directory(
    data_dir + "/Test",
    image_size=img_size,
    batch_size=batch_size,
    shuffle=False,
    color_mode='grayscale')
Found 32206 files belonging to 55 classes.
Found 7684 files belonging to 55 classes.
['东', '主', '人', '从', '众', '出', '口', '吗', '吠', '哭', '啖', '因', '囡', '国', '大', '天', '太', '夫', '女', '妈', '山', '川', '工', '日', '明', '月', '朋', '木', '本', '林', '森', '水', '火', '炎', '牛', '犬', '玉', '王', '白', '竹', '羊', '美', '羔', '肉', '肤', '舟', '虎', '虫', '门', '闪', '问', '闲', '马', '鱼', '鸟']
# NB: Need shuffle=False earlier for these X & y to line up.
X_main = np.concatenate(list(train_ds.map(lambda x, y: x)))
y_main = np.concatenate(list(train_ds.map(lambda x, y: y)))
X_test = np.concatenate(list(test_ds.map(lambda x, y: x)))
y_test = np.concatenate(list(test_ds.map(lambda x, y: y)))
X_main.shape, y_main.shape, X_test.shape, y_test.shape
((32206, 80, 60, 1), (32206,), (7684, 80, 60, 1), (7684,))
X_train, X_val, y_train, y_val = train_test_split(X_main, y_main, test_size=0.2,
random_state=123)
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape)
(25764, 80, 60, 1) (25764,) (6442, 80, 60, 1) (6442,) (7684, 80, 60, 1) (7684,)
import matplotlib.font_manager as fm
CHINESE_FONT = fm.FontProperties(fname="STHeitiTC-Medium-01.ttf")
def plot_mandarin_characters(X, y, class_names, n=5, title_font=CHINESE_FONT):
    # Plot the first n images in X
    plt.figure(figsize=(10, 4))
    for i in range(n):
        plt.subplot(1, n, i + 1)
        plt.imshow(X[i], cmap="gray")
        plt.title(class_names[y[i]], fontproperties=title_font)
        plt.axis("off")
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Tip
The Rescaling layer will rescale the intensities to [0, 1].
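The model definition itself isn't shown on this slide; a minimal sketch consistent with the summary below (assuming num_classes = 55 and the image size defined earlier) would be:

from keras import Sequential
from keras.layers import Input, Flatten, Rescaling, Dense

model = Sequential([
    Input((img_height, img_width, 1)),
    Flatten(),                                  # 80 x 60 = 4800 inputs
    Rescaling(1. / 255),
    Dense(num_classes, activation="softmax"),   # 4800 x 55 + 55 = 264,055 parameters
])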
Model: "sequential"
Layer (type) | Output Shape | Param #
---|---|---
flatten (Flatten) | (None, 4800) | 0
rescaling (Rescaling) | (None, 4800) | 0
dense (Dense) | (None, 55) | 264,055
Total params: 264,055 (1.01 MB)
Trainable params: 264,055 (1.01 MB)
Non-trainable params: 0 (0.00 B)
loss = keras.losses.SparseCategoricalCrossentropy()
topk = keras.metrics.SparseTopKCategoricalAccuracy(k=5)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy', topk])
epochs = 100
es = EarlyStopping(patience=15, restore_best_weights=True,
monitor="val_accuracy", verbose=2)
if Path("logistic.keras").exists():
model = keras.models.load_model("logistic.keras")
with open("logistic_history.json", "r") as json_file:
history = json.load(json_file)
else:
hist = model.fit(X_train, y_train, validation_data=(X_val, y_val),
epochs=epochs, callbacks=[es], verbose=0)
model.save("logistic.keras")
history = hist.history
with open("logistic_history.json", "w") as json_file:
json.dump(history, json_file)
Most of this last part is just to save time when rendering these slides; you don't need it.
def plot_history(history):
    epochs = range(len(history["loss"]))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, history["accuracy"], label="Train")
    plt.plot(epochs, history["val_accuracy"], label="Val")
    plt.legend(loc="lower right")
    plt.title("Accuracy")
    plt.subplot(1, 2, 2)
    plt.plot(epochs, history["loss"], label="Train")
    plt.plot(epochs, history["val_loss"], label="Val")
    plt.legend(loc="upper right")
    plt.title("Loss")
    plt.show()
[1.7625155448913574, 0.6105030179023743, 0.8532060384750366]
[2.2966670989990234, 0.5490530729293823, 0.8084445595741272]
loss_value, accuracy, top5_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss_value:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Top 5 Accuracy: {top5_accuracy:.4f}")
Test Loss: 3.4015
Test Accuracy: 0.4771
Test Top 5 Accuracy: 0.7898
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
from keras.layers import Conv2D, MaxPooling2D
random.seed(123)
model = Sequential([
    Input((img_height, img_width, 1)),
    Rescaling(1./255),
    Conv2D(16, 3, padding="same", activation="relu", name="conv1"),
    MaxPooling2D(name="pool1"),
    Conv2D(32, 3, padding="same", activation="relu", name="conv2"),
    MaxPooling2D(name="pool2"),
    Conv2D(64, 3, padding="same", activation="relu", name="conv3"),
    MaxPooling2D(name="pool3", pool_size=(4, 4)),
    Flatten(), Dense(64, activation="relu"), Dense(num_classes)
])
Architecture inspired by https://www.tensorflow.org/tutorials/images/classification.
Model: "sequential_1"
Layer (type) | Output Shape | Param #
---|---|---
rescaling_1 (Rescaling) | (None, 80, 60, 1) | 0
conv1 (Conv2D) | (None, 80, 60, 16) | 160
pool1 (MaxPooling2D) | (None, 40, 30, 16) | 0
conv2 (Conv2D) | (None, 40, 30, 32) | 4,640
pool2 (MaxPooling2D) | (None, 20, 15, 32) | 0
conv3 (Conv2D) | (None, 20, 15, 64) | 18,496
pool3 (MaxPooling2D) | (None, 5, 3, 64) | 0
flatten_1 (Flatten) | (None, 960) | 0
dense_1 (Dense) | (None, 64) | 61,504
dense_2 (Dense) | (None, 55) | 3,575
Total params: 88,375 (345.21 KB)
Trainable params: 88,375 (345.21 KB)
Non-trainable params: 0 (0.00 B)
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
topk = keras.metrics.SparseTopKCategoricalAccuracy(k=5)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy', topk])
epochs = 100
es = EarlyStopping(patience=15, restore_best_weights=True,
monitor="val_accuracy", verbose=2)
if Path("cnn.keras").exists():
model = keras.models.load_model("cnn.keras")
with open("cnn_history.json", "r") as json_file:
history = json.load(json_file)
else:
hist = model.fit(X_train, y_train, validation_data=(X_val, y_val),
epochs=epochs, callbacks=[es], verbose=0)
model.save("cnn.keras")
history = hist.history
with open("cnn_history.json", "w") as json_file:
json.dump(history, json_file)
Tip
Instead of using a softmax activation, we just added from_logits=True to the loss function; this is more numerically stable.
[0.01601474918425083, 0.9946824908256531, 1.0]
[0.4305051267147064, 0.9318534731864929, 0.9947221279144287]
loss_value, accuracy, top5_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss_value:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Top 5 Accuracy: {top5_accuracy:.4f}")
Test Loss: 0.8504
Test Accuracy: 0.8860
Test Top 5 Accuracy: 0.9858
Exception encountered when calling MaxPooling2D.call().
Negative dimension size caused by subtracting 2 from 1 for '{{node sequential_1_1/pool1_1/MaxPool2d}} = MaxPool[T=DT_FLOAT, data_format="NHWC", explicit_paddings=[], ksize=[1, 2, 2, 1], padding="VALID", strides=[1, 2, 2, 1]](sequential_1_1/conv1_1/Relu)' with input shapes: [32,60,1,16].
Arguments received by MaxPooling2D.call():
• inputs=tf.Tensor(shape=(32, 60, 1, 16), dtype=float32)
((80, 60, 1), (1, 80, 60, 1), (1, 80, 60, 1))
array([[-27.75, -33.43, -61.25, -45.47, -16.26, -18.03, -38.82, -8.48,
-24.19, -41.46, -14.05, -1.9 , -11.72, -15.72, -55.43, -49.13,
-20.48, -32.75, -9.72, -0.97, -37.01, -54.47, -64.23, -30.72,
-14. , -36.27, -24.38, -42.4 , -18.55, -15.39, -25.44, -31.77,
-40.55, -20.16, -43.67, -46.77, -31.43, -38.17, -5.83, -19.47,
-72.93, -60.61, -49.84, 20.21, -24.8 , -19.48, 5.86, -4.55,
-37.49, -9.48, -12.66, -6.69, -36.01, -9.77, -11.05]],
dtype=float32)
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
def plot_failed_predictions(X, y, class_names, max_errors=20,
                            num_rows=4, num_cols=5, title_font=CHINESE_FONT):
    plt.figure(figsize=(num_cols * 2, num_rows * 2))
    errors = 0
    y_pred = model.predict(X, verbose=0)
    y_pred_classes = y_pred.argmax(axis=1)
    y_pred_probs = keras.ops.softmax(y_pred).numpy().max(axis=1)
    for i in range(len(y_pred)):
        if errors >= max_errors:
            break
        if y_pred_classes[i] != y[i]:
            plt.subplot(num_rows, num_cols, errors + 1)
            plt.imshow(X[i], cmap="gray")
            true_class = class_names[y[i]]
            pred_class = class_names[y_pred_classes[i]]
            conf = y_pred_probs[i]
            msg = f"{true_class} not {pred_class} ({conf*100:.0f}%)"
            plt.title(msg, fontproperties=title_font)
            plt.axis("off")
            errors += 1
y_pred = keras.ops.convert_to_numpy(keras.activations.softmax(model(X_test)))
y_pred_class = np.argmax(y_pred, axis=1)
y_pred_prob = y_pred[np.arange(y_pred.shape[0]), y_pred_class]
confidence_when_correct = y_pred_prob[y_pred_class == y_test]
confidence_when_wrong = y_pred_prob[y_pred_class != y_test]
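One quick way (my own sketch, not from the slides) to compare these two confidence distributions:

plt.hist(confidence_when_correct, bins=20, alpha=0.5, label="Correct", density=True)
plt.hist(confidence_when_wrong, bins=20, alpha=0.5, label="Wrong", density=True)
plt.xlabel("Predicted probability of the chosen class")
plt.legend();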
55 poorly written Mandarin characters (55 \times 7 = 385).
pat_ds = image_dataset_from_directory(
    "mandarin",
    image_size=img_size,
    batch_size=batch_size,
    shuffle=False,
    color_mode='grayscale')
X_pat = np.concatenate(list(pat_ds.map(lambda x, y: x)))
y_pat = np.concatenate(list(pat_ds.map(lambda x, y: y)))
assert pat_ds.class_names == class_names
X_pat.shape, y_pat.shape
Found 385 files belonging to 55 classes.
((385, 80, 60, 1), (385,))
[2.9991140365600586, 0.7636363506317139, 0.948051929473877]
class_accuracies = []
for i in range(num_classes):
    class_indices = y_pat == i
    y_pred = model.predict(X_pat[class_indices], verbose=0).argmax(axis=1)
    class_correct = y_pred == y_pat[class_indices]
    class_accuracies.append(np.mean(class_correct))
class_accuracies = pd.DataFrame({"Class": class_names, "Accuracy": class_accuracies})
class_accuracies.sort_values("Accuracy")
Class index | Class | Accuracy |
---|---|---|
23 | 日 | 0.000000 |
14 | 大 | 0.000000 |
8 | 吠 | 0.142857 |
50 | 问 | 0.142857 |
... | ... | ... |
3 | 从 | 1.000000 |
1 | 主 | 1.000000 |
36 | 玉 | 1.000000 |
54 | 鸟 | 1.000000 |
55 rows × 2 columns
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Frankly, a lot of this is just ‘enlightened’ trial and error.
Source: Twitter.
import keras_tuner as kt
def build_model(hp):
    model = Sequential()
    model.add(
        Dense(
            hp.Choice("neurons", [4, 8, 16, 32, 64, 128, 256]),
            activation=hp.Choice("activation",
                ["relu", "leaky_relu", "tanh"]),
        )
    )
    model.add(Dense(1, activation="exponential"))
    learning_rate = hp.Float("lr",
        min_value=1e-4, max_value=1e-2, sampling="log")
    opt = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss="poisson")
    return model
tuner = kt.RandomSearch(
    build_model,
    objective="val_loss",
    max_trials=10,
    directory="random-search")
es = EarlyStopping(patience=3,
    restore_best_weights=True)
tuner.search(X_train_sc, y_train,
    epochs=100, callbacks=[es],
    validation_data=(X_val_sc, y_val))
best_model = tuner.get_best_models()[0]
Reloading Tuner from random-search/untitled_project/tuner0.json
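Once the search has run (or been reloaded), you can also inspect the winning configuration; a small sketch using the tuner above:

best_hp = tuner.get_best_hyperparameters(1)[0]
print(best_hp.values)                # e.g. {"neurons": ..., "activation": ..., "lr": ...}
tuner.results_summary(num_trials=3)  # summarise the top few trials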
def build_model(hp):
    model = Sequential()
    for i in range(hp.Int("numHiddenLayers", 1, 3)):
        # Tune number of units in each layer separately.
        model.add(
            Dense(
                hp.Choice(f"neurons_{i}", [8, 16, 32, 64]),
                activation="relu"
            )
        )
    model.add(Dense(1, activation="exponential"))
    opt = keras.optimizers.Adam(learning_rate=0.0005)
    model.compile(optimizer=opt, loss="poisson")
    return model
tuner = kt.BayesianOptimization(
    build_model,
    objective="val_loss",
    directory="bayesian-search",
    max_trials=10)
es = EarlyStopping(patience=3,
    restore_best_weights=True)
tuner.search(X_train_sc, y_train,
    epochs=100, callbacks=[es],
    validation_data=(X_val_sc, y_val))
best_model = tuner.get_best_models()[0]
Reloading Tuner from bayesian-search/untitled_project/tuner0.json
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
Source: Teachable Machine, https://teachablemachine.withgoogle.com/.
… these models use a technique called transfer learning. There’s a pretrained neural network, and when you create your own classes, you can sort of picture that your classes are becoming the last layer or step of the neural net. Specifically, both the image and pose models are learning off of pretrained mobilenet models …
CIFAR-10 / CIFAR-100 dataset from the Canadian Institute for Advanced Research
ImageNet and the ImageNet Large Scale Visual Recognition Challenge (ILSVRC); originally 1,000 synsets.
Layer | Type | Channels | Size | Kernel size | Stride | Activation |
---|---|---|---|---|---|---|
In | Input | 1 | 32×32 | – | – | – |
C0 | Convolution | 6 | 28×28 | 5×5 | 1 | tanh |
S1 | Avg pooling | 6 | 14×14 | 2×2 | 2 | tanh |
C2 | Convolution | 16 | 10×10 | 5×5 | 1 | tanh |
S3 | Avg pooling | 16 | 5×5 | 2×2 | 2 | tanh |
C4 | Convolution | 120 | 1×1 | 5×5 | 1 | tanh |
F5 | Fully connected | – | 84 | – | – | tanh |
Out | Fully connected | – | 10 | – | – | RBF |
Note
MNIST images are 28×28 pixels, and with zero-padding (for a 5×5 kernel) that becomes 32×32.
Source: Aurélien Géron (2019), Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition, Chapter 14.
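A rough Keras sketch of this architecture (a simplification: the original LeNet-5 used RBF output units and a sparse connection scheme between the pooling and convolution layers):

from keras import Sequential
from keras.layers import Input, Conv2D, AveragePooling2D, Flatten, Dense

lenet5 = Sequential([
    Input((32, 32, 1)),
    Conv2D(6, 5, activation="tanh"),     # 28 x 28 x 6
    AveragePooling2D(2),                 # 14 x 14 x 6
    Conv2D(16, 5, activation="tanh"),    # 10 x 10 x 16
    AveragePooling2D(2),                 # 5 x 5 x 16
    Conv2D(120, 5, activation="tanh"),   # 1 x 1 x 120
    Flatten(),
    Dense(84, activation="tanh"),
    Dense(10, activation="softmax"),     # softmax in place of the original RBF layer
])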
Layer | Type | Channels | Size | Kernel | Stride | Padding | Activation |
---|---|---|---|---|---|---|---|
In | Input | 3 | 227×227 | – | – | – | – |
C0 | Convolution | 96 | 55×55 | 11×11 | 4 | valid | ReLU |
S1 | Max pool | 96 | 27×27 | 3×3 | 2 | valid | – |
C2 | Convolution | 256 | 27×27 | 5×5 | 1 | same | ReLU |
S3 | Max pool | 256 | 13×13 | 3×3 | 2 | valid | – |
C4 | Convolution | 384 | 13×13 | 3×3 | 1 | same | ReLU |
C5 | Convolution | 384 | 13×13 | 3×3 | 1 | same | ReLU |
C6 | Convolution | 256 | 13×13 | 3×3 | 1 | same | ReLU |
S7 | Max pool | 256 | 6×6 | 3×3 | 2 | valid | – |
F8 | Fully conn. | – | 4,096 | – | – | – | ReLU |
F9 | Fully conn. | – | 4,096 | – | – | – | ReLU |
Out | Fully conn. | – | 1,000 | – | – | – | Softmax |
Winner of the ILSVRC 2012 challenge (top-five error 17%), developed by Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton.
Source: Buah et al. (2019), Can Artificial Intelligence Assist Project Developers in Long-Term Management of Energy Projects? The Case of CO2 Capture and Storage.
Used in the ILSVRC 2014 winning solution (top-5 error < 7%).
VGGNet was the runner-up.
Source: Szegedy, C. et al. (2014), Going deeper with convolutions. and KnowYourMeme.com
Source: Aurélien Géron (2019), Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition, Figure 14-14.
Source: Goodfellow et al. (2016), Deep Learning, Figure 6.7.
Source: Aurélien Géron (2019), Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition, Figure 14-15.
ResNet won the ILSVRC 2015 challenge (top-5 error 3.6%), developed by Kaiming He et al.
Source: Aurélien Géron (2019), Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition, Figure 14-17.
Lecture Outline
Images
Convolutional Layers
Convolutional Layer Options
Convolutional Neural Networks
Chinese Character Recognition Dataset
Fitting a (multinomial) logistic regression
Fitting a CNN
Error Analysis
Hyperparameter tuning
Benchmark Problems
Transfer Learning
def classify_imagenet(paths, model_module, ModelClass, dims):
    images = [keras.utils.load_img(path, target_size=dims) for path in paths]
    image_array = np.array([keras.utils.img_to_array(img) for img in images])
    inputs = model_module.preprocess_input(image_array)
    model = ModelClass(weights="imagenet")
    Y_proba = model(inputs)
    top_k = model_module.decode_predictions(Y_proba, top=3)
    for image_index in range(len(images)):
        print(f"Image #{image_index}:")
        for class_id, name, y_proba in top_k[image_index]:
            print(f" {class_id} - {name} {int(y_proba*100)}%")
        print()
Image #0:
n04350905 - suit 39%
n04591157 - Windsor_tie 34%
n02749479 - assault_rifle 13%
Image #1:
n03529860 - home_theater 25%
n02749479 - assault_rifle 9%
n04009552 - projector 5%
Image #2:
n03529860 - home_theater 9%
n03924679 - photocopier 7%
n02786058 - Band_Aid 6%
Image #0:
n04350905 - suit 34%
n04591157 - Windsor_tie 8%
n03630383 - lab_coat 7%
Image #1:
n04023962 - punching_bag 9%
n04336792 - stretcher 4%
n03529860 - home_theater 4%
Image #2:
n04404412 - television 42%
n02977058 - cash_machine 6%
n04152593 - screen 3%
Image #0:
n04350905 - suit 25%
n04591157 - Windsor_tie 11%
n03630383 - lab_coat 6%
Image #1:
n04507155 - umbrella 52%
n04404412 - television 2%
n03529860 - home_theater 2%
Image #2:
n04404412 - television 17%
n02777292 - balance_beam 7%
n03942813 - ping-pong_ball 6%
Image #0:
n03483316 - hand_blower 21%
n03271574 - electric_fan 8%
n07579787 - plate 4%
Image #1:
n03942813 - ping-pong_ball 88%
n02782093 - balloon 3%
n04023962 - punching_bag 1%
Image #2:
n04557648 - water_bottle 31%
n04336792 - stretcher 14%
n03868863 - oxygen_mask 7%
Image #0:
n03868863 - oxygen_mask 37%
n03483316 - hand_blower 7%
n03271574 - electric_fan 7%
Image #1:
n03942813 - ping-pong_ball 29%
n04270147 - spatula 12%
n03970156 - plunger 8%
Image #2:
n02815834 - beaker 40%
n03868863 - oxygen_mask 16%
n04557648 - water_bottle 4%
Image #0:
n02815834 - beaker 19%
n03179701 - desk 15%
n03868863 - oxygen_mask 9%
Image #1:
n03942813 - ping-pong_ball 87%
n02782093 - balloon 8%
n02790996 - barbell 0%
Image #2:
n04557648 - water_bottle 55%
n03983396 - pop_bottle 9%
n03868863 - oxygen_mask 7%
[<tf_keras.src.engine.input_layer.InputLayer at 0x730e5a7c8690>,
 <tf_keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D at 0x730e5a7cb3d0>,
 <tf_keras.src.layers.convolutional.conv2d.Conv2D at 0x730e5a7cae90>,
 <tf_keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x730e18515750>,
 <tf_keras.src.layers.activation.relu.ReLU at 0x730e5a6b2890>,
 ...
 <tf_keras.src.layers.activation.relu.ReLU at 0x730e61bbec10>]
(155 layers in total; the elided entries repeat the same pattern of DepthwiseConv2D, Conv2D, BatchNormalization, ReLU, ZeroPadding2D and Add layers.)
# Pull in the base model we are transferring from.
base_model = keras.applications.Xception(
    weights="imagenet",  # Load weights pre-trained on ImageNet.
    input_shape=(150, 150, 3),
    include_top=False,
)  # Discard the ImageNet classifier at the top.

# Tell it not to update its weights.
base_model.trainable = False

# Make our new model on top of the base model.
inputs = keras.Input(shape=(150, 150, 3))
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
outputs = keras.layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

# Compile and fit on our data.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[keras.metrics.BinaryAccuracy()],
)
model.fit(new_dataset, epochs=20, callbacks=..., validation_data=...)
Source: François Chollet (2020), Transfer learning & fine-tuning, Keras documentation.
# Unfreeze the base model
base_model.trainable = True

# It's important to recompile your model after you make any changes
# to the `trainable` attribute of any inner layer, so that your changes
# are taken into account
model.compile(
    optimizer=keras.optimizers.Adam(1e-5),  # Very low learning rate
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[keras.metrics.BinaryAccuracy()],
)

# Train end-to-end. Be careful to stop before you overfit!
model.fit(new_dataset, epochs=10, callbacks=..., validation_data=...)
Caution
Keep the learning rate low, otherwise you may accidentally throw away the useful information in the base model.
Source: François Chollet (2020), Transfer learning & fine-tuning, Keras documentation.
from watermark import watermark
print(watermark(python=True, packages="keras,matplotlib,numpy,pandas,seaborn,scipy,torch,tensorflow,tf_keras"))
Python implementation: CPython
Python version : 3.11.9
IPython version : 8.24.0
keras : 3.3.3
matplotlib: 3.9.0
numpy : 1.26.4
pandas : 2.2.2
seaborn : 0.13.2
scipy : 1.11.0
torch : 2.3.1
tensorflow: 2.16.1
tf_keras : 2.16.0