VoiceTurn - NN Classifier

# Training script for a small 1D-convolutional classifier.
#
# NOTE(review): this script uses several names it does not define --
# `args`, `train_dataset`, `validation_dataset`, `input_length`, `classes`,
# `callbacks`, `train_sample_count`, `SpecAugment` and `BatchLoggerCallback`.
# They are presumably injected by the hosting training environment before
# this code runs; confirm before executing it stand-alone.

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    InputLayer,
    Dropout,
    Conv1D,
    Conv2D,
    Flatten,
    Reshape,
    MaxPooling1D,
    MaxPooling2D,
    AveragePooling2D,
    BatchNormalization,
    Permute,
    ReLU,
    Softmax,
    Activation,
)
from tensorflow.keras.optimizers.legacy import Adam

# Data augmentation for spectrograms, which can be configured in visual mode.
# To learn what these arguments mean, see the SpecAugment paper:
# https://arxiv.org/abs/1904.08779
# With the values below only a single time mask (at most one consecutive
# frame) is requested; frequency masking and time warping are switched off.
sa = SpecAugment(
    spectrogram_shape=[int(input_length / 13), 13],
    mF_num_freq_masks=0,
    F_freq_mask_max_consecutive=0,
    mT_num_time_masks=1,
    T_time_mask_max_consecutive=1,
    enable_time_warp=False,
    W_time_warp_max_distance=6,
    mask_with_mean=False,
)
# Augmentation is mapped onto the training set only; the validation set is
# left untouched.
train_dataset = train_dataset.map(
    sa.mapper(), num_parallel_calls=tf.data.AUTOTUNE
)

# Fall back to the defaults when the corresponding argument is unset/falsy.
EPOCHS = args.epochs or 100
LEARNING_RATE = args.learning_rate or 0.005
# If True, non-deterministic functions (e.g. shuffling batches) are not used.
# This is False by default.
ENSURE_DETERMINISM = args.ensure_determinism
# this controls the batch size, or you can manipulate the tf.data.Dataset
# objects yourself
BATCH_SIZE = args.batch_size or 32

# Only the shuffle is conditional; both datasets are always batched.
if not ENSURE_DETERMINISM:
    train_dataset = train_dataset.shuffle(buffer_size=BATCH_SIZE * 4)
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=False)
validation_dataset = validation_dataset.batch(BATCH_SIZE, drop_remainder=False)

# model architecture
model = Sequential()
# Data augmentation, which can be configured in visual mode
model.add(
    tf.keras.layers.GaussianNoise(stddev=0.45, input_shape=(input_length,))
)
# Turn the flat feature vector into (input_length / 13) rows of 13 values so
# the Conv1D layers convolve along the first (row) axis.
model.add(
    Reshape((int(input_length / 13), 13), input_shape=(input_length,))
)
# Two identical conv -> pool -> dropout stages with 8 and then 16 filters.
model.add(Conv1D(8, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(Dropout(0.25))
model.add(Conv1D(16, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(Dropout(0.25))
model.add(Flatten())
# Softmax output layer with one unit per class.
model.add(Dense(classes, name='y_pred', activation='softmax'))

# this controls the learning rate
opt = Adam(learning_rate=LEARNING_RATE, beta_1=0.9, beta_2=0.999)
callbacks.append(
    BatchLoggerCallback(
        BATCH_SIZE,
        train_sample_count,
        epochs=EPOCHS,
        ensure_determinism=ENSURE_DETERMINISM,
    )
)

# train the neural network
# NOTE(review): 'categorical_crossentropy' presumably means the datasets
# yield one-hot encoded labels -- confirm against the dataset construction.
model.compile(
    loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"]
)
model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    verbose=2,
    callbacks=callbacks,
)

# Use this flag to disable per-channel quantization for a model.
# This can reduce RAM usage for convolutional models, but may have
# an impact on accuracy.
disable_per_channel_quantization = False

Neural Network settings

Training settings

Augmentation settings

Advanced training settings

Audio training options

Neural network architecture

Model

Last training performance (validation set)

Accuracy

Loss

Confusion matrix (validation set)

Data explorer (classified samples)

On-device performance

Inferencing time

Peak RAM usage

Flash usage

Last training performance (validation set)

Accuracy

Loss

Confusion matrix (validation set)

Data explorer (classified samples)

On-device performance

Inferencing time

Peak RAM usage

Flash usage

Neural Network settings

Training settings

Augmentation settings

Advanced training settings

Audio training options

Neural network architecture

Model

Last training performance (validation set)

Accuracy

Loss

Confusion matrix (validation set)

Data explorer (classified samples)

Settings

On-device performance

Inferencing time

Peak RAM usage

Flash usage

Last training performance (validation set)

Accuracy

Loss

Confusion matrix (validation set)

Data explorer (classified samples)

Settings

On-device performance

Inferencing time

Peak RAM usage

Flash usage