
Commit 7e850c6

Beacontownfc authored and Oceania2018 committed
Add bert example
1 parent 2aeef9e commit 7e850c6

File tree

5 files changed: +963 additions, −0 deletions
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Tensorflow.Operations.Initializers;
using static Tensorflow.KerasApi;
using BERT;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using static Tensorflow.Keras.Engine.InputSpec;

namespace TensorFlowNET.Examples
{
    class BertClassification : SciSharpExample, IExample
    {
        // Fine-tuning hyperparameters.
        int max_seq_len = 180;
        int batch_size = 4;
        int num_classes = 2;
        int epoch = 3;
        float learning_rate = (float)2e-5;
        string pretrained_weight_path = "./tf_model.h5";
        BertConfig config = new BertConfig();
        NDArray np_x_train;
        NDArray np_y_train;

        public ExampleConfig InitConfig()
            => Config = new ExampleConfig
            {
                Name = "Bert for Classification",
                Enabled = true
            };

        public override void PrepareData()
        {
            // tf.debugging.set_log_device_placement(true);
            Console.WriteLine("Preparing data...");

            // Download and extract the IMDB sentiment dataset.
            string url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz";
            var dataset = keras.utils.get_file("aclImdb_v1.tar.gz", url,
                untar: true,
                cache_dir: Path.GetTempPath(),
                cache_subdir: "aclImdb_v1");
            var data_dir = Path.Combine(dataset, "aclImdb");
            var train_dir = Path.Combine(data_dir, "train");

            // Tokenize negative (label 0) and positive (label 1) reviews to
            // max_seq_len tokens each, then stack them into one training set.
            (int[,] x_train_neg, int[] y_train_neg) = IMDBDataPreProcessor.
                ProcessData(Path.Combine(train_dir, "neg"), max_seq_len, 0);
            (int[,] x_train_pos, int[] y_train_pos) = IMDBDataPreProcessor.
                ProcessData(Path.Combine(train_dir, "pos"), max_seq_len, 1);
            np_x_train = np.array(x_train_neg, dtype: tf.int32);
            np_y_train = np.array(y_train_neg, dtype: tf.int32);
            np_x_train = np.concatenate((np_x_train, np.array(x_train_pos, dtype: tf.int32)), 0);
            np_y_train = np.concatenate((np_y_train, np.array(y_train_pos, dtype: tf.int32)), 0);
        }

        public bool Run()
        {
            // BERT encoder followed by a dense classification head.
            var model = keras.Sequential();
            model.add(keras.layers.Input(max_seq_len, batch_size, dtype: tf.int32));
            model.add(new BertMainLayer(config));
            if (File.Exists(pretrained_weight_path)) model.load_weights(pretrained_weight_path);
            model.add(keras.layers.Dense(num_classes));

            // AdamW with weight decay, excluding the LayerNorm gamma/beta parameters.
            model.compile(optimizer: keras.optimizers.AdamW(learning_rate, weight_decay: 0.01f, no_decay_params: new List<string> { "gamma", "beta" }),
                loss: keras.losses.SparseCategoricalCrossentropy(from_logits: true), metrics: new[] { "acc" });
            model.summary();

            PrepareData();
            model.fit(np_x_train, np_y_train,
                batch_size: batch_size,
                epochs: epoch,
                shuffle: true,
                validation_split: 0.2f);
            return true;
        }
    }
}
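
For context, a minimal sketch of how this example class would be driven, assuming only the InitConfig/Run contract visible above; the Program wrapper is hypothetical (the repository dispatches IExample implementations through its own runner):

    // Hypothetical driver; assumes only the members shown in BertClassification.
    class Program
    {
        static void Main()
        {
            var example = new BertClassification();
            var config = example.InitConfig();   // returns the ExampleConfig it assigns
            if (config.Enabled)
                example.Run();                   // builds, compiles, and fits the model
        }
    }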
Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Tensorflow.Keras.ArgsDefinition;

namespace BERT
{
    // Hyperparameter container for the BERT model; defaults match BERT-base.
    class BertConfig : LayerArgs
    {
        public int vocab_size;
        public int hidden_size;
        public int num_hidden_layers;
        public int num_attention_heads;
        public int intermediate_size;
        public string hidden_act;
        public float hidden_dropout_prob;
        public float attention_probs_dropout_prob;
        public int max_position_embeddings;
        public int type_vocab_size;
        public float initializer_range;
        public float layer_norm_eps;
        public int pad_token_id;
        public string position_embedding_type;

        public BertConfig(int vocab_size = 30522,
            int hidden_size = 768,
            int num_hidden_layers = 12,
            int num_attention_heads = 12,
            int intermediate_size = 3072,
            string hidden_act = "gelu",
            double hidden_dropout_prob = 0.1,
            double attention_probs_dropout_prob = 0.1,
            int max_position_embeddings = 512,
            int type_vocab_size = 2,
            double initializer_range = 0.02,
            double layer_norm_eps = 1e-12,
            int pad_token_id = 0,
            string position_embedding_type = "absolute")
        {
            this.vocab_size = vocab_size;
            this.hidden_size = hidden_size;
            this.num_hidden_layers = num_hidden_layers;
            this.num_attention_heads = num_attention_heads;
            this.intermediate_size = intermediate_size;
            this.hidden_act = hidden_act;
            this.hidden_dropout_prob = (float)hidden_dropout_prob;
            this.attention_probs_dropout_prob = (float)attention_probs_dropout_prob;
            this.max_position_embeddings = max_position_embeddings;
            this.type_vocab_size = type_vocab_size;
            this.initializer_range = (float)initializer_range;
            this.layer_norm_eps = (float)layer_norm_eps;
            this.pad_token_id = pad_token_id;
            this.position_embedding_type = position_embedding_type;
        }
    }
}
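
The constructor defaults correspond to the standard BERT-base configuration (30522-token vocabulary, 768 hidden units, 12 layers, 12 attention heads, 3072 intermediate size). A minimal usage sketch, assuming only the class above; the reduced values below are illustrative, not part of the commit:

    // BERT-base sized configuration (all defaults).
    var baseConfig = new BertConfig();

    // Hypothetical smaller variant for quick experiments;
    // hidden_size must stay divisible by num_attention_heads.
    var smallConfig = new BertConfig(
        hidden_size: 256,
        num_hidden_layers: 4,
        num_attention_heads: 4,
        intermediate_size: 1024);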
