diff --git a/optimus/optimus16K-wikitext103.model b/optimus/optimus16K-wikitext103.model
new file mode 100644
index 0000000000000000000000000000000000000000..dfb4afc9fa857a1c2b860bb4418e943e99098148
Binary files /dev/null and b/optimus/optimus16K-wikitext103.model differ
diff --git a/optimus/optimus32K-wikitext103.model b/optimus/optimus32K-wikitext103.model
deleted file mode 100644
index a13ec4baf18de12b03b1c3ce39b696ab465482a1..0000000000000000000000000000000000000000
Binary files a/optimus/optimus32K-wikitext103.model and /dev/null differ
diff --git a/optimus/optimus60K-wikitext103.model b/optimus/optimus60K-wikitext103.model
index 16d7047c07944c152634fbfe8920c001b5621253..bde1bf13e7a6742e70cf5ac5dc1458141c19168e 100644
Binary files a/optimus/optimus60K-wikitext103.model and b/optimus/optimus60K-wikitext103.model differ
diff --git a/optimus/tokenizer.py b/optimus/tokenizer.py
index 2df2ad1afd2302605587f9bda46b39d38be39a3f..170d4a5caf946894809fbf4a27a9e2f8b5dfe8ba 100644
--- a/optimus/tokenizer.py
+++ b/optimus/tokenizer.py
@@ -11,7 +11,7 @@ class Tokenizer():
         SentencePiece tokenizer.
 
         Args:
-            model (str): Path of the tokenizer model. Defaults to
+            model_path (str): Path of the tokenizer model. Defaults to
                 'optimus.model'.
 
         """
@@ -58,7 +58,13 @@ class Tokenizer():
                                        vocab_size=vocab_size,
                                        max_sentence_length=4096,
                                        input_sentence_size=1000000,
-                                       shuffle_input_sentence=True)
+                                       shuffle_input_sentence=True,
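+                                       # keep whitespace and skip NFKC normalization
+                                       # so encoded text decodes back verbatim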
+                                       remove_extra_whitespaces=False,
+                                       normalization_rule_name='identity',
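+                                       # unigram LM, SentencePiece's default algorithm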
+                                       model_type='unigram')
 
     def encode(self, input: str, bos: bool, eos: bool) -> List[int]:
         """
@@ -118,15 +124,17 @@ if __name__=='__main__':
 
     train = False
 
     if train:
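+        # train a 16K-vocab SentencePiece model on the WikiText-103 training split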
         filename = './wikitext-103/wiki.train.tokens'
         with open(filename, 'r') as f:
             lines = f.readlines()
-            print(type(iter(lines)))
             Tokenizer.train(iter(lines), vocab_size=16000)
 
     else:
-        tok = Tokenizer(root='..')
+        tok = Tokenizer(model_path='./optimus.model')
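+        # quick sanity check: vocab size, sample encodings and a decode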
+        print(len(tok))
         print(tok.encode("this is some sunny day", False, True))
         print(tok.encode_as_pieces("this is some sunny day"))
         print(tok.decode([1, 77, 34, 122, 9, 5, 10181, 206, 2]))