
Commit 7fa38a4 ("update")
1 parent: 4ae8028

33 files changed: +21 -24 lines

.gitignore (-1)

@@ -3,7 +3,6 @@ __pycache__/
 *.py[cod]
 *$py.class
 
-# C extensions
 *.so
 
 # Distribution / packaging

README.md (+1)

@@ -23,6 +23,7 @@ arxiv: https://arxiv.org/pdf/1909.11942.pdf
 * Pre-LN: moves the Layer Norm to a different position, e.g. into the residual computation (referred to as the `Pre-LN Transformer`)
 
 <p align="center"><img width="200" src="https://lonepatient-1257945978.cos.ap-chengdu.myqcloud.com/Selection_001.png" /></p>
+
 paper: [On Layer Normalization in the Transformer Architecture](https://openreview.net/forum?id=B1x8anVFPr)
 
 **Usage**
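The Pre-LN bullet in the hunk above deserves a concrete illustration: instead of normalizing after the residual addition (the Post-LN arrangement of the original Transformer), Pre-LN applies the LayerNorm before each sub-layer, inside the residual branch. A minimal PyTorch sketch of the idea (illustrative only; `PreLNBlock` and its parameters are assumptions, not code from this repo):

```python
import torch.nn as nn

class PreLNBlock(nn.Module):
    """One Transformer sub-layer wired Pre-LN style: the LayerNorm is
    applied before the sub-layer, inside the residual branch."""
    def __init__(self, d_model, sublayer):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.sublayer = sublayer  # e.g. self-attention or the feed-forward net

    def forward(self, x):
        # Post-LN (original Transformer) would be: self.norm(x + self.sublayer(x))
        # Pre-LN keeps the residual path unnormalized:
        return x + self.sublayer(self.norm(x))
```

The linked paper argues this placement keeps gradients well-scaled at initialization, which is why Pre-LN Transformers can often be trained without a learning-rate warmup stage.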

README_EN.md (+1 -1)

@@ -16,7 +16,7 @@ arxiv: https://arxiv.org/pdf/1909.11942.pdf
 - scikit-learn
 - sentencepiece
 
-### Download Pre-trained Models of English
+## Download Pre-trained Models of English
 
 Version 2 of ALBERT models is released. TF-Hub modules are available:

albert_chinese_pytorch/callback/optimizater.py (-1)

@@ -23,7 +23,6 @@
     'Ranger',
     'BertAdam'
 ]
-
 class SGDW(Optimizer):
     r"""Implements stochastic gradient descent (optionally with momentum) with
     weight decay from the paper `Fixing Weight Decay Regularization in Adam`_.
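The SGDW docstring visible above refers to decoupled weight decay: the decay shrinks the weights directly rather than being folded into the gradient, where it would interact with momentum. A minimal sketch of that update rule (an illustration of the cited paper's idea; `sgdw_step` and its arguments are assumptions, not the repo's SGDW class):

```python
import torch

@torch.no_grad()
def sgdw_step(params, momentum_buffers, lr=0.01, momentum=0.9, weight_decay=1e-2):
    """One SGD-with-momentum step using decoupled weight decay (the
    SGDW idea): the decay term never enters the momentum buffer."""
    for p, buf in zip(params, momentum_buffers):
        if p.grad is None:
            continue
        buf.mul_(momentum).add_(p.grad)    # v <- momentum * v + g
        p.mul_(1.0 - lr * weight_decay)    # decoupled decay: p <- (1 - lr*wd) * p
        p.add_(buf, alpha=-lr)             # gradient step:   p <- p - lr * v
```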

albert_chinese_pytorch/convert_albert_tf_checkpoint_to_pytorch.py (+1)

@@ -24,6 +24,7 @@
 from model.modeling_albert import BertConfig, AlbertForPreTraining, load_tf_weights_in_albert
 
 import logging
+
 logging.basicConfig(level=logging.INFO)
 
 def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,share_type, pytorch_dump_path):
(+1 -1, file name not captured)

@@ -1 +1 @@
-#encoding:utf-8
+#encoding:utf-8

albert_chinese_pytorch/lcqmc_progressor.py (-1)

@@ -23,7 +23,6 @@ def __init__(self, guid, text_a, text_b=None, label=None):
         self.text_b = text_b
         self.label = label
 
-
 class InputFeature(object):
     '''
     A single set of features of data.

albert_chinese_pytorch/metrics/glue_compute_metrics.py (-1)

@@ -17,7 +17,6 @@
 import csv
 import sys
 import logging
-
 logger = logging.getLogger(__name__)
 
 try:

albert_chinese_pytorch/model/configuration_utils.py (-1)

@@ -17,7 +17,6 @@
 
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
-
 import copy
 import json
 import logging
(+1 -1, file name not captured)

@@ -1 +1 @@
-#encoding:utf-8
+#encoding:utf-8

albert_chinese_pytorch/prepare_lm_data_mask.py (-1)

@@ -26,7 +26,6 @@ def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens):
         else:
             trunc_tokens.pop()
 
-
 def create_instances_from_document(all_documents, document_index, max_seq_length, short_seq_prob,
                                    masked_lm_prob, max_predictions_per_seq, vocab_words):
     """Creates `TrainingInstance`s for a single document.

albert_chinese_pytorch/prepare_lm_data_ngram.py (-1)

@@ -27,7 +27,6 @@ def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens):
         else:
             trunc_tokens.pop()
 
-
 def create_instances_from_document(all_documents, document_index, max_seq_length, short_seq_prob,
                                    max_ngram, masked_lm_prob, max_predictions_per_seq, vocab_words):
     """Creates `TrainingInstance`s for a single document.

albert_chinese_pytorch/prev_trained_model/albert_tiny/config.json (-1)

@@ -10,7 +10,6 @@
   "max_position_embeddings": 512,
   "num_attention_heads": 12,
   "num_hidden_layers": 4,
-
   "pooler_fc_size": 768,
   "pooler_num_attention_heads": 12,
   "pooler_num_fc_layers": 3,

albert_chinese_pytorch/processors/glue.py (-1)

@@ -22,7 +22,6 @@
 
 logger = logging.getLogger(__name__)
 
-
 def collate_fn(batch):
     """
     batch should be a list of (sequence, target, length) tuples...

albert_chinese_pytorch/run_classifier.py (-1)

@@ -46,7 +46,6 @@
     'albert': (BertConfig, AlbertForSequenceClassification, BertTokenizer)
 }
 
-
 def train(args, train_dataset, model, tokenizer):
     """ Train the model """
     args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

albert_chinese_pytorch/run_classifier_lcqmc.sh (+1)

@@ -3,6 +3,7 @@ export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/albert_tiny
 export DATA_DIR=$CURRENT_DIR/dataset
 export OUTPUR_DIR=$CURRENT_DIR/outputs
 TASK_NAME="lcqmc"
+
 python run_classifier.py \
   --model_type=albert \
   --model_name_or_path=$BERT_BASE_DIR \

albert_chinese_pytorch/run_pretraining.py (+1)

@@ -19,6 +19,7 @@
 from tools.common import seed_everything
 
 InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
+
 def convert_example_to_features(example, tokenizer, max_seq_length):
     tokens = example["tokens"]
     segment_ids = example["segment_ids"]
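The `InputFeatures` namedtuple added in this hunk bundles one masked-LM training example. For illustration, a toy instance (the values and the padding/label conventions here are assumptions based on common BERT-style pipelines, not taken from this repo):

```python
from collections import namedtuple

InputFeatures = namedtuple("InputFeatures",
                           "input_ids input_mask segment_ids lm_label_ids is_next")

# Toy example: two real tokens zero-padded to length 4.
features = InputFeatures(
    input_ids=[101, 102, 0, 0],     # token ids, zero-padded
    input_mask=[1, 1, 0, 0],        # 1 = real token, 0 = padding
    segment_ids=[0, 0, 0, 0],       # single-segment input
    lm_label_ids=[-1, -1, -1, -1],  # -1 = no masked-LM target at this position
    is_next=1,                      # next-sentence / sentence-order label
)
```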

albert_chinese_pytorch/tools/common.py (+1)

@@ -10,6 +10,7 @@
 import logging
 
 logger = logging.getLogger()
+
 def init_logger(log_file=None, log_file_level=logging.NOTSET):
     '''
     Example:

albert_english_pytorch/callback/progressbar.py (-1)

@@ -1,5 +1,4 @@
 import time
-
 class ProgressBar(object):
     '''
     custom progress bar

albert_english_pytorch/convert_albert_tf_checkpoint_to_pytorch.py (-1)

@@ -20,7 +20,6 @@
 
 import argparse
 import torch
-
 from model.modeling_albert import BertConfig, AlbertForPreTraining, load_tf_weights_in_albert
 
 import logging

albert_english_pytorch/dataset/qqp/.gitignore (+1 -1)

@@ -3,7 +3,7 @@ __pycache__/
 *.py[cod]
 *$py.class
 
-# C extensions
+# C extension
 *.so
 
 # Distribution / packaging

albert_english_pytorch/metrics/glue_compute_metrics.py (-1)

@@ -19,7 +19,6 @@
 import logging
 
 logger = logging.getLogger(__name__)
-
 try:
     from scipy.stats import pearsonr, spearmanr
     from sklearn.metrics import matthews_corrcoef, f1_score

albert_english_pytorch/model/configuration_bert.py (-1)

@@ -23,7 +23,6 @@
 from io import open
 
 from model.configuration_utils import PretrainedConfig
-
 logger = logging.getLogger(__name__)
 
 BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
(file name not captured)

@@ -1 +1,2 @@
-#encoding:utf-8
+#encoding:utf-8
+

albert_english_pytorch/prev_trained_model/albert_base_tf_v1/.gitignore (+1 -1)

@@ -3,7 +3,7 @@ __pycache__/
 *.py[cod]
 *$py.class
 
-# C extensions
+# C extension
 *.so
 
 # Distribution / packaging

albert_english_pytorch/processors/__init__.py (+1)

@@ -2,3 +2,4 @@
 from .glue import (glue_output_modes, glue_processors, glue_tasks_num_labels,
                    glue_convert_examples_to_features,collate_fn)
 
+

albert_english_pytorch/run_classifier.py (-1)

@@ -34,7 +34,6 @@
 
 from metrics.glue_compute_metrics import compute_metrics
 from processors import glue_output_modes as output_modes
-
 from processors import glue_processors as processors
 from processors import glue_convert_examples_to_features as convert_examples_to_features
 from processors import collate_fn

albert_english_pytorch/run_classifier_cola.sh (+1)

@@ -3,6 +3,7 @@ export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/albert_base_v2
 export DATA_DIR=$CURRENT_DIR/dataset
 export OUTPUR_DIR=$CURRENT_DIR/outputs
 TASK_NAME="cola"
+
 python run_classifier.py \
   --model_type=albert \
   --model_name_or_path=$BERT_BASE_DIR \

albert_english_pytorch/run_classifier_mnli.sh (+2 -1)

@@ -20,4 +20,5 @@ python run_classifier.py \
   --logging_steps=24544 \
   --save_steps=24544 \
   --output_dir=$OUTPUR_DIR/${TASK_NAME}_output/ \
-  --overwrite_output_dir
+  --overwrite_output_dir \
+  --seed=42

albert_english_pytorch/run_classifier_qqp.sh (+1)

@@ -3,6 +3,7 @@ export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/albert_base_v2
 export DATA_DIR=$CURRENT_DIR/dataset
 export OUTPUR_DIR=$CURRENT_DIR/outputs
 TASK_NAME="qqp"
+
 python run_classifier.py \
   --model_type=albert \
   --model_name_or_path=$BERT_BASE_DIR \

albert_english_pytorch/run_classifier_sst2.sh (+2 -1)

@@ -20,5 +20,6 @@ python run_classifier.py \
   --logging_steps=4210 \
   --save_steps=4210 \
   --output_dir=$OUTPUR_DIR/${TASK_NAME}_output/ \
-  --overwrite_output_dir
+  --overwrite_output_dir \
+  --seed=42
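Both the MNLI and SST-2 scripts now pass an explicit `--seed=42`, and run_pretraining.py imports a `seed_everything` helper from tools.common. A minimal sketch of what such a helper typically does (an assumed implementation, not the repo's actual tools.common code):

```python
import os
import random

import numpy as np
import torch

def seed_everything(seed=42):
    """Seed every common RNG source so a run with --seed=42 is repeatable."""
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
```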

albert_english_pytorch/run_classifier_stsb.sh (+1 -1)

@@ -7,8 +7,8 @@ python run_classifier.py \
   --model_type=albert \
   --model_name_or_path=$BERT_BASE_DIR \
   --task_name=$TASK_NAME \
-  --do_train \
   --do_eval \
+  --do_train \
   --do_lower_case \
   --data_dir=$DATA_DIR/${TASK_NAME}/ \
   --max_seq_length=128 \

albert_english_pytorch/tools/common.py (+1)

@@ -10,6 +10,7 @@
 import logging
 
 logger = logging.getLogger()
+
 def init_logger(log_file=None, log_file_level=logging.NOTSET):
     '''
     Example:
