wmt-2020-pl-en/tau_fairseq.ipynb

235 KiB
Raw Permalink Blame History

# Imports: install the Python packages this notebook depends on
# (BPE tooling, Moses tokenizer bindings, fairseq and its config libraries).
!pip install fastBPE regex requests sacremoses subword-nmt omegaconf hydra-core torch fairseq numpy
Collecting fastBPE
  Downloading https://files.pythonhosted.org/packages/e1/37/f97181428a5d151501b90b2cebedf97c81b034ace753606a3cda5ad4e6e2/fastBPE-0.1.0.tar.gz
Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (2019.12.20)
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (2.23.0)
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
     |████████████████████████████████| 890kB 7.2MB/s 
[?25hCollecting subword-nmt
  Downloading https://files.pythonhosted.org/packages/74/60/6600a7bc09e7ab38bc53a48a20d8cae49b837f93f5842a41fe513a694912/subword_nmt-0.3.7-py2.py3-none-any.whl
Collecting omegaconf
  Downloading https://files.pythonhosted.org/packages/d0/eb/9d63ce09dd8aa85767c65668d5414958ea29648a0eec80a4a7d311ec2684/omegaconf-2.0.6-py3-none-any.whl
Collecting hydra-core
[?25l  Downloading https://files.pythonhosted.org/packages/52/e3/fbd70dd0d3ce4d1d75c22d56c0c9f895cfa7ed6587a9ffb821d6812d6a60/hydra_core-1.0.6-py3-none-any.whl (123kB)
     |████████████████████████████████| 133kB 28.5MB/s 
[?25hRequirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.7.0+cu101)
Collecting fairseq
[?25l  Downloading https://files.pythonhosted.org/packages/61/7b/2c90e007d737f4a2b7cd5066ac3a3d88acb2ce765972a61c308914c95568/fairseq-0.10.2-cp36-cp36m-manylinux1_x86_64.whl (1.7MB)
     |████████████████████████████████| 1.7MB 29.9MB/s 
[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (1.19.5)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests) (2020.12.5)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests) (1.24.3)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests) (3.0.4)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests) (2.10)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses) (1.15.0)
Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses) (7.1.2)
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses) (1.0.0)
Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from sacremoses) (4.41.1)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from omegaconf) (3.7.4.3)
Collecting PyYAML>=5.1.*
[?25l  Downloading https://files.pythonhosted.org/packages/7a/5b/bc0b5ab38247bba158504a410112b6c03f153c652734ece1849749e5f518/PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl (640kB)
     |████████████████████████████████| 645kB 37.6MB/s 
[?25hRequirement already satisfied: dataclasses; python_version == "3.6" in /usr/local/lib/python3.6/dist-packages (from omegaconf) (0.8)
Collecting antlr4-python3-runtime==4.8
[?25l  Downloading https://files.pythonhosted.org/packages/56/02/789a0bddf9c9b31b14c3e79ec22b9656185a803dc31c15f006f9855ece0d/antlr4-python3-runtime-4.8.tar.gz (112kB)
     |████████████████████████████████| 112kB 39.7MB/s 
[?25hRequirement already satisfied: importlib-resources; python_version < "3.9" in /usr/local/lib/python3.6/dist-packages (from hydra-core) (5.1.0)
Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch) (0.16.0)
Requirement already satisfied: cffi in /usr/local/lib/python3.6/dist-packages (from fairseq) (1.14.4)
Collecting sacrebleu>=1.4.12
[?25l  Downloading https://files.pythonhosted.org/packages/3b/7f/4fd83db8570288c3899d8e57666c2841403c15659f3d792a3cb8dc1c6689/sacrebleu-1.5.0-py3-none-any.whl (65kB)
     |████████████████████████████████| 71kB 7.9MB/s 
[?25hRequirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (from fairseq) (0.29.21)
Requirement already satisfied: zipp>=0.4; python_version < "3.8" in /usr/local/lib/python3.6/dist-packages (from importlib-resources; python_version < "3.9"->hydra-core) (3.4.0)
Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi->fairseq) (2.20)
Collecting portalocker
  Downloading https://files.pythonhosted.org/packages/82/22/e684c9e2e59b561dbe36538852e81849122c666c423448e3a5c99362c228/portalocker-2.2.1-py2.py3-none-any.whl
Building wheels for collected packages: fastBPE, sacremoses, antlr4-python3-runtime
  Building wheel for fastBPE (setup.py) ... [?25l[?25hdone
  Created wheel for fastBPE: filename=fastBPE-0.1.0-cp36-cp36m-linux_x86_64.whl size=481486 sha256=d50e17ebcee6e5b3a62a9e88093d27d1a392afb97cb57a33393770edce3b6360
  Stored in directory: /root/.cache/pip/wheels/f3/0c/9c/fc62058b4d473a5602bcd3d3edfece796f123875379ea82d79
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893261 sha256=ff7bf4480f907cde97018571267ad97c5a2f2731667fe4eb218c89baa2037960
  Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l[?25hdone
  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.8-cp36-none-any.whl size=141231 sha256=33d9ac700af9fffa36a2e7c818f5235ef807d94f5505cf178d307571d4de2de6
  Stored in directory: /root/.cache/pip/wheels/e3/e2/fa/b78480b448b8579ddf393bebd3f47ee23aa84c89b6a78285c8
Successfully built fastBPE sacremoses antlr4-python3-runtime
Installing collected packages: fastBPE, sacremoses, subword-nmt, PyYAML, omegaconf, antlr4-python3-runtime, hydra-core, portalocker, sacrebleu, fairseq
  Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-5.4.1 antlr4-python3-runtime-4.8 fairseq-0.10.2 fastBPE-0.1.0 hydra-core-1.0.6 omegaconf-2.0.6 portalocker-2.2.1 sacrebleu-1.5.0 sacremoses-0.0.43 subword-nmt-0.3.7
# Google Drive: switch the working directory to the fairseq project folder.
# NOTE(review): `drive` is imported but no drive.mount(...) call is visible in
# this cell; presumably the drive was mounted beforehand — confirm.
from google.colab import drive
# The path is relative, so this only resolves when the current directory is the
# one that contains the `drive/` mount point.
%cd drive/MyDrive/tau_fairseq/fairseq
/content/drive/MyDrive/tau_fairseq/fairseq
# Corpus preprocessing: remove characters that could break the translation
# pipeline, shuffle the sentence pairs, and split them into
# train / test / valid files.
import random
import re

CORPUS_DIR = "/content/drive/MyDrive/tau_fairseq/fairseq/corpus"

# Matches every run of characters that is NOT on the whitelist (ASCII letters
# and digits, Polish and German letters, common punctuation, space, newline).
# Compiled once so the same pattern is shared by both languages.
_DISALLOWED = re.compile(r'[^A-Za-z0-9ĄąŻżŹźĆćŃńŚśĘęÓóŁłÄäÖöẞßÜü\.\/\\\\\'\"\;\:\<\>\,\!\?\@\$\%\&\*\(\)\_\-\+\=\\{\\}\[\] \n]+')


def clean_line(line):
    """Return *line* with all non-whitelisted characters removed.

    The result is always newline-terminated. Without this, a final input line
    that lacks a trailing newline would, after shuffling, be glued to the next
    sentence in the output file and could misalign the parallel corpus.
    """
    cleaned = _DISALLOWED.sub('', line)
    if not cleaned.endswith("\n"):
        cleaned += "\n"
    return cleaned


def split_sizes(length):
    """Return ``(n_test, n_train, n_valid)`` for a corpus of *length* pairs.

    Train gets 80% and valid 5% (both rounded down); test takes the remainder.
    """
    n_train = int(length * 0.8)
    n_valid = int(length * 0.05)
    n_test = length - n_train - n_valid
    return n_test, n_train, n_valid


def read_lines(path):
    """Read a UTF-8 text file and return its lines (line endings kept)."""
    with open(path, encoding="utf-8") as handle:
        return handle.readlines()


def write_split(name, pairs):
    """Write cleaned ``(en, pl)`` pairs to ``bitext_<name>.en`` / ``.pl``.

    Both files are managed by ``with`` so they are closed even if cleaning or
    writing raises part-way through.
    """
    en_path = "{}/bitext_{}.en".format(CORPUS_DIR, name)
    pl_path = "{}/bitext_{}.pl".format(CORPUS_DIR, name)
    with open(en_path, "w", encoding="utf-8") as out_en, \
            open(pl_path, "w", encoding="utf-8") as out_pl:
        for line_en, line_pl in pairs:
            out_en.write(clean_line(line_en))
            out_pl.write(clean_line(line_pl))


def build_splits():
    """Shuffle the parallel corpus and write the test, train and valid files."""
    content_en = read_lines(CORPUS_DIR + "/expected.tsv")
    content_pl = read_lines(CORPUS_DIR + "/in.tsv")
    if len(content_en) != len(content_pl):
        # zip() below truncates to the shorter file; make that visible instead
        # of silently dropping lines.
        print("WARNING: expected.tsv has {} lines but in.tsv has {}; "
              "extra lines are ignored".format(len(content_en), len(content_pl)))

    # Shuffle the pairs together so the two languages stay aligned.
    pairs = list(zip(content_en, content_pl))
    random.shuffle(pairs)

    # Same layout as before: the first chunk is test, then train, the rest valid.
    n_test, n_train, _ = split_sizes(len(pairs))
    write_split("test", pairs[:n_test])
    write_split("train", pairs[n_test:n_test + n_train])
    write_split("valid", pairs[n_test + n_train:])


# Inside a notebook cell __name__ is "__main__", so the split still runs when
# the cell is executed; importing the helpers elsewhere does not touch the disk.
if __name__ == "__main__":
    build_splits()
# Moses, same as before. The clone is commented out; the commands below use
# the relative path mosesdecoder/, so the checkout must already exist in the
# current working directory.
# !git clone https://github.com/moses-smt/mosesdecoder.git
# Tokenization: run the Moses tokenizer on every split (train/test/valid) for
# both languages, writing bitext_<split>.tok.<lang>.
!perl 'mosesdecoder/scripts/tokenizer/tokenizer.perl' -threads 8 -l en < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.tok.en
!perl 'mosesdecoder/scripts/tokenizer/tokenizer.perl' -threads 8 -l pl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.tok.pl
!perl 'mosesdecoder/scripts/tokenizer/tokenizer.perl' -threads 8 -l en < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.tok.en
!perl 'mosesdecoder/scripts/tokenizer/tokenizer.perl' -threads 8 -l pl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.tok.pl
!perl 'mosesdecoder/scripts/tokenizer/tokenizer.perl' -threads 8 -l en < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.tok.en
!perl 'mosesdecoder/scripts/tokenizer/tokenizer.perl' -threads 8 -l pl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.tok.pl
# Lowercasing: pass each tokenized file (*.tok.<lang>) through the Moses
# lowercase script, writing bitext_<split>.low.<lang>.
!perl mosesdecoder/scripts/tokenizer/lowercase.perl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.tok.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.low.en
!perl mosesdecoder/scripts/tokenizer/lowercase.perl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.tok.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.low.pl
!perl mosesdecoder/scripts/tokenizer/lowercase.perl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.tok.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.low.en
!perl mosesdecoder/scripts/tokenizer/lowercase.perl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.tok.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.low.pl
!perl mosesdecoder/scripts/tokenizer/lowercase.perl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.tok.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.low.en
!perl mosesdecoder/scripts/tokenizer/lowercase.perl < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.tok.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.low.pl
# Cleaning: run Moses clean-corpus-n.perl on each lowercased split. The script
# takes the input prefix, the two language suffixes (en, pl), the output
# prefix, and the bounds 1 and 80 (minimum / maximum sentence length per the
# Moses documentation), and writes bitext_<split>.clean.<lang>.
!perl mosesdecoder/scripts/training/clean-corpus-n.perl /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.low en pl /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.clean 1 80
!perl mosesdecoder/scripts/training/clean-corpus-n.perl /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.low en pl /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.clean 1 80
!perl mosesdecoder/scripts/training/clean-corpus-n.perl /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.low en pl /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.clean 1 80
# subword-nmt, same as before: clone the repository into the working directory.
!git clone https://github.com/rsennrich/subword-nmt.git
# From the fairseq/tau documentation: concatenate the cleaned training corpora
# into a single file, English sentences first, then the Polish ones.
!cat corpus/bitext_train.clean.en corpus/bitext_train.clean.pl > corpus/bitext_train.clean.en-pl
# Learn BPE on the concatenated corpora (also from the documentation), with
# -s 10000, and store the resulting codes in corpus/code.
!python subword-nmt/subword_nmt/learn_bpe.py -s 10000 < corpus/bitext_train.clean.en-pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code
# Apply BPE (per the documentation): encode every cleaned split with the
# learned codes, producing the final {train,valid,test}.{en,pl} files.
!python subword-nmt/subword_nmt/apply_bpe.py -c /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.clean.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/test.pl
!python subword-nmt/subword_nmt/apply_bpe.py -c /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_test.clean.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/test.en
!python subword-nmt/subword_nmt/apply_bpe.py -c /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.clean.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/train.pl
!python subword-nmt/subword_nmt/apply_bpe.py -c /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_train.clean.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/train.en
!python subword-nmt/subword_nmt/apply_bpe.py -c /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.clean.pl > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/valid.pl
!python subword-nmt/subword_nmt/apply_bpe.py -c /content/drive/MyDrive/tau_fairseq/fairseq/corpus/code < /content/drive/MyDrive/tau_fairseq/fairseq/corpus/bitext_valid.clean.en > /content/drive/MyDrive/tau_fairseq/fairseq/corpus/valid.en
# Build and install NVIDIA apex from source with its C++ and CUDA extensions.
# NOTE(review): `pip uninstall` is called without -y, so it would presumably
# ask for confirmation if apex were already installed — confirm this does not
# block the cell.
!pip uninstall apex
# NOTE(review): the clone fails if an apex/ directory already exists (the
# recorded output shows exactly that); the following steps then use the
# existing checkout.
!git clone https://github.com/NVIDIA/apex
%cd apex
# Remove any previous build directory before rebuilding.
!rm -rf build/
!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
# Return to the fairseq project directory.
%cd ..
WARNING: Skipping apex as it is not installed.
fatal: destination path 'apex' already exists and is not an empty directory.
/content/drive/My Drive/tau_fairseq/fairseq/apex
/usr/local/lib/python3.6/dist-packages/pip/_internal/commands/install.py:283: UserWarning: Disabling all use of wheels due to the use of --build-options / --global-options / --install-options.
  cmdoptions.check_install_build_global(options)
Created temporary directory: /tmp/pip-ephem-wheel-cache-zujeph16
Created temporary directory: /tmp/pip-req-tracker-tfcuq84x
Created requirements tracker '/tmp/pip-req-tracker-tfcuq84x'
Created temporary directory: /tmp/pip-install-gp63egps
Processing /content/drive/My Drive/tau_fairseq/fairseq/apex
  Created temporary directory: /tmp/pip-req-build-1z3lcbjz
  Added file:///content/drive/My%20Drive/tau_fairseq/fairseq/apex to build tracker '/tmp/pip-req-tracker-tfcuq84x'
    Running setup.py (path:/tmp/pip-req-build-1z3lcbjz/setup.py) egg_info for package from file:///content/drive/My%20Drive/tau_fairseq/fairseq/apex
    Running command python setup.py egg_info


    torch.__version__  = 1.7.0+cu101


    running egg_info
    creating /tmp/pip-req-build-1z3lcbjz/pip-egg-info/apex.egg-info
    writing /tmp/pip-req-build-1z3lcbjz/pip-egg-info/apex.egg-info/PKG-INFO
    writing dependency_links to /tmp/pip-req-build-1z3lcbjz/pip-egg-info/apex.egg-info/dependency_links.txt
    writing top-level names to /tmp/pip-req-build-1z3lcbjz/pip-egg-info/apex.egg-info/top_level.txt
    writing manifest file '/tmp/pip-req-build-1z3lcbjz/pip-egg-info/apex.egg-info/SOURCES.txt'
    writing manifest file '/tmp/pip-req-build-1z3lcbjz/pip-egg-info/apex.egg-info/SOURCES.txt'
    /tmp/pip-req-build-1z3lcbjz/setup.py:67: UserWarning: Option --pyprof not specified. Not installing PyProf dependencies!
      warnings.warn("Option --pyprof not specified. Not installing PyProf dependencies!")
  Source in /tmp/pip-req-build-1z3lcbjz has version 0.1, which satisfies requirement apex==0.1 from file:///content/drive/My%20Drive/tau_fairseq/fairseq/apex
  Removed apex==0.1 from file:///content/drive/My%20Drive/tau_fairseq/fairseq/apex from build tracker '/tmp/pip-req-tracker-tfcuq84x'
Skipping wheel build for apex, due to binaries being disabled for it.
Installing collected packages: apex
  Created temporary directory: /tmp/pip-record-rbk1kdhr
    Running command /usr/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-req-build-1z3lcbjz/setup.py'"'"'; __file__='"'"'/tmp/pip-req-build-1z3lcbjz/setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' --cpp_ext --cuda_ext install --record /tmp/pip-record-rbk1kdhr/install-record.txt --single-version-externally-managed --compile


    torch.__version__  = 1.7.0+cu101


    /tmp/pip-req-build-1z3lcbjz/setup.py:67: UserWarning: Option --pyprof not specified. Not installing PyProf dependencies!
      warnings.warn("Option --pyprof not specified. Not installing PyProf dependencies!")

    Compiling cuda extensions with
    nvcc: NVIDIA (R) Cuda compiler driver
    Copyright (c) 2005-2019 NVIDIA Corporation
    Built on Sun_Jul_28_19:07:16_PDT_2019
    Cuda compilation tools, release 10.1, V10.1.243
    from /usr/local/cuda/bin

    running install
    running build
    running build_py
    creating build
    creating build/lib.linux-x86_64-3.6
    creating build/lib.linux-x86_64-3.6/apex
    copying apex/__init__.py -> build/lib.linux-x86_64-3.6/apex
    creating build/lib.linux-x86_64-3.6/apex/fp16_utils
    copying apex/fp16_utils/fp16util.py -> build/lib.linux-x86_64-3.6/apex/fp16_utils
    copying apex/fp16_utils/__init__.py -> build/lib.linux-x86_64-3.6/apex/fp16_utils
    copying apex/fp16_utils/fp16_optimizer.py -> build/lib.linux-x86_64-3.6/apex/fp16_utils
    copying apex/fp16_utils/loss_scaler.py -> build/lib.linux-x86_64-3.6/apex/fp16_utils
    creating build/lib.linux-x86_64-3.6/apex/normalization
    copying apex/normalization/fused_layer_norm.py -> build/lib.linux-x86_64-3.6/apex/normalization
    copying apex/normalization/__init__.py -> build/lib.linux-x86_64-3.6/apex/normalization
    creating build/lib.linux-x86_64-3.6/apex/pyprof
    copying apex/pyprof/__init__.py -> build/lib.linux-x86_64-3.6/apex/pyprof
    creating build/lib.linux-x86_64-3.6/apex/mlp
    copying apex/mlp/mlp.py -> build/lib.linux-x86_64-3.6/apex/mlp
    copying apex/mlp/__init__.py -> build/lib.linux-x86_64-3.6/apex/mlp
    creating build/lib.linux-x86_64-3.6/apex/RNN
    copying apex/RNN/cells.py -> build/lib.linux-x86_64-3.6/apex/RNN
    copying apex/RNN/RNNBackend.py -> build/lib.linux-x86_64-3.6/apex/RNN
    copying apex/RNN/models.py -> build/lib.linux-x86_64-3.6/apex/RNN
    copying apex/RNN/__init__.py -> build/lib.linux-x86_64-3.6/apex/RNN
    creating build/lib.linux-x86_64-3.6/apex/multi_tensor_apply
    copying apex/multi_tensor_apply/__init__.py -> build/lib.linux-x86_64-3.6/apex/multi_tensor_apply
    copying apex/multi_tensor_apply/multi_tensor_apply.py -> build/lib.linux-x86_64-3.6/apex/multi_tensor_apply
    creating build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/LARC.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/multiproc.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/optimized_sync_batchnorm.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/sync_batchnorm_kernel.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/optimized_sync_batchnorm_kernel.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/__init__.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/distributed.py -> build/lib.linux-x86_64-3.6/apex/parallel
    copying apex/parallel/sync_batchnorm.py -> build/lib.linux-x86_64-3.6/apex/parallel
    creating build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/opt.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/utils.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/scaler.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/wrap.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/_initialize.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/frontend.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/__init__.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/_amp_state.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/compat.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/rnn_compat.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/handle.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/__version__.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/_process_optimizer.py -> build/lib.linux-x86_64-3.6/apex/amp
    copying apex/amp/amp.py -> build/lib.linux-x86_64-3.6/apex/amp
    creating build/lib.linux-x86_64-3.6/apex/optimizers
    copying apex/optimizers/fused_novograd.py -> build/lib.linux-x86_64-3.6/apex/optimizers
    copying apex/optimizers/__init__.py -> build/lib.linux-x86_64-3.6/apex/optimizers
    copying apex/optimizers/fused_sgd.py -> build/lib.linux-x86_64-3.6/apex/optimizers
    copying apex/optimizers/fused_adagrad.py -> build/lib.linux-x86_64-3.6/apex/optimizers
    copying apex/optimizers/fused_lamb.py -> build/lib.linux-x86_64-3.6/apex/optimizers
    copying apex/optimizers/fused_adam.py -> build/lib.linux-x86_64-3.6/apex/optimizers
    creating build/lib.linux-x86_64-3.6/apex/reparameterization
    copying apex/reparameterization/__init__.py -> build/lib.linux-x86_64-3.6/apex/reparameterization
    copying apex/reparameterization/reparameterization.py -> build/lib.linux-x86_64-3.6/apex/reparameterization
    copying apex/reparameterization/weight_norm.py -> build/lib.linux-x86_64-3.6/apex/reparameterization
    creating build/lib.linux-x86_64-3.6/apex/contrib
    copying apex/contrib/__init__.py -> build/lib.linux-x86_64-3.6/apex/contrib
    creating build/lib.linux-x86_64-3.6/apex/pyprof/parse
    copying apex/pyprof/parse/nvvp.py -> build/lib.linux-x86_64-3.6/apex/pyprof/parse
    copying apex/pyprof/parse/parse.py -> build/lib.linux-x86_64-3.6/apex/pyprof/parse
    copying apex/pyprof/parse/db.py -> build/lib.linux-x86_64-3.6/apex/pyprof/parse
    copying apex/pyprof/parse/kernel.py -> build/lib.linux-x86_64-3.6/apex/pyprof/parse
    copying apex/pyprof/parse/__init__.py -> build/lib.linux-x86_64-3.6/apex/pyprof/parse
    copying apex/pyprof/parse/__main__.py -> build/lib.linux-x86_64-3.6/apex/pyprof/parse
    creating build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/randomSample.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/index_slice_join_mutate.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/prof.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/dropout.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/reduction.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/data.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/softmax.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/usage.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/activation.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/embedding.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/convert.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/conv.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/utility.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/output.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/__init__.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/linear.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/misc.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/optim.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/blas.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/normalization.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/recurrentCell.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/pointwise.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/loss.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/pooling.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/__main__.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    copying apex/pyprof/prof/base.py -> build/lib.linux-x86_64-3.6/apex/pyprof/prof
    creating build/lib.linux-x86_64-3.6/apex/pyprof/nvtx
    copying apex/pyprof/nvtx/nvmarker.py -> build/lib.linux-x86_64-3.6/apex/pyprof/nvtx
    copying apex/pyprof/nvtx/__init__.py -> build/lib.linux-x86_64-3.6/apex/pyprof/nvtx
    creating build/lib.linux-x86_64-3.6/apex/amp/lists
    copying apex/amp/lists/tensor_overrides.py -> build/lib.linux-x86_64-3.6/apex/amp/lists
    copying apex/amp/lists/__init__.py -> build/lib.linux-x86_64-3.6/apex/amp/lists
    copying apex/amp/lists/functional_overrides.py -> build/lib.linux-x86_64-3.6/apex/amp/lists
    copying apex/amp/lists/torch_overrides.py -> build/lib.linux-x86_64-3.6/apex/amp/lists
    creating build/lib.linux-x86_64-3.6/apex/contrib/xentropy
    copying apex/contrib/xentropy/softmax_xentropy.py -> build/lib.linux-x86_64-3.6/apex/contrib/xentropy
    copying apex/contrib/xentropy/__init__.py -> build/lib.linux-x86_64-3.6/apex/contrib/xentropy
    creating build/lib.linux-x86_64-3.6/apex/contrib/sparsity
    copying apex/contrib/sparsity/asp.py -> build/lib.linux-x86_64-3.6/apex/contrib/sparsity
    copying apex/contrib/sparsity/__init__.py -> build/lib.linux-x86_64-3.6/apex/contrib/sparsity
    copying apex/contrib/sparsity/sparse_masklib.py -> build/lib.linux-x86_64-3.6/apex/contrib/sparsity
    creating build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/distributed_fused_adam_v3.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/distributed_fused_adam.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/__init__.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/distributed_fused_lamb.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/fused_sgd.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/fused_lamb.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/distributed_fused_adam_v2.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/fp16_optimizer.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    copying apex/contrib/optimizers/fused_adam.py -> build/lib.linux-x86_64-3.6/apex/contrib/optimizers
    creating build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/self_multihead_attn.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/__init__.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/self_multihead_attn_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/encdec_multihead_attn.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/fast_self_multihead_attn_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/mask_softmax_dropout_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    copying apex/contrib/multihead_attn/encdec_multihead_attn_func.py -> build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn
    creating build/lib.linux-x86_64-3.6/apex/contrib/groupbn
    copying apex/contrib/groupbn/batch_norm.py -> build/lib.linux-x86_64-3.6/apex/contrib/groupbn
    copying apex/contrib/groupbn/__init__.py -> build/lib.linux-x86_64-3.6/apex/contrib/groupbn
    running build_ext
    /usr/local/lib/python3.6/dist-packages/torch/utils/cpp_extension.py:339: UserWarning: Attempted to use ninja as the BuildExtension backend but we could not find ninja.. Falling back to using the slow distutils backend.
      warnings.warn(msg.format('we could not find ninja.'))
    building 'apex_C' extension
    creating build/temp.linux-x86_64-3.6
    creating build/temp.linux-x86_64-3.6/csrc
    x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/include/python3.6m -c csrc/flatten_unflatten.cpp -o build/temp.linux-x86_64-3.6/csrc/flatten_unflatten.o -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=apex_C -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Parallel.h:149:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/utils.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h:5,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:12,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/flatten_unflatten.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ParallelOpenMP.h:84:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]
     #pragma omp parallel for if ((end - begin) >= grain_size)

    In file included from csrc/flatten_unflatten.cpp:2:0:
    /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/utils/tensor_flatten.h: In member function at::DeprecatedTypeProperties& torch::utils::TensorGroup::type():
    /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/utils/tensor_flatten.h:36:28: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
         return tensors[0].type();
                                ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/flatten_unflatten.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    x86_64-linux-gnu-g++ -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.6/csrc/flatten_unflatten.o -L/usr/local/lib/python3.6/dist-packages/torch/lib -lc10 -ltorch -ltorch_cpu -ltorch_python -o build/lib.linux-x86_64-3.6/apex_C.cpython-36m-x86_64-linux-gnu.so
    building 'amp_C' extension
    x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/amp_C_frontend.cpp -o build/temp.linux-x86_64-3.6/csrc/amp_C_frontend.o -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Parallel.h:149:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/utils.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h:5,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:12,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/amp_C_frontend.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ParallelOpenMP.h:84:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]
     #pragma omp parallel for if ((end - begin) >= grain_size)

    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_sgd_kernel.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_sgd_kernel.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_scale_kernel.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_scale_kernel.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_axpby_kernel.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_axpby_kernel.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_l2norm_kernel.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_l2norm_kernel.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_lamb_stage_1.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_lamb_stage_1.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_lamb_stage_2.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_lamb_stage_2.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_adam.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_adam.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_adagrad.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_adagrad.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_novograd.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_novograd.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/multi_tensor_lamb.cu -o build/temp.linux-x86_64-3.6/csrc/multi_tensor_lamb.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=amp_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    x86_64-linux-gnu-g++ -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.6/csrc/amp_C_frontend.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_sgd_kernel.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_scale_kernel.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_axpby_kernel.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_l2norm_kernel.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_lamb_stage_1.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_lamb_stage_2.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_adam.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_adagrad.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_novograd.o build/temp.linux-x86_64-3.6/csrc/multi_tensor_lamb.o -L/usr/local/lib/python3.6/dist-packages/torch/lib -L/usr/local/cuda/lib64 -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.6/amp_C.cpython-36m-x86_64-linux-gnu.so
    building 'syncbn' extension
    x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/syncbn.cpp -o build/temp.linux-x86_64-3.6/csrc/syncbn.o -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=syncbn -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Parallel.h:149:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/utils.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h:5,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:12,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/syncbn.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ParallelOpenMP.h:84:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]
     #pragma omp parallel for if ((end - begin) >= grain_size)

    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/welford.cu -o build/temp.linux-x86_64-3.6/csrc/welford.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=syncbn -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    x86_64-linux-gnu-g++ -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.6/csrc/syncbn.o build/temp.linux-x86_64-3.6/csrc/welford.o -L/usr/local/lib/python3.6/dist-packages/torch/lib -L/usr/local/cuda/lib64 -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.6/syncbn.cpython-36m-x86_64-linux-gnu.so
    building 'fused_layer_norm_cuda' extension
    x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/layer_norm_cuda.cpp -o build/temp.linux-x86_64-3.6/csrc/layer_norm_cuda.o -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=fused_layer_norm_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Parallel.h:149:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/utils.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h:5,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:12,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ParallelOpenMP.h:84:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]
     #pragma omp parallel for if ((end - begin) >= grain_size)

    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp: In function std::vector<at::Tensor> layer_norm(at::Tensor, c10::IntArrayRef, double):
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:129:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(input);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp: In function std::vector<at::Tensor> layer_norm_affine(at::Tensor, c10::IntArrayRef, at::Tensor, at::Tensor, double):
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:149:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(input);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:150:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(gamma);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:151:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(beta);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp: In function at::Tensor layer_norm_gradient(at::Tensor, at::Tensor, at::Tensor, at::Tensor, c10::IntArrayRef, double):
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:193:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(dout);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:194:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(mean);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:195:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(invvar);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:196:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(input);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp: In function std::vector<at::Tensor> layer_norm_gradient_affine(at::Tensor, at::Tensor, at::Tensor, at::Tensor, c10::IntArrayRef, at::Tensor, at::Tensor, double):
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:218:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(dout);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:219:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(mean);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:220:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(invvar);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:221:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(input);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:222:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(gamma);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/DeviceType.h:8:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Device.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    csrc/layer_norm_cuda.cpp:117:42: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                                              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/macros/Macros.h:171:65: note: in definition of macro C10_UNLIKELY
     #define C10_UNLIKELY(expr)  (__builtin_expect(static_cast<bool>(expr), 0))
                                                                     ^~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:330:7: note: in expansion of macro C10_UNLIKELY_OR_CONST
       if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
           ^~~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:318:3: note: in expansion of macro TORCH_CHECK_WITH_MSG
       TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__)
       ^~~~~~~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/c10/util/Exception.h:341:32: note: in expansion of macro TORCH_CHECK_WITH
     #define TORCH_CHECK(cond, ...) TORCH_CHECK_WITH(Error, cond, __VA_ARGS__)
                                    ^~~~~~~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:117:23: note: in expansion of macro TORCH_CHECK
     #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
                           ^~~~~~~~~~~
    csrc/layer_norm_cuda.cpp:119:24: note: in expansion of macro CHECK_CUDA
     #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
                            ^~~~~~~~~~
    csrc/layer_norm_cuda.cpp:223:3: note: in expansion of macro CHECK_INPUT
       CHECK_INPUT(beta);
       ^~~~~~~~~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/layer_norm_cuda.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/layer_norm_cuda_kernel.cu -o build/temp.linux-x86_64-3.6/csrc/layer_norm_cuda_kernel.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -maxrregcount=50 -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=fused_layer_norm_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    x86_64-linux-gnu-g++ -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.6/csrc/layer_norm_cuda.o build/temp.linux-x86_64-3.6/csrc/layer_norm_cuda_kernel.o -L/usr/local/lib/python3.6/dist-packages/torch/lib -L/usr/local/cuda/lib64 -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.6/fused_layer_norm_cuda.cpython-36m-x86_64-linux-gnu.so
    building 'mlp_cuda' extension
    x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/mlp.cpp -o build/temp.linux-x86_64-3.6/csrc/mlp.o -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=mlp_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Parallel.h:149:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/utils.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h:5,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/nn.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:12,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ParallelOpenMP.h:84:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]
     #pragma omp parallel for if ((end - begin) >= grain_size)

    csrc/mlp.cpp: In function std::vector<at::Tensor> mlp_forward(int, int, std::vector<at::Tensor>):
    csrc/mlp.cpp:56:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int i = 0; i < num_layers; i++) {
                       ~~^~~~~~~~~~~~
    csrc/mlp.cpp:64:77: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
       auto out = at::empty({batch_size, output_features.back()}, inputs[0].type());
                                                                                 ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    csrc/mlp.cpp:65:67: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
       auto reserved_space = at::empty({reserved_size}, inputs[0].type());
                                                                       ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    csrc/mlp.cpp:65:68: warning: narrowing conversion of reserved_size from long unsigned int to long int inside { } [-Wnarrowing]
       auto reserved_space = at::empty({reserved_size}, inputs[0].type());
                                                                        ^
    csrc/mlp.cpp:65:68: warning: narrowing conversion of reserved_size from long unsigned int to long int inside { } [-Wnarrowing]
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:67:54: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
                                                          ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:150:28: note: in definition of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
         const auto& the_type = TYPE;                                            \
                                ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:152:56: warning: c10::ScalarType detail::scalar_type(const at::DeprecatedTypeProperties&) is deprecated: passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, pass an at::ScalarType instead [-Wdeprecated-declarations]
         at::ScalarType _st = ::detail::scalar_type(the_type);                   \
                                                            ^
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:66:23: note: declared here
     inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) {
                           ^~~~~~~~~~~
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:70:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < num_layers; i++) {
                         ~~^~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    csrc/mlp.cpp:76:10: warning: unused variable result [-Wunused-variable]
         auto result = mlp_fp<scalar_t>(
              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:70:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < num_layers; i++) {
                         ~~^~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    csrc/mlp.cpp:76:10: warning: unused variable result [-Wunused-variable]
         auto result = mlp_fp<scalar_t>(
              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:70:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < num_layers; i++) {
                         ~~^~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    csrc/mlp.cpp:76:10: warning: unused variable result [-Wunused-variable]
         auto result = mlp_fp<scalar_t>(
              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:67:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_forward", [&] {
       ^
    csrc/mlp.cpp: In function std::vector<at::Tensor> mlp_backward(int, int, at::Tensor, std::vector<at::Tensor>, std::vector<at::Tensor>):
    csrc/mlp.cpp:113:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int i = 0; i < num_layers; i++) {
                       ~~^~~~~~~~~~~~
    csrc/mlp.cpp:119:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int i = 0; i < inputs.size(); i++) {
                       ~~^~~~~~~~~~~~~~~
    csrc/mlp.cpp:120:67: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
         outputs.push_back(at::empty(inputs[i].sizes(), inputs[i].type()));  // clone for testing now
                                                                       ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:123:54: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
                                                          ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:150:28: note: in definition of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
         const auto& the_type = TYPE;                                            \
                                ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:152:56: warning: c10::ScalarType detail::scalar_type(const at::DeprecatedTypeProperties&) is deprecated: passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, pass an at::ScalarType instead [-Wdeprecated-declarations]
         at::ScalarType _st = ::detail::scalar_type(the_type);                   \
                                                            ^
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:66:23: note: declared here
     inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) {
                           ^~~~~~~~~~~
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:125:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < num_layers; i++) {
                         ~~^~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:129:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < inputs.size(); i++) {
                         ~~^~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:137:80: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                                                                    ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    csrc/mlp.cpp:137:44: warning: narrowing conversion of (work_size / sizeof (scalar_t)) from long unsigned int to long int inside { } [-Wnarrowing]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                      ~~~~~~~~~~^~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:137:44: warning: narrowing conversion of (work_size / sizeof (scalar_t)) from long unsigned int to long int inside { } [-Wnarrowing]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                      ~~~~~~~~~~^~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:139:10: warning: unused variable result [-Wunused-variable]
         auto result = mlp_bp<scalar_t>(
              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:125:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < num_layers; i++) {
                         ~~^~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:129:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < inputs.size(); i++) {
                         ~~^~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:137:80: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                                                                    ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    csrc/mlp.cpp:137:44: warning: narrowing conversion of (work_size / sizeof (scalar_t)) from long unsigned int to long int inside { } [-Wnarrowing]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                      ~~~~~~~~~~^~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:137:44: warning: narrowing conversion of (work_size / sizeof (scalar_t)) from long unsigned int to long int inside { } [-Wnarrowing]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                      ~~~~~~~~~~^~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:139:10: warning: unused variable result [-Wunused-variable]
         auto result = mlp_bp<scalar_t>(
              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp: In lambda function:
    csrc/mlp.cpp:125:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < num_layers; i++) {
                         ~~^~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:129:23: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
         for (int i = 0; i < inputs.size(); i++) {
                         ~~^~~~~~~~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:137:80: warning: at::DeprecatedTypeProperties& at::Tensor::type() const is deprecated: Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device(). [-Wdeprecated-declarations]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                                                                    ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Tensor.h:3:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Context.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:9,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/TensorBody.h:277:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.6/dist-packages/torch/include/ATen/ATen.h:13:0,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/data.h:3,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include/torch/all.h:8,
                     from /usr/local/lib/python3.6/dist-packages/torch/include/torch/extension.h:4,
                     from csrc/mlp.cpp:1:
    csrc/mlp.cpp:137:44: warning: narrowing conversion of (work_size / sizeof (scalar_t)) from long unsigned int to long int inside { } [-Wnarrowing]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                      ~~~~~~~~~~^~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:137:44: warning: narrowing conversion of (work_size / sizeof (scalar_t)) from long unsigned int to long int inside { } [-Wnarrowing]
         auto work_space = at::empty({work_size / sizeof(scalar_t)}, inputs[0].type());
                                      ~~~~~~~~~~^~~~~~~~~
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    csrc/mlp.cpp:139:10: warning: unused variable result [-Wunused-variable]
         auto result = mlp_bp<scalar_t>(
              ^
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/Dispatch.h:13:12: note: in definition of macro AT_PRIVATE_CASE_TYPE
         return __VA_ARGS__();                          \
                ^~~~~~~~~~~
    csrc/mlp.cpp:123:3: note: in expansion of macro AT_DISPATCH_FLOATING_TYPES_AND_HALF
       AT_DISPATCH_FLOATING_TYPES_AND_HALF(inputs[0].type(), "mlp_backward", [&] {
       ^
    /usr/local/cuda/bin/nvcc -I/usr/local/lib/python3.6/dist-packages/torch/include -I/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.6/dist-packages/torch/include/TH -I/usr/local/lib/python3.6/dist-packages/torch/include/THC -I/usr/local/cuda/include -I/usr/include/python3.6m -c csrc/mlp_cuda.cu -o build/temp.linux-x86_64-3.6/csrc/mlp_cuda.o -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=mlp_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_37,code=sm_37 -std=c++14
    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/boxing/impl/boxing.h(100): warning: integer conversion resulted in a change of sign

    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/op_registration/op_whitelist.h(39): warning: integer conversion resulted in a change of sign

    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/boxing/impl/boxing.h(100): warning: integer conversion resulted in a change of sign

    /usr/local/lib/python3.6/dist-packages/torch/include/ATen/core/op_registration/op_whitelist.h(39): warning: integer conversion resulted in a change of sign

    x86_64-linux-gnu-g++ -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.6/csrc/mlp.o build/temp.linux-x86_64-3.6/csrc/mlp_cuda.o -L/usr/local/lib/python3.6/dist-packages/torch/lib -L/usr/local/cuda/lib64 -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.6/mlp_cuda.cpython-36m-x86_64-linux-gnu.so
    running install_lib
    copying build/lib.linux-x86_64-3.6/syncbn.cpython-36m-x86_64-linux-gnu.so -> /usr/local/lib/python3.6/dist-packages
    copying build/lib.linux-x86_64-3.6/amp_C.cpython-36m-x86_64-linux-gnu.so -> /usr/local/lib/python3.6/dist-packages
    copying build/lib.linux-x86_64-3.6/fused_layer_norm_cuda.cpython-36m-x86_64-linux-gnu.so -> /usr/local/lib/python3.6/dist-packages
    copying build/lib.linux-x86_64-3.6/apex_C.cpython-36m-x86_64-linux-gnu.so -> /usr/local/lib/python3.6/dist-packages
    copying build/lib.linux-x86_64-3.6/mlp_cuda.cpython-36m-x86_64-linux-gnu.so -> /usr/local/lib/python3.6/dist-packages
    creating /usr/local/lib/python3.6/dist-packages/apex
    creating /usr/local/lib/python3.6/dist-packages/apex/fp16_utils
    copying build/lib.linux-x86_64-3.6/apex/fp16_utils/fp16util.py -> /usr/local/lib/python3.6/dist-packages/apex/fp16_utils
    copying build/lib.linux-x86_64-3.6/apex/fp16_utils/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/fp16_utils
    copying build/lib.linux-x86_64-3.6/apex/fp16_utils/fp16_optimizer.py -> /usr/local/lib/python3.6/dist-packages/apex/fp16_utils
    copying build/lib.linux-x86_64-3.6/apex/fp16_utils/loss_scaler.py -> /usr/local/lib/python3.6/dist-packages/apex/fp16_utils
    creating /usr/local/lib/python3.6/dist-packages/apex/normalization
    copying build/lib.linux-x86_64-3.6/apex/normalization/fused_layer_norm.py -> /usr/local/lib/python3.6/dist-packages/apex/normalization
    copying build/lib.linux-x86_64-3.6/apex/normalization/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/normalization
    creating /usr/local/lib/python3.6/dist-packages/apex/pyprof
    creating /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    copying build/lib.linux-x86_64-3.6/apex/pyprof/parse/nvvp.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    copying build/lib.linux-x86_64-3.6/apex/pyprof/parse/parse.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    copying build/lib.linux-x86_64-3.6/apex/pyprof/parse/db.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    copying build/lib.linux-x86_64-3.6/apex/pyprof/parse/kernel.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    copying build/lib.linux-x86_64-3.6/apex/pyprof/parse/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    copying build/lib.linux-x86_64-3.6/apex/pyprof/parse/__main__.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse
    creating /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/randomSample.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/index_slice_join_mutate.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/prof.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/dropout.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/reduction.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/data.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/softmax.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/usage.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/activation.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/embedding.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/convert.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/conv.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/utility.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/output.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/linear.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/misc.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/optim.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/blas.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/normalization.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/recurrentCell.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/pointwise.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/loss.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/pooling.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/__main__.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/prof/base.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof
    copying build/lib.linux-x86_64-3.6/apex/pyprof/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof
    creating /usr/local/lib/python3.6/dist-packages/apex/pyprof/nvtx
    copying build/lib.linux-x86_64-3.6/apex/pyprof/nvtx/nvmarker.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/nvtx
    copying build/lib.linux-x86_64-3.6/apex/pyprof/nvtx/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/pyprof/nvtx
    creating /usr/local/lib/python3.6/dist-packages/apex/mlp
    copying build/lib.linux-x86_64-3.6/apex/mlp/mlp.py -> /usr/local/lib/python3.6/dist-packages/apex/mlp
    copying build/lib.linux-x86_64-3.6/apex/mlp/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/mlp
    creating /usr/local/lib/python3.6/dist-packages/apex/RNN
    copying build/lib.linux-x86_64-3.6/apex/RNN/cells.py -> /usr/local/lib/python3.6/dist-packages/apex/RNN
    copying build/lib.linux-x86_64-3.6/apex/RNN/RNNBackend.py -> /usr/local/lib/python3.6/dist-packages/apex/RNN
    copying build/lib.linux-x86_64-3.6/apex/RNN/models.py -> /usr/local/lib/python3.6/dist-packages/apex/RNN
    copying build/lib.linux-x86_64-3.6/apex/RNN/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/RNN
    creating /usr/local/lib/python3.6/dist-packages/apex/multi_tensor_apply
    copying build/lib.linux-x86_64-3.6/apex/multi_tensor_apply/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/multi_tensor_apply
    copying build/lib.linux-x86_64-3.6/apex/multi_tensor_apply/multi_tensor_apply.py -> /usr/local/lib/python3.6/dist-packages/apex/multi_tensor_apply
    copying build/lib.linux-x86_64-3.6/apex/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex
    creating /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/LARC.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/multiproc.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/optimized_sync_batchnorm.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/sync_batchnorm_kernel.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/optimized_sync_batchnorm_kernel.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/distributed.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    copying build/lib.linux-x86_64-3.6/apex/parallel/sync_batchnorm.py -> /usr/local/lib/python3.6/dist-packages/apex/parallel
    creating /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/opt.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/utils.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/scaler.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/wrap.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/_initialize.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/frontend.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    creating /usr/local/lib/python3.6/dist-packages/apex/amp/lists
    copying build/lib.linux-x86_64-3.6/apex/amp/lists/tensor_overrides.py -> /usr/local/lib/python3.6/dist-packages/apex/amp/lists
    copying build/lib.linux-x86_64-3.6/apex/amp/lists/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/amp/lists
    copying build/lib.linux-x86_64-3.6/apex/amp/lists/functional_overrides.py -> /usr/local/lib/python3.6/dist-packages/apex/amp/lists
    copying build/lib.linux-x86_64-3.6/apex/amp/lists/torch_overrides.py -> /usr/local/lib/python3.6/dist-packages/apex/amp/lists
    copying build/lib.linux-x86_64-3.6/apex/amp/_amp_state.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/compat.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/rnn_compat.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/handle.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/__version__.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/_process_optimizer.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    copying build/lib.linux-x86_64-3.6/apex/amp/amp.py -> /usr/local/lib/python3.6/dist-packages/apex/amp
    creating /usr/local/lib/python3.6/dist-packages/apex/optimizers
    copying build/lib.linux-x86_64-3.6/apex/optimizers/fused_novograd.py -> /usr/local/lib/python3.6/dist-packages/apex/optimizers
    copying build/lib.linux-x86_64-3.6/apex/optimizers/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/optimizers
    copying build/lib.linux-x86_64-3.6/apex/optimizers/fused_sgd.py -> /usr/local/lib/python3.6/dist-packages/apex/optimizers
    copying build/lib.linux-x86_64-3.6/apex/optimizers/fused_adagrad.py -> /usr/local/lib/python3.6/dist-packages/apex/optimizers
    copying build/lib.linux-x86_64-3.6/apex/optimizers/fused_lamb.py -> /usr/local/lib/python3.6/dist-packages/apex/optimizers
    copying build/lib.linux-x86_64-3.6/apex/optimizers/fused_adam.py -> /usr/local/lib/python3.6/dist-packages/apex/optimizers
    creating /usr/local/lib/python3.6/dist-packages/apex/reparameterization
    copying build/lib.linux-x86_64-3.6/apex/reparameterization/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/reparameterization
    copying build/lib.linux-x86_64-3.6/apex/reparameterization/reparameterization.py -> /usr/local/lib/python3.6/dist-packages/apex/reparameterization
    copying build/lib.linux-x86_64-3.6/apex/reparameterization/weight_norm.py -> /usr/local/lib/python3.6/dist-packages/apex/reparameterization
    creating /usr/local/lib/python3.6/dist-packages/apex/contrib
    creating /usr/local/lib/python3.6/dist-packages/apex/contrib/xentropy
    copying build/lib.linux-x86_64-3.6/apex/contrib/xentropy/softmax_xentropy.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/xentropy
    copying build/lib.linux-x86_64-3.6/apex/contrib/xentropy/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/xentropy
    creating /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity
    copying build/lib.linux-x86_64-3.6/apex/contrib/sparsity/asp.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity
    copying build/lib.linux-x86_64-3.6/apex/contrib/sparsity/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity
    copying build/lib.linux-x86_64-3.6/apex/contrib/sparsity/sparse_masklib.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity
    copying build/lib.linux-x86_64-3.6/apex/contrib/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib
    creating /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/distributed_fused_adam_v3.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/distributed_fused_adam.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/distributed_fused_lamb.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/fused_sgd.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/fused_lamb.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/distributed_fused_adam_v2.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/fp16_optimizer.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    copying build/lib.linux-x86_64-3.6/apex/contrib/optimizers/fused_adam.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers
    creating /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/self_multihead_attn.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/self_multihead_attn_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/encdec_multihead_attn.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/fast_self_multihead_attn_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/mask_softmax_dropout_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    copying build/lib.linux-x86_64-3.6/apex/contrib/multihead_attn/encdec_multihead_attn_func.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn
    creating /usr/local/lib/python3.6/dist-packages/apex/contrib/groupbn
    copying build/lib.linux-x86_64-3.6/apex/contrib/groupbn/batch_norm.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/groupbn
    copying build/lib.linux-x86_64-3.6/apex/contrib/groupbn/__init__.py -> /usr/local/lib/python3.6/dist-packages/apex/contrib/groupbn
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/fp16_utils/fp16util.py to fp16util.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/fp16_utils/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/fp16_utils/fp16_optimizer.py to fp16_optimizer.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/fp16_utils/loss_scaler.py to loss_scaler.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/normalization/fused_layer_norm.py to fused_layer_norm.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/normalization/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse/nvvp.py to nvvp.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse/parse.py to parse.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse/db.py to db.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse/kernel.py to kernel.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/parse/__main__.py to __main__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/randomSample.py to randomSample.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/index_slice_join_mutate.py to index_slice_join_mutate.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/prof.py to prof.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/dropout.py to dropout.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/reduction.py to reduction.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/data.py to data.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/softmax.py to softmax.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/usage.py to usage.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/activation.py to activation.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/embedding.py to embedding.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/convert.py to convert.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/conv.py to conv.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/utility.py to utility.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/output.py to output.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/linear.py to linear.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/misc.py to misc.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/optim.py to optim.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/blas.py to blas.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/normalization.py to normalization.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/recurrentCell.py to recurrentCell.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/pointwise.py to pointwise.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/loss.py to loss.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/pooling.py to pooling.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/__main__.py to __main__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/prof/base.py to base.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/nvtx/nvmarker.py to nvmarker.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/pyprof/nvtx/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/mlp/mlp.py to mlp.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/mlp/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/RNN/cells.py to cells.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/RNN/RNNBackend.py to RNNBackend.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/RNN/models.py to models.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/RNN/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/multi_tensor_apply/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/multi_tensor_apply/multi_tensor_apply.py to multi_tensor_apply.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/LARC.py to LARC.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/multiproc.py to multiproc.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/optimized_sync_batchnorm.py to optimized_sync_batchnorm.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/sync_batchnorm_kernel.py to sync_batchnorm_kernel.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/optimized_sync_batchnorm_kernel.py to optimized_sync_batchnorm_kernel.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/distributed.py to distributed.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/parallel/sync_batchnorm.py to sync_batchnorm.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/opt.py to opt.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/utils.py to utils.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/scaler.py to scaler.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/wrap.py to wrap.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/_initialize.py to _initialize.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/frontend.py to frontend.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/lists/tensor_overrides.py to tensor_overrides.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/lists/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/lists/functional_overrides.py to functional_overrides.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/lists/torch_overrides.py to torch_overrides.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/_amp_state.py to _amp_state.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/compat.py to compat.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/rnn_compat.py to rnn_compat.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/handle.py to handle.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/__version__.py to __version__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/_process_optimizer.py to _process_optimizer.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/amp/amp.py to amp.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/optimizers/fused_novograd.py to fused_novograd.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/optimizers/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/optimizers/fused_sgd.py to fused_sgd.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/optimizers/fused_adagrad.py to fused_adagrad.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/optimizers/fused_lamb.py to fused_lamb.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/optimizers/fused_adam.py to fused_adam.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/reparameterization/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/reparameterization/reparameterization.py to reparameterization.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/reparameterization/weight_norm.py to weight_norm.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/xentropy/softmax_xentropy.py to softmax_xentropy.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/xentropy/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity/asp.py to asp.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/sparsity/sparse_masklib.py to sparse_masklib.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/distributed_fused_adam_v3.py to distributed_fused_adam_v3.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/distributed_fused_adam.py to distributed_fused_adam.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/distributed_fused_lamb.py to distributed_fused_lamb.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/fused_sgd.py to fused_sgd.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/fused_lamb.py to fused_lamb.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/distributed_fused_adam_v2.py to distributed_fused_adam_v2.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/fp16_optimizer.py to fp16_optimizer.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/optimizers/fused_adam.py to fused_adam.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py to fast_encdec_multihead_attn_norm_add_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/self_multihead_attn.py to self_multihead_attn.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py to fast_self_multihead_attn_norm_add_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py to fast_encdec_multihead_attn_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/__init__.py to __init__.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/self_multihead_attn_func.py to self_multihead_attn_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/encdec_multihead_attn.py to encdec_multihead_attn.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/fast_self_multihead_attn_func.py to fast_self_multihead_attn_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/mask_softmax_dropout_func.py to mask_softmax_dropout_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/multihead_attn/encdec_multihead_attn_func.py to encdec_multihead_attn_func.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/groupbn/batch_norm.py to batch_norm.cpython-36.pyc
    byte-compiling /usr/local/lib/python3.6/dist-packages/apex/contrib/groupbn/__init__.py to __init__.cpython-36.pyc
    running install_egg_info
    running egg_info
    creating apex.egg-info
    writing apex.egg-info/PKG-INFO
    writing dependency_links to apex.egg-info/dependency_links.txt
    writing top-level names to apex.egg-info/top_level.txt
    writing manifest file 'apex.egg-info/SOURCES.txt'
    writing manifest file 'apex.egg-info/SOURCES.txt'
    Copying apex.egg-info to /usr/local/lib/python3.6/dist-packages/apex-0.1-py3.6.egg-info
    running install_scripts
    writing list of installed files to '/tmp/pip-record-rbk1kdhr/install-record.txt'
    Running setup.py install for apex ... [?25l[?25hdone
  Removing source in /tmp/pip-req-build-1z3lcbjz
Successfully installed apex-0.1
Cleaning up...
Removed build tracker '/tmp/pip-req-tracker-tfcuq84x'
/content/drive/My Drive/tau_fairseq/fairseq
# Rebuild fairseq from scratch: compile its extension modules in place
# (`build_ext --inplace`) so they can be used from this source checkout.
!python setup.py build_ext --inplace
running build_ext
/usr/local/lib/python3.6/dist-packages/torch/utils/cpp_extension.py:339: UserWarning: Attempted to use ninja as the BuildExtension backend but we could not find ninja.. Falling back to using the slow distutils backend.
  warnings.warn(msg.format('we could not find ninja.'))
skipping 'fairseq/data/data_utils_fast.cpp' Cython extension (up-to-date)
skipping 'fairseq/data/token_block_utils_fast.cpp' Cython extension (up-to-date)
copying build/lib.linux-x86_64-3.6/fairseq/libbleu.cpython-36m-x86_64-linux-gnu.so -> fairseq
copying build/lib.linux-x86_64-3.6/fairseq/data/data_utils_fast.cpython-36m-x86_64-linux-gnu.so -> fairseq/data
copying build/lib.linux-x86_64-3.6/fairseq/data/token_block_utils_fast.cpython-36m-x86_64-linux-gnu.so -> fairseq/data
copying build/lib.linux-x86_64-3.6/fairseq/libnat.cpython-36m-x86_64-linux-gnu.so -> fairseq
# fairseq-side preprocessing (binarization and so on), following the fairseq
# documentation. Language pair is pl -> en; the train/valid/test splits are
# given by the prefixes corpus/train, corpus/valid and corpus/test, and the
# result is written to the `output` directory.
!python fairseq_cli/preprocess.py --source-lang pl --target-lang en --trainpref corpus/train --testpref corpus/test --validpref corpus/valid --destdir output
# Training
# Trains a `transformer` model on the data in `output` (the --destdir of the
# preprocessing step) with Adam, fp16, an inverse_sqrt learning-rate schedule
# (4000 warmup updates) and label-smoothed cross-entropy. BLEU is computed
# during validation (beam 5, BPE removed, Moses detokenization) and is the
# metric used to pick the best checkpoint; per-epoch checkpoints are disabled.
# NOTE(review): --lr 0.0000005 (5e-7) is about three orders of magnitude below
#   the 5e-4 commonly paired with inverse_sqrt / 4000 warmup updates in the
#   fairseq translation examples - confirm this value is intentional.
# NOTE(review): --save-dir is `checkpoints/fconv` although --arch is
#   `transformer`; the directory name looks like a leftover - confirm.
# !ls
# import fairseq
# NOTE(review): numpy is imported here but not referenced anywhere in this cell.
import numpy
# CUDA_VISIBLE_DEVICES=0
!python train.py output --optimizer adam --adam-betas '(0.9, 0.98)' --fp16 --reset-optimizer --arch transformer --clip-norm 0.0  --lr 0.0000005 --lr-scheduler inverse_sqrt --warmup-updates 4000 --dropout 0.3 --weight-decay 0.0001 --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --max-tokens 4096 --eval-bleu --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' --eval-bleu-detok moses --eval-bleu-remove-bpe --eval-bleu-print-samples --best-checkpoint-metric bleu --maximize-best-checkpoint-metric --save-dir checkpoints/fconv --no-epoch-checkpoints
# Disabled: translate corpus/dev-0 and corpus/test-A with the best checkpoint.
# NOTE(review): `ianteractive.py` looks like a typo for fairseq's
#   `interactive.py` - verify the script name before re-enabling these cells.
# !python ianteractive.py output/ --path checkpoints/fconv/checkpoint_best.pt --buffer-size 10 --batch-size 5 --beam 5 --bpe-codes corpus/code --tokenizer moses \
#     --bpe subword_nmt --source-lang pl --target-lang en < corpus/dev-0/in.tsv > corpus/dev-0/out_temp.tsv --post-process --remove-bpe --quiet
# !python ianteractive.py output/ --path checkpoints/fconv/checkpoint_best.pt --buffer-size 10 --batch-size 5 --beam 5 --bpe-codes corpus/code --tokenizer moses \
#     --bpe subword_nmt --source-lang pl --target-lang en < corpus/test-A/in.tsv > corpus/test-A/out_temp.tsv --post-process --remove-bpe --quiet